blob: 40ffd20db79bf798cbff00a4232fd66df407b0bd [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
/*
 * XML_DIR_SEP: directory separator character, a backslash on native
 * WIN32 builds (Cygwin excluded) and a forward slash everywhere else.
 */
#if !defined(WIN32) || defined (__CYGWIN__)
#define XML_DIR_SEP '/'
#else
#define XML_DIR_SEP '\\'
#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
/**
 * MAX_DEPTH:
 *
 * Arbitrary limit on the nesting depth of the documents we accept to
 * process. This is a safety boundary, not a limitation of the parser
 * itself.
 */
#define MAX_DEPTH 1024

/* Sizes used for the parser working buffers. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Marker string identifying a "SAX compatibility mode" document. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
92
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by the W3C specifications.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
101
102/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000103xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
104 const xmlChar **str);
105
Daniel Veillard257d9102001-05-08 10:41:44 +0000106static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000107xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
108 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000109 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000110 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000111
Daniel Veillard8107a222002-01-13 14:10:10 +0000112static void
113xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
114 xmlNodePtr lastNode);
115
Daniel Veillard328f48c2002-11-15 15:24:34 +0000116static int
117xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
118 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Owen Taylor3473f882001-02-23 17:55:21 +0000119/************************************************************************
120 * *
121 * Parser stacks related functions and macros *
122 * *
123 ************************************************************************/
124
125xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
126 const xmlChar ** str);
127
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000128/**
129 * inputPush:
130 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000131 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000132 *
133 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000134 *
135 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000136 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000137extern int
138inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
139{
140 if (ctxt->inputNr >= ctxt->inputMax) {
141 ctxt->inputMax *= 2;
142 ctxt->inputTab =
143 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
144 ctxt->inputMax *
145 sizeof(ctxt->inputTab[0]));
146 if (ctxt->inputTab == NULL) {
147 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
148 return (0);
149 }
150 }
151 ctxt->inputTab[ctxt->inputNr] = value;
152 ctxt->input = value;
153 return (ctxt->inputNr++);
154}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000155/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000156 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000157 * @ctxt: an XML parser context
158 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000159 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000160 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000161 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000162 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000163extern xmlParserInputPtr
164inputPop(xmlParserCtxtPtr ctxt)
165{
166 xmlParserInputPtr ret;
167
168 if (ctxt->inputNr <= 0)
169 return (0);
170 ctxt->inputNr--;
171 if (ctxt->inputNr > 0)
172 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
173 else
174 ctxt->input = NULL;
175 ret = ctxt->inputTab[ctxt->inputNr];
176 ctxt->inputTab[ctxt->inputNr] = 0;
177 return (ret);
178}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000179/**
180 * nodePush:
181 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000182 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000183 *
184 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000185 *
186 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000187 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000188extern int
189nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
190{
191 if (ctxt->nodeNr >= ctxt->nodeMax) {
192 ctxt->nodeMax *= 2;
193 ctxt->nodeTab =
194 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
195 ctxt->nodeMax *
196 sizeof(ctxt->nodeTab[0]));
197 if (ctxt->nodeTab == NULL) {
198 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
199 return (0);
200 }
201 }
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000202#ifdef MAX_DEPTH
203 if (ctxt->nodeNr > MAX_DEPTH) {
204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
205 ctxt->sax->error(ctxt->userData,
206 "Excessive depth in document: change MAX_DEPTH = %d\n",
207 MAX_DEPTH);
208 ctxt->wellFormed = 0;
209 ctxt->instate = XML_PARSER_EOF;
210 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
211 return(0);
212 }
213#endif
Daniel Veillard1c732d22002-11-30 11:22:59 +0000214 ctxt->nodeTab[ctxt->nodeNr] = value;
215 ctxt->node = value;
216 return (ctxt->nodeNr++);
217}
218/**
219 * nodePop:
220 * @ctxt: an XML parser context
221 *
222 * Pops the top element node from the node stack
223 *
224 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +0000225 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000226extern xmlNodePtr
227nodePop(xmlParserCtxtPtr ctxt)
228{
229 xmlNodePtr ret;
230
231 if (ctxt->nodeNr <= 0)
232 return (0);
233 ctxt->nodeNr--;
234 if (ctxt->nodeNr > 0)
235 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
236 else
237 ctxt->node = NULL;
238 ret = ctxt->nodeTab[ctxt->nodeNr];
239 ctxt->nodeTab[ctxt->nodeNr] = 0;
240 return (ret);
241}
242/**
243 * namePush:
244 * @ctxt: an XML parser context
245 * @value: the element name
246 *
247 * Pushes a new element name on top of the name stack
248 *
249 * Returns 0 in case of error, the index in the stack otherwise
250 */
251extern int
252namePush(xmlParserCtxtPtr ctxt, xmlChar * value)
253{
254 if (ctxt->nameNr >= ctxt->nameMax) {
255 ctxt->nameMax *= 2;
256 ctxt->nameTab =
257 (xmlChar * *)xmlRealloc(ctxt->nameTab,
258 ctxt->nameMax *
259 sizeof(ctxt->nameTab[0]));
260 if (ctxt->nameTab == NULL) {
261 xmlGenericError(xmlGenericErrorContext, "realloc failed !\n");
262 return (0);
263 }
264 }
265 ctxt->nameTab[ctxt->nameNr] = value;
266 ctxt->name = value;
267 return (ctxt->nameNr++);
268}
269/**
270 * namePop:
271 * @ctxt: an XML parser context
272 *
273 * Pops the top element name from the name stack
274 *
275 * Returns the name just removed
276 */
277extern xmlChar *
278namePop(xmlParserCtxtPtr ctxt)
279{
280 xmlChar *ret;
281
282 if (ctxt->nameNr <= 0)
283 return (0);
284 ctxt->nameNr--;
285 if (ctxt->nameNr > 0)
286 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
287 else
288 ctxt->name = NULL;
289 ret = ctxt->nameTab[ctxt->nameNr];
290 ctxt->nameTab[ctxt->nameNr] = 0;
291 return (ret);
292}
Owen Taylor3473f882001-02-23 17:55:21 +0000293
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000294static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000295 if (ctxt->spaceNr >= ctxt->spaceMax) {
296 ctxt->spaceMax *= 2;
297 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
298 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
299 if (ctxt->spaceTab == NULL) {
300 xmlGenericError(xmlGenericErrorContext,
301 "realloc failed !\n");
302 return(0);
303 }
304 }
305 ctxt->spaceTab[ctxt->spaceNr] = val;
306 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
307 return(ctxt->spaceNr++);
308}
309
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000310static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000311 int ret;
312 if (ctxt->spaceNr <= 0) return(0);
313 ctxt->spaceNr--;
314 if (ctxt->spaceNr > 0)
315 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
316 else
317 ctxt->space = NULL;
318 ret = ctxt->spaceTab[ctxt->spaceNr];
319 ctxt->spaceTab[ctxt->spaceNr] = -1;
320 return(ret);
321}
322
323/*
324 * Macros for accessing the content. Those should be used only by the parser,
325 * and not exported.
326 *
327 * Dirty macros, i.e. one often need to make assumption on the context to
328 * use them
329 *
330 * CUR_PTR return the current pointer to the xmlChar to be parsed.
331 * To be used with extreme caution since operations consuming
332 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
337 * RAW same as CUR but in the input buffer, bypass any token
338 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare against ASCII based substrings.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +0000345 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
346 *
347 * NEXT Skip to the next character, this does the proper decoding
348 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +0000349 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +0000350 * CUR_CHAR(l) returns the current unicode character (int), set l
351 * to the number of xmlChars used for the encoding [0-5].
352 * CUR_SCHAR same but operate on a string instead of the context
353 * COPY_BUF copy the current unicode char to the target buffer, increment
354 * the index
355 * GROW, SHRINK handling of input buffers
356 */
357
/* Current byte of the current input (RAW and CUR expand identically). */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
/* Byte located (val) positions after the current one. */
#define NXT(val) ctxt->input->cur[(val)]
/* Pointer to the current position in the current input. */
#define CUR_PTR ctxt->input->cur

/*
 * Advance by (val) bytes, updating the character and column counters,
 * then handle a '%' found at the new position and, if the input is
 * exhausted and cannot be grown, pop it.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col+=(val);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
370
Daniel Veillarda880b122003-04-21 21:36:41 +0000371#define SHRINK if ((ctxt->progressive == 0) && \
372 (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK))\
Daniel Veillard46de64e2002-05-29 08:21:33 +0000373 xmlSHRINK (ctxt);
374
375static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
376 xmlParserInputShrink(ctxt->input);
377 if ((*ctxt->input->cur == 0) &&
378 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
379 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +0000380 }
Owen Taylor3473f882001-02-23 17:55:21 +0000381
Daniel Veillarda880b122003-04-21 21:36:41 +0000382#define GROW if ((ctxt->progressive == 0) && \
383 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +0000384 xmlGROW (ctxt);
385
386static void xmlGROW (xmlParserCtxtPtr ctxt) {
387 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
388 if ((*ctxt->input->cur == 0) &&
389 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
390 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +0000391}
Owen Taylor3473f882001-02-23 17:55:21 +0000392
/* Skip the blanks at the current position, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Move to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * Advance by exactly one byte, bumping the column and character
 * counters, and try to grow the input when a 0 byte is reached.
 * Expands to a brace block, not a do/while statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * Advance by l bytes, updating the line/column position according to
 * whether the current byte is a newline, then handle a '%' found at
 * the new position.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Current character of the context input / of string s, length in l. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * Append character v to buffer b at index i: a plain byte store when
 * l == 1, otherwise through xmlCopyCharMultiByte(); i is advanced.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +0000419
420/**
421 * xmlSkipBlankChars:
422 * @ctxt: the XML parser context
423 *
424 * skip all blanks character found at that point in the input streams.
425 * It pops up finished entities in the process if allowable at that point.
426 *
427 * Returns the number of space chars skipped
428 */
429
430int
431xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000432 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000433
434 /*
435 * It's Okay to use CUR/NEXT here since all the blanks are on
436 * the ASCII range.
437 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000438 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
439 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000440 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000441 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000442 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000443 cur = ctxt->input->cur;
444 while (IS_BLANK(*cur)) {
445 if (*cur == '\n') {
446 ctxt->input->line++; ctxt->input->col = 1;
447 }
448 cur++;
449 res++;
450 if (*cur == 0) {
451 ctxt->input->cur = cur;
452 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
453 cur = ctxt->input->cur;
454 }
455 }
456 ctxt->input->cur = cur;
457 } else {
458 int cur;
459 do {
460 cur = CUR;
461 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
462 NEXT;
463 cur = CUR;
464 res++;
465 }
466 while ((cur == 0) && (ctxt->inputNr > 1) &&
467 (ctxt->instate != XML_PARSER_COMMENT)) {
468 xmlPopInput(ctxt);
469 cur = CUR;
470 }
471 /*
472 * Need to handle support of entities branching here
473 */
474 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
475 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
476 }
Owen Taylor3473f882001-02-23 17:55:21 +0000477 return(res);
478}
479
480/************************************************************************
481 * *
482 * Commodity functions to handle entities *
483 * *
484 ************************************************************************/
485
486/**
487 * xmlPopInput:
488 * @ctxt: an XML parser context
489 *
490 * xmlPopInput: the current input pointed by ctxt->input came to an end
491 * pop it and return the next char.
492 *
493 * Returns the current xmlChar in the parser context
494 */
495xmlChar
496xmlPopInput(xmlParserCtxtPtr ctxt) {
497 if (ctxt->inputNr == 1) return(0); /* End of main Input */
498 if (xmlParserDebugEntities)
499 xmlGenericError(xmlGenericErrorContext,
500 "Popping input %d\n", ctxt->inputNr);
501 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +0000502 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000503 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
504 return(xmlPopInput(ctxt));
505 return(CUR);
506}
507
508/**
509 * xmlPushInput:
510 * @ctxt: an XML parser context
511 * @input: an XML parser input fragment (entity, XML fragment ...).
512 *
513 * xmlPushInput: switch to a new input stream which is stacked on top
514 * of the previous one(s).
515 */
516void
517xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
518 if (input == NULL) return;
519
520 if (xmlParserDebugEntities) {
521 if ((ctxt->input != NULL) && (ctxt->input->filename))
522 xmlGenericError(xmlGenericErrorContext,
523 "%s(%d): ", ctxt->input->filename,
524 ctxt->input->line);
525 xmlGenericError(xmlGenericErrorContext,
526 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
527 }
528 inputPush(ctxt, input);
529 GROW;
530}
531
532/**
533 * xmlParseCharRef:
534 * @ctxt: an XML parser context
535 *
536 * parse Reference declarations
537 *
538 * [66] CharRef ::= '&#' [0-9]+ ';' |
539 * '&#x' [0-9a-fA-F]+ ';'
540 *
541 * [ WFC: Legal Character ]
542 * Characters referred to using character references must match the
543 * production for Char.
544 *
545 * Returns the value parsed (as an int), 0 in case of error
546 */
547int
548xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000549 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000550 int count = 0;
551
Owen Taylor3473f882001-02-23 17:55:21 +0000552 /*
553 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
554 */
Daniel Veillard561b7f82002-03-20 21:55:57 +0000555 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +0000556 (NXT(2) == 'x')) {
557 SKIP(3);
558 GROW;
559 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000560 if (count++ > 20) {
561 count = 0;
562 GROW;
563 }
564 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000565 val = val * 16 + (CUR - '0');
566 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
567 val = val * 16 + (CUR - 'a') + 10;
568 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
569 val = val * 16 + (CUR - 'A') + 10;
570 else {
571 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
572 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
573 ctxt->sax->error(ctxt->userData,
574 "xmlParseCharRef: invalid hexadecimal value\n");
575 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000576 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000577 val = 0;
578 break;
579 }
580 NEXT;
581 count++;
582 }
583 if (RAW == ';') {
584 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +0000585 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +0000586 ctxt->nbChars ++;
587 ctxt->input->cur++;
588 }
Daniel Veillard561b7f82002-03-20 21:55:57 +0000589 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +0000590 SKIP(2);
591 GROW;
592 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000593 if (count++ > 20) {
594 count = 0;
595 GROW;
596 }
597 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000598 val = val * 10 + (CUR - '0');
599 else {
600 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
602 ctxt->sax->error(ctxt->userData,
603 "xmlParseCharRef: invalid decimal value\n");
604 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000605 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000606 val = 0;
607 break;
608 }
609 NEXT;
610 count++;
611 }
612 if (RAW == ';') {
613 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +0000614 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +0000615 ctxt->nbChars ++;
616 ctxt->input->cur++;
617 }
618 } else {
619 ctxt->errNo = XML_ERR_INVALID_CHARREF;
620 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
621 ctxt->sax->error(ctxt->userData,
622 "xmlParseCharRef: invalid value\n");
623 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000624 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000625 }
626
627 /*
628 * [ WFC: Legal Character ]
629 * Characters referred to using character references must match the
630 * production for Char.
631 */
632 if (IS_CHAR(val)) {
633 return(val);
634 } else {
635 ctxt->errNo = XML_ERR_INVALID_CHAR;
636 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000637 ctxt->sax->error(ctxt->userData,
638 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000639 val);
640 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000641 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000642 }
643 return(0);
644}
645
646/**
647 * xmlParseStringCharRef:
648 * @ctxt: an XML parser context
649 * @str: a pointer to an index in the string
650 *
651 * parse Reference declarations, variant parsing from a string rather
652 * than an an input flow.
653 *
654 * [66] CharRef ::= '&#' [0-9]+ ';' |
655 * '&#x' [0-9a-fA-F]+ ';'
656 *
657 * [ WFC: Legal Character ]
658 * Characters referred to using character references must match the
659 * production for Char.
660 *
661 * Returns the value parsed (as an int), 0 in case of error, str will be
662 * updated to the current value of the index
663 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000664static int
Owen Taylor3473f882001-02-23 17:55:21 +0000665xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
666 const xmlChar *ptr;
667 xmlChar cur;
668 int val = 0;
669
670 if ((str == NULL) || (*str == NULL)) return(0);
671 ptr = *str;
672 cur = *ptr;
673 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
674 ptr += 3;
675 cur = *ptr;
676 while (cur != ';') { /* Non input consuming loop */
677 if ((cur >= '0') && (cur <= '9'))
678 val = val * 16 + (cur - '0');
679 else if ((cur >= 'a') && (cur <= 'f'))
680 val = val * 16 + (cur - 'a') + 10;
681 else if ((cur >= 'A') && (cur <= 'F'))
682 val = val * 16 + (cur - 'A') + 10;
683 else {
684 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
685 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
686 ctxt->sax->error(ctxt->userData,
687 "xmlParseStringCharRef: invalid hexadecimal value\n");
688 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000689 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000690 val = 0;
691 break;
692 }
693 ptr++;
694 cur = *ptr;
695 }
696 if (cur == ';')
697 ptr++;
698 } else if ((cur == '&') && (ptr[1] == '#')){
699 ptr += 2;
700 cur = *ptr;
701 while (cur != ';') { /* Non input consuming loops */
702 if ((cur >= '0') && (cur <= '9'))
703 val = val * 10 + (cur - '0');
704 else {
705 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
706 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
707 ctxt->sax->error(ctxt->userData,
708 "xmlParseStringCharRef: invalid decimal value\n");
709 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000710 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000711 val = 0;
712 break;
713 }
714 ptr++;
715 cur = *ptr;
716 }
717 if (cur == ';')
718 ptr++;
719 } else {
720 ctxt->errNo = XML_ERR_INVALID_CHARREF;
721 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
722 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000723 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000724 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000725 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000726 return(0);
727 }
728 *str = ptr;
729
730 /*
731 * [ WFC: Legal Character ]
732 * Characters referred to using character references must match the
733 * production for Char.
734 */
735 if (IS_CHAR(val)) {
736 return(val);
737 } else {
738 ctxt->errNo = XML_ERR_INVALID_CHAR;
739 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
740 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000741 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000742 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000743 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000744 }
745 return(0);
746}
747
748/**
Daniel Veillardf5582f12002-06-11 10:08:16 +0000749 * xmlNewBlanksWrapperInputStream:
750 * @ctxt: an XML parser context
751 * @entity: an Entity pointer
752 *
753 * Create a new input stream for wrapping
754 * blanks around a PEReference
755 *
756 * Returns the new input stream or NULL
757 */
758
759static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
760
Daniel Veillardf4862f02002-09-10 11:13:43 +0000761static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +0000762xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
763 xmlParserInputPtr input;
764 xmlChar *buffer;
765 size_t length;
766 if (entity == NULL) {
767 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
768 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
769 ctxt->sax->error(ctxt->userData,
770 "internal: xmlNewBlanksWrapperInputStream entity = NULL\n");
771 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
772 return(NULL);
773 }
774 if (xmlParserDebugEntities)
775 xmlGenericError(xmlGenericErrorContext,
776 "new blanks wrapper for entity: %s\n", entity->name);
777 input = xmlNewInputStream(ctxt);
778 if (input == NULL) {
779 return(NULL);
780 }
781 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +0000782 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +0000783 if (buffer == NULL) {
784 return(NULL);
785 }
786 buffer [0] = ' ';
787 buffer [1] = '%';
788 buffer [length-3] = ';';
789 buffer [length-2] = ' ';
790 buffer [length-1] = 0;
791 memcpy(buffer + 2, entity->name, length - 5);
792 input->free = deallocblankswrapper;
793 input->base = buffer;
794 input->cur = buffer;
795 input->length = length;
796 input->end = &buffer[length];
797 return(input);
798}
799
800/**
Owen Taylor3473f882001-02-23 17:55:21 +0000801 * xmlParserHandlePEReference:
802 * @ctxt: the parser context
803 *
804 * [69] PEReference ::= '%' Name ';'
805 *
806 * [ WFC: No Recursion ]
807 * A parsed entity must not contain a recursive
808 * reference to itself, either directly or indirectly.
809 *
810 * [ WFC: Entity Declared ]
811 * In a document without any DTD, a document with only an internal DTD
812 * subset which contains no parameter entity references, or a document
813 * with "standalone='yes'", ... ... The declaration of a parameter
814 * entity must precede any reference to it...
815 *
816 * [ VC: Entity Declared ]
817 * In a document with an external subset or external parameter entities
818 * with "standalone='no'", ... ... The declaration of a parameter entity
819 * must precede any reference to it...
820 *
821 * [ WFC: In DTD ]
822 * Parameter-entity references may only appear in the DTD.
823 * NOTE: misleading but this is handled.
824 *
825 * A PEReference may have been detected in the current input stream
826 * the handling is done accordingly to
827 * http://www.w3.org/TR/REC-xml#entproc
828 * i.e.
829 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000830 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000831 */
832void
833xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
834 xmlChar *name;
835 xmlEntityPtr entity = NULL;
836 xmlParserInputPtr input;
837
Owen Taylor3473f882001-02-23 17:55:21 +0000838 if (RAW != '%') return;
839 switch(ctxt->instate) {
840 case XML_PARSER_CDATA_SECTION:
841 return;
842 case XML_PARSER_COMMENT:
843 return;
844 case XML_PARSER_START_TAG:
845 return;
846 case XML_PARSER_END_TAG:
847 return;
848 case XML_PARSER_EOF:
849 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
850 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
851 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
852 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000853 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000854 return;
855 case XML_PARSER_PROLOG:
856 case XML_PARSER_START:
857 case XML_PARSER_MISC:
858 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
859 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
860 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
861 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000862 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000863 return;
864 case XML_PARSER_ENTITY_DECL:
865 case XML_PARSER_CONTENT:
866 case XML_PARSER_ATTRIBUTE_VALUE:
867 case XML_PARSER_PI:
868 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000869 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000870 /* we just ignore it there */
871 return;
872 case XML_PARSER_EPILOG:
873 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
874 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
875 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
876 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000877 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000878 return;
879 case XML_PARSER_ENTITY_VALUE:
880 /*
881 * NOTE: in the case of entity values, we don't do the
882 * substitution here since we need the literal
883 * entity value to be able to save the internal
884 * subset of the document.
885 * This will be handled by xmlStringDecodeEntities
886 */
887 return;
888 case XML_PARSER_DTD:
889 /*
890 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
891 * In the internal DTD subset, parameter-entity references
892 * can occur only where markup declarations can occur, not
893 * within markup declarations.
894 * In that case this is handled in xmlParseMarkupDecl
895 */
896 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
897 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +0000898 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
899 return;
Owen Taylor3473f882001-02-23 17:55:21 +0000900 break;
901 case XML_PARSER_IGNORE:
902 return;
903 }
904
905 NEXT;
906 name = xmlParseName(ctxt);
907 if (xmlParserDebugEntities)
908 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000909 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000910 if (name == NULL) {
911 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
912 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000913 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000914 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000915 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000916 } else {
917 if (RAW == ';') {
918 NEXT;
919 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
920 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
921 if (entity == NULL) {
922
923 /*
924 * [ WFC: Entity Declared ]
925 * In a document without any DTD, a document with only an
926 * internal DTD subset which contains no parameter entity
927 * references, or a document with "standalone='yes'", ...
928 * ... The declaration of a parameter entity must precede
929 * any reference to it...
930 */
931 if ((ctxt->standalone == 1) ||
932 ((ctxt->hasExternalSubset == 0) &&
933 (ctxt->hasPErefs == 0))) {
934 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
935 ctxt->sax->error(ctxt->userData,
936 "PEReference: %%%s; not found\n", name);
937 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +0000938 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +0000939 } else {
940 /*
941 * [ VC: Entity Declared ]
942 * In a document with an external subset or external
943 * parameter entities with "standalone='no'", ...
944 * ... The declaration of a parameter entity must precede
945 * any reference to it...
946 */
947 if ((!ctxt->disableSAX) &&
948 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
949 ctxt->vctxt.error(ctxt->vctxt.userData,
950 "PEReference: %%%s; not found\n", name);
951 } else if ((!ctxt->disableSAX) &&
952 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
953 ctxt->sax->warning(ctxt->userData,
954 "PEReference: %%%s; not found\n", name);
955 ctxt->valid = 0;
956 }
Daniel Veillardf5582f12002-06-11 10:08:16 +0000957 } else if (ctxt->input->free != deallocblankswrapper) {
958 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
959 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +0000960 } else {
961 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
962 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000963 xmlChar start[4];
964 xmlCharEncoding enc;
965
Owen Taylor3473f882001-02-23 17:55:21 +0000966 /*
967 * handle the extra spaces added before and after
968 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000969 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000970 */
971 input = xmlNewEntityInputStream(ctxt, entity);
972 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000973
974 /*
975 * Get the 4 first bytes and decode the charset
976 * if enc != XML_CHAR_ENCODING_NONE
977 * plug some encoding conversion routines.
978 */
979 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +0000980 if (entity->length >= 4) {
981 start[0] = RAW;
982 start[1] = NXT(1);
983 start[2] = NXT(2);
984 start[3] = NXT(3);
985 enc = xmlDetectCharEncoding(start, 4);
986 if (enc != XML_CHAR_ENCODING_NONE) {
987 xmlSwitchEncoding(ctxt, enc);
988 }
Daniel Veillard87a764e2001-06-20 17:41:10 +0000989 }
990
Owen Taylor3473f882001-02-23 17:55:21 +0000991 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
992 (RAW == '<') && (NXT(1) == '?') &&
993 (NXT(2) == 'x') && (NXT(3) == 'm') &&
994 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
995 xmlParseTextDecl(ctxt);
996 }
Owen Taylor3473f882001-02-23 17:55:21 +0000997 } else {
998 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
999 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001000 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +00001001 name);
1002 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001003 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001004 }
1005 }
1006 } else {
1007 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1008 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1009 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001010 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001011 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001012 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001013 }
1014 xmlFree(name);
1015 }
1016}
1017
/*
 * Macro used to grow the current buffer.
 *
 * Doubles @buffer##_size and reallocates @buffer accordingly. It must be
 * used inside a function returning a pointer: on allocation failure the
 * old buffer is released (it would otherwise leak, since the enclosing
 * function is left immediately) and the function returns NULL.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_grown;						\
    buffer##_size *= 2;							\
    buffer##_grown = (xmlChar *)					\
    		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (buffer##_grown == NULL) {					\
	xmlGenericError(xmlGenericErrorContext, "realloc failed");	\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = buffer##_grown;						\
}
1030
1031/**
1032 * xmlStringDecodeEntities:
1033 * @ctxt: the parser context
1034 * @str: the input string
1035 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1036 * @end: an end marker xmlChar, 0 if none
1037 * @end2: an end marker xmlChar, 0 if none
1038 * @end3: an end marker xmlChar, 0 if none
1039 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001040 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001041 *
1042 * [67] Reference ::= EntityRef | CharRef
1043 *
1044 * [69] PEReference ::= '%' Name ';'
1045 *
1046 * Returns A newly allocated string with the substitution done. The caller
1047 * must deallocate it !
1048 */
1049xmlChar *
1050xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1051 xmlChar end, xmlChar end2, xmlChar end3) {
1052 xmlChar *buffer = NULL;
1053 int buffer_size = 0;
1054
1055 xmlChar *current = NULL;
1056 xmlEntityPtr ent;
1057 int c,l;
1058 int nbchars = 0;
1059
1060 if (str == NULL)
1061 return(NULL);
1062
1063 if (ctxt->depth > 40) {
1064 ctxt->errNo = XML_ERR_ENTITY_LOOP;
1065 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1066 ctxt->sax->error(ctxt->userData,
1067 "Detected entity reference loop\n");
1068 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00001069 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00001070 return(NULL);
1071 }
1072
1073 /*
1074 * allocate a translation buffer.
1075 */
1076 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001077 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001078 if (buffer == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00001079 xmlGenericError(xmlGenericErrorContext,
1080 "xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00001081 return(NULL);
1082 }
1083
1084 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001085 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001086 * we are operating on already parsed values.
1087 */
1088 c = CUR_SCHAR(str, l);
1089 while ((c != 0) && (c != end) && /* non input consuming loop */
1090 (c != end2) && (c != end3)) {
1091
1092 if (c == 0) break;
1093 if ((c == '&') && (str[1] == '#')) {
1094 int val = xmlParseStringCharRef(ctxt, &str);
1095 if (val != 0) {
1096 COPY_BUF(0,buffer,nbchars,val);
1097 }
1098 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1099 if (xmlParserDebugEntities)
1100 xmlGenericError(xmlGenericErrorContext,
1101 "String decoding Entity Reference: %.30s\n",
1102 str);
1103 ent = xmlParseStringEntityRef(ctxt, &str);
1104 if ((ent != NULL) &&
1105 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1106 if (ent->content != NULL) {
1107 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1108 } else {
1109 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1110 ctxt->sax->error(ctxt->userData,
1111 "internal error entity has no content\n");
1112 }
1113 } else if ((ent != NULL) && (ent->content != NULL)) {
1114 xmlChar *rep;
1115
1116 ctxt->depth++;
1117 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1118 0, 0, 0);
1119 ctxt->depth--;
1120 if (rep != NULL) {
1121 current = rep;
1122 while (*current != 0) { /* non input consuming loop */
1123 buffer[nbchars++] = *current++;
1124 if (nbchars >
1125 buffer_size - XML_PARSER_BUFFER_SIZE) {
1126 growBuffer(buffer);
1127 }
1128 }
1129 xmlFree(rep);
1130 }
1131 } else if (ent != NULL) {
1132 int i = xmlStrlen(ent->name);
1133 const xmlChar *cur = ent->name;
1134
1135 buffer[nbchars++] = '&';
1136 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1137 growBuffer(buffer);
1138 }
1139 for (;i > 0;i--)
1140 buffer[nbchars++] = *cur++;
1141 buffer[nbchars++] = ';';
1142 }
1143 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1144 if (xmlParserDebugEntities)
1145 xmlGenericError(xmlGenericErrorContext,
1146 "String decoding PE Reference: %.30s\n", str);
1147 ent = xmlParseStringPEReference(ctxt, &str);
1148 if (ent != NULL) {
1149 xmlChar *rep;
1150
1151 ctxt->depth++;
1152 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1153 0, 0, 0);
1154 ctxt->depth--;
1155 if (rep != NULL) {
1156 current = rep;
1157 while (*current != 0) { /* non input consuming loop */
1158 buffer[nbchars++] = *current++;
1159 if (nbchars >
1160 buffer_size - XML_PARSER_BUFFER_SIZE) {
1161 growBuffer(buffer);
1162 }
1163 }
1164 xmlFree(rep);
1165 }
1166 }
1167 } else {
1168 COPY_BUF(l,buffer,nbchars,c);
1169 str += l;
1170 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1171 growBuffer(buffer);
1172 }
1173 }
1174 c = CUR_SCHAR(str, l);
1175 }
1176 buffer[nbchars++] = 0;
1177 return(buffer);
1178}
1179
1180
1181/************************************************************************
1182 * *
1183 * Commodity functions to handle xmlChars *
1184 * *
1185 ************************************************************************/
1186
1187/**
1188 * xmlStrndup:
1189 * @cur: the input xmlChar *
1190 * @len: the len of @cur
1191 *
1192 * a strndup for array of xmlChar's
1193 *
1194 * Returns a new xmlChar * or NULL
1195 */
1196xmlChar *
1197xmlStrndup(const xmlChar *cur, int len) {
1198 xmlChar *ret;
1199
1200 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001201 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001202 if (ret == NULL) {
1203 xmlGenericError(xmlGenericErrorContext,
1204 "malloc of %ld byte failed\n",
1205 (len + 1) * (long)sizeof(xmlChar));
1206 return(NULL);
1207 }
1208 memcpy(ret, cur, len * sizeof(xmlChar));
1209 ret[len] = 0;
1210 return(ret);
1211}
1212
1213/**
1214 * xmlStrdup:
1215 * @cur: the input xmlChar *
1216 *
1217 * a strdup for array of xmlChar's. Since they are supposed to be
1218 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1219 * a termination mark of '0'.
1220 *
1221 * Returns a new xmlChar * or NULL
1222 */
1223xmlChar *
1224xmlStrdup(const xmlChar *cur) {
1225 const xmlChar *p = cur;
1226
1227 if (cur == NULL) return(NULL);
1228 while (*p != 0) p++; /* non input consuming */
1229 return(xmlStrndup(cur, p - cur));
1230}
1231
1232/**
1233 * xmlCharStrndup:
1234 * @cur: the input char *
1235 * @len: the len of @cur
1236 *
1237 * a strndup for char's to xmlChar's
1238 *
1239 * Returns a new xmlChar * or NULL
1240 */
1241
1242xmlChar *
1243xmlCharStrndup(const char *cur, int len) {
1244 int i;
1245 xmlChar *ret;
1246
1247 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001248 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001249 if (ret == NULL) {
1250 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1251 (len + 1) * (long)sizeof(xmlChar));
1252 return(NULL);
1253 }
1254 for (i = 0;i < len;i++)
1255 ret[i] = (xmlChar) cur[i];
1256 ret[len] = 0;
1257 return(ret);
1258}
1259
1260/**
1261 * xmlCharStrdup:
1262 * @cur: the input char *
Owen Taylor3473f882001-02-23 17:55:21 +00001263 *
1264 * a strdup for char's to xmlChar's
1265 *
1266 * Returns a new xmlChar * or NULL
1267 */
1268
1269xmlChar *
1270xmlCharStrdup(const char *cur) {
1271 const char *p = cur;
1272
1273 if (cur == NULL) return(NULL);
1274 while (*p != '\0') p++; /* non input consuming */
1275 return(xmlCharStrndup(cur, p - cur));
1276}
1277
1278/**
1279 * xmlStrcmp:
1280 * @str1: the first xmlChar *
1281 * @str2: the second xmlChar *
1282 *
1283 * a strcmp for xmlChar's
1284 *
1285 * Returns the integer result of the comparison
1286 */
1287
1288int
1289xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1290 register int tmp;
1291
1292 if (str1 == str2) return(0);
1293 if (str1 == NULL) return(-1);
1294 if (str2 == NULL) return(1);
1295 do {
1296 tmp = *str1++ - *str2;
1297 if (tmp != 0) return(tmp);
1298 } while (*str2++ != 0);
1299 return 0;
1300}
1301
1302/**
1303 * xmlStrEqual:
1304 * @str1: the first xmlChar *
1305 * @str2: the second xmlChar *
1306 *
1307 * Check if both string are equal of have same content
1308 * Should be a bit more readable and faster than xmlStrEqual()
1309 *
1310 * Returns 1 if they are equal, 0 if they are different
1311 */
1312
1313int
1314xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1315 if (str1 == str2) return(1);
1316 if (str1 == NULL) return(0);
1317 if (str2 == NULL) return(0);
1318 do {
1319 if (*str1++ != *str2) return(0);
1320 } while (*str2++);
1321 return(1);
1322}
1323
1324/**
1325 * xmlStrncmp:
1326 * @str1: the first xmlChar *
1327 * @str2: the second xmlChar *
1328 * @len: the max comparison length
1329 *
1330 * a strncmp for xmlChar's
1331 *
1332 * Returns the integer result of the comparison
1333 */
1334
1335int
1336xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1337 register int tmp;
1338
1339 if (len <= 0) return(0);
1340 if (str1 == str2) return(0);
1341 if (str1 == NULL) return(-1);
1342 if (str2 == NULL) return(1);
1343 do {
1344 tmp = *str1++ - *str2;
1345 if (tmp != 0 || --len == 0) return(tmp);
1346 } while (*str2++ != 0);
1347 return 0;
1348}
1349
Daniel Veillardb44025c2001-10-11 22:55:55 +00001350static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001351 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1352 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1353 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1354 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1355 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1356 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1357 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1358 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1359 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1360 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1361 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1362 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1363 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1364 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1365 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1366 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1367 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1368 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1369 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1370 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1371 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1372 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1373 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1374 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1375 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1376 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1377 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1378 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1379 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1380 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1381 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1382 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1383};
1384
1385/**
1386 * xmlStrcasecmp:
1387 * @str1: the first xmlChar *
1388 * @str2: the second xmlChar *
1389 *
1390 * a strcasecmp for xmlChar's
1391 *
1392 * Returns the integer result of the comparison
1393 */
1394
1395int
1396xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1397 register int tmp;
1398
1399 if (str1 == str2) return(0);
1400 if (str1 == NULL) return(-1);
1401 if (str2 == NULL) return(1);
1402 do {
1403 tmp = casemap[*str1++] - casemap[*str2];
1404 if (tmp != 0) return(tmp);
1405 } while (*str2++ != 0);
1406 return 0;
1407}
1408
1409/**
1410 * xmlStrncasecmp:
1411 * @str1: the first xmlChar *
1412 * @str2: the second xmlChar *
1413 * @len: the max comparison length
1414 *
1415 * a strncasecmp for xmlChar's
1416 *
1417 * Returns the integer result of the comparison
1418 */
1419
1420int
1421xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1422 register int tmp;
1423
1424 if (len <= 0) return(0);
1425 if (str1 == str2) return(0);
1426 if (str1 == NULL) return(-1);
1427 if (str2 == NULL) return(1);
1428 do {
1429 tmp = casemap[*str1++] - casemap[*str2];
1430 if (tmp != 0 || --len == 0) return(tmp);
1431 } while (*str2++ != 0);
1432 return 0;
1433}
1434
1435/**
1436 * xmlStrchr:
1437 * @str: the xmlChar * array
1438 * @val: the xmlChar to search
1439 *
1440 * a strchr for xmlChar's
1441 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001442 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001443 */
1444
1445const xmlChar *
1446xmlStrchr(const xmlChar *str, xmlChar val) {
1447 if (str == NULL) return(NULL);
1448 while (*str != 0) { /* non input consuming */
1449 if (*str == val) return((xmlChar *) str);
1450 str++;
1451 }
1452 return(NULL);
1453}
1454
1455/**
1456 * xmlStrstr:
1457 * @str: the xmlChar * array (haystack)
1458 * @val: the xmlChar to search (needle)
1459 *
1460 * a strstr for xmlChar's
1461 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001462 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001463 */
1464
1465const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001466xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001467 int n;
1468
1469 if (str == NULL) return(NULL);
1470 if (val == NULL) return(NULL);
1471 n = xmlStrlen(val);
1472
1473 if (n == 0) return(str);
1474 while (*str != 0) { /* non input consuming */
1475 if (*str == *val) {
1476 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1477 }
1478 str++;
1479 }
1480 return(NULL);
1481}
1482
1483/**
1484 * xmlStrcasestr:
1485 * @str: the xmlChar * array (haystack)
1486 * @val: the xmlChar to search (needle)
1487 *
1488 * a case-ignoring strstr for xmlChar's
1489 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001490 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001491 */
1492
1493const xmlChar *
1494xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1495 int n;
1496
1497 if (str == NULL) return(NULL);
1498 if (val == NULL) return(NULL);
1499 n = xmlStrlen(val);
1500
1501 if (n == 0) return(str);
1502 while (*str != 0) { /* non input consuming */
1503 if (casemap[*str] == casemap[*val])
1504 if (!xmlStrncasecmp(str, val, n)) return(str);
1505 str++;
1506 }
1507 return(NULL);
1508}
1509
1510/**
1511 * xmlStrsub:
1512 * @str: the xmlChar * array (haystack)
1513 * @start: the index of the first char (zero based)
1514 * @len: the length of the substring
1515 *
1516 * Extract a substring of a given string
1517 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001518 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001519 */
1520
1521xmlChar *
1522xmlStrsub(const xmlChar *str, int start, int len) {
1523 int i;
1524
1525 if (str == NULL) return(NULL);
1526 if (start < 0) return(NULL);
1527 if (len < 0) return(NULL);
1528
1529 for (i = 0;i < start;i++) {
1530 if (*str == 0) return(NULL);
1531 str++;
1532 }
1533 if (*str == 0) return(NULL);
1534 return(xmlStrndup(str, len));
1535}
1536
1537/**
1538 * xmlStrlen:
1539 * @str: the xmlChar * array
1540 *
1541 * length of a xmlChar's string
1542 *
1543 * Returns the number of xmlChar contained in the ARRAY.
1544 */
1545
1546int
1547xmlStrlen(const xmlChar *str) {
1548 int len = 0;
1549
1550 if (str == NULL) return(0);
1551 while (*str != 0) { /* non input consuming */
1552 str++;
1553 len++;
1554 }
1555 return(len);
1556}
1557
1558/**
1559 * xmlStrncat:
1560 * @cur: the original xmlChar * array
1561 * @add: the xmlChar * array added
1562 * @len: the length of @add
1563 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001564 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001565 * first bytes of @add.
1566 *
1567 * Returns a new xmlChar *, the original @cur is reallocated if needed
1568 * and should not be freed
1569 */
1570
1571xmlChar *
1572xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1573 int size;
1574 xmlChar *ret;
1575
1576 if ((add == NULL) || (len == 0))
1577 return(cur);
1578 if (cur == NULL)
1579 return(xmlStrndup(add, len));
1580
1581 size = xmlStrlen(cur);
1582 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1583 if (ret == NULL) {
1584 xmlGenericError(xmlGenericErrorContext,
1585 "xmlStrncat: realloc of %ld byte failed\n",
1586 (size + len + 1) * (long)sizeof(xmlChar));
1587 return(cur);
1588 }
1589 memcpy(&ret[size], add, len * sizeof(xmlChar));
1590 ret[size + len] = 0;
1591 return(ret);
1592}
1593
1594/**
1595 * xmlStrcat:
1596 * @cur: the original xmlChar * array
1597 * @add: the xmlChar * array added
1598 *
1599 * a strcat for array of xmlChar's. Since they are supposed to be
1600 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1601 * a termination mark of '0'.
1602 *
1603 * Returns a new xmlChar * containing the concatenated string.
1604 */
1605xmlChar *
1606xmlStrcat(xmlChar *cur, const xmlChar *add) {
1607 const xmlChar *p = add;
1608
1609 if (add == NULL) return(cur);
1610 if (cur == NULL)
1611 return(xmlStrdup(add));
1612
1613 while (*p != 0) p++; /* non input consuming */
1614 return(xmlStrncat(cur, add, p - add));
1615}
1616
1617/************************************************************************
1618 * *
1619 * Commodity functions, cleanup needed ? *
1620 * *
1621 ************************************************************************/
1622
1623/**
1624 * areBlanks:
1625 * @ctxt: an XML parser context
1626 * @str: a xmlChar *
1627 * @len: the size of @str
1628 *
1629 * Is this a sequence of blank chars that one can ignore ?
1630 *
1631 * Returns 1 if ignorable 0 otherwise.
1632 */
1633
1634static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1635 int i, ret;
1636 xmlNodePtr lastChild;
1637
Daniel Veillard05c13a22001-09-09 08:38:09 +00001638 /*
1639 * Don't spend time trying to differentiate them, the same callback is
1640 * used !
1641 */
1642 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001643 return(0);
1644
Owen Taylor3473f882001-02-23 17:55:21 +00001645 /*
1646 * Check for xml:space value.
1647 */
1648 if (*(ctxt->space) == 1)
1649 return(0);
1650
1651 /*
1652 * Check that the string is made of blanks
1653 */
1654 for (i = 0;i < len;i++)
1655 if (!(IS_BLANK(str[i]))) return(0);
1656
1657 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001658 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001659 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001660 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001661 if (ctxt->myDoc != NULL) {
1662 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1663 if (ret == 0) return(1);
1664 if (ret == 1) return(0);
1665 }
1666
1667 /*
1668 * Otherwise, heuristic :-\
1669 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001670 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001671 if ((ctxt->node->children == NULL) &&
1672 (RAW == '<') && (NXT(1) == '/')) return(0);
1673
1674 lastChild = xmlGetLastChild(ctxt->node);
1675 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001676 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1677 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001678 } else if (xmlNodeIsText(lastChild))
1679 return(0);
1680 else if ((ctxt->node->children != NULL) &&
1681 (xmlNodeIsText(ctxt->node->children)))
1682 return(0);
1683 return(1);
1684}
1685
Owen Taylor3473f882001-02-23 17:55:21 +00001686/************************************************************************
1687 * *
1688 * Extra stuff for namespace support *
1689 * Relates to http://www.w3.org/TR/WD-xml-names *
1690 * *
1691 ************************************************************************/
1692
1693/**
1694 * xmlSplitQName:
1695 * @ctxt: an XML parser context
1696 * @name: an XML parser context
1697 * @prefix: a xmlChar **
1698 *
1699 * parse an UTF8 encoded XML qualified name string
1700 *
1701 * [NS 5] QName ::= (Prefix ':')? LocalPart
1702 *
1703 * [NS 6] Prefix ::= NCName
1704 *
1705 * [NS 7] LocalPart ::= NCName
1706 *
1707 * Returns the local part, and prefix is updated
1708 * to get the Prefix if any.
1709 */
1710
1711xmlChar *
1712xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1713 xmlChar buf[XML_MAX_NAMELEN + 5];
1714 xmlChar *buffer = NULL;
1715 int len = 0;
1716 int max = XML_MAX_NAMELEN;
1717 xmlChar *ret = NULL;
1718 const xmlChar *cur = name;
1719 int c;
1720
1721 *prefix = NULL;
1722
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00001723 if (cur == NULL) return(NULL);
1724
Owen Taylor3473f882001-02-23 17:55:21 +00001725#ifndef XML_XML_NAMESPACE
1726 /* xml: prefix is not really a namespace */
1727 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1728 (cur[2] == 'l') && (cur[3] == ':'))
1729 return(xmlStrdup(name));
1730#endif
1731
1732 /* nasty but valid */
1733 if (cur[0] == ':')
1734 return(xmlStrdup(name));
1735
1736 c = *cur++;
1737 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1738 buf[len++] = c;
1739 c = *cur++;
1740 }
1741 if (len >= max) {
1742 /*
1743 * Okay someone managed to make a huge name, so he's ready to pay
1744 * for the processing speed.
1745 */
1746 max = len * 2;
1747
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001748 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001749 if (buffer == NULL) {
1750 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1751 ctxt->sax->error(ctxt->userData,
1752 "xmlSplitQName: out of memory\n");
1753 return(NULL);
1754 }
1755 memcpy(buffer, buf, len);
1756 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1757 if (len + 10 > max) {
1758 max *= 2;
1759 buffer = (xmlChar *) xmlRealloc(buffer,
1760 max * sizeof(xmlChar));
1761 if (buffer == NULL) {
1762 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1763 ctxt->sax->error(ctxt->userData,
1764 "xmlSplitQName: out of memory\n");
1765 return(NULL);
1766 }
1767 }
1768 buffer[len++] = c;
1769 c = *cur++;
1770 }
1771 buffer[len] = 0;
1772 }
1773
1774 if (buffer == NULL)
1775 ret = xmlStrndup(buf, len);
1776 else {
1777 ret = buffer;
1778 buffer = NULL;
1779 max = XML_MAX_NAMELEN;
1780 }
1781
1782
1783 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00001784 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001785 if (c == 0) return(ret);
1786 *prefix = ret;
1787 len = 0;
1788
Daniel Veillardbb284f42002-10-16 18:02:47 +00001789 /*
1790 * Check that the first character is proper to start
1791 * a new name
1792 */
1793 if (!(((c >= 0x61) && (c <= 0x7A)) ||
1794 ((c >= 0x41) && (c <= 0x5A)) ||
1795 (c == '_') || (c == ':'))) {
1796 int l;
1797 int first = CUR_SCHAR(cur, l);
1798
1799 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001800 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1801 (ctxt->sax->error != NULL))
Daniel Veillardbb284f42002-10-16 18:02:47 +00001802 ctxt->sax->error(ctxt->userData,
1803 "Name %s is not XML Namespace compliant\n",
1804 name);
1805 }
1806 }
1807 cur++;
1808
Owen Taylor3473f882001-02-23 17:55:21 +00001809 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1810 buf[len++] = c;
1811 c = *cur++;
1812 }
1813 if (len >= max) {
1814 /*
1815 * Okay someone managed to make a huge name, so he's ready to pay
1816 * for the processing speed.
1817 */
1818 max = len * 2;
1819
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001820 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001821 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001822 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1823 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001824 ctxt->sax->error(ctxt->userData,
1825 "xmlSplitQName: out of memory\n");
1826 return(NULL);
1827 }
1828 memcpy(buffer, buf, len);
1829 while (c != 0) { /* tested bigname2.xml */
1830 if (len + 10 > max) {
1831 max *= 2;
1832 buffer = (xmlChar *) xmlRealloc(buffer,
1833 max * sizeof(xmlChar));
1834 if (buffer == NULL) {
Daniel Veillard0eb38c72002-12-14 23:00:35 +00001835 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
1836 (ctxt->sax->error != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00001837 ctxt->sax->error(ctxt->userData,
1838 "xmlSplitQName: out of memory\n");
1839 return(NULL);
1840 }
1841 }
1842 buffer[len++] = c;
1843 c = *cur++;
1844 }
1845 buffer[len] = 0;
1846 }
1847
1848 if (buffer == NULL)
1849 ret = xmlStrndup(buf, len);
1850 else {
1851 ret = buffer;
1852 }
1853 }
1854
1855 return(ret);
1856}
1857
1858/************************************************************************
1859 * *
1860 * The parser itself *
1861 * Relates to http://www.w3.org/TR/REC-xml *
1862 * *
1863 ************************************************************************/
1864
Daniel Veillard76d66f42001-05-16 21:05:17 +00001865static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001866/**
1867 * xmlParseName:
1868 * @ctxt: an XML parser context
1869 *
1870 * parse an XML name.
1871 *
1872 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1873 * CombiningChar | Extender
1874 *
1875 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1876 *
1877 * [6] Names ::= Name (S Name)*
1878 *
1879 * Returns the Name parsed or NULL
1880 */
1881
1882xmlChar *
1883xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001884 const xmlChar *in;
1885 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001886 int count = 0;
1887
1888 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001889
1890 /*
1891 * Accelerator for simple ASCII names
1892 */
1893 in = ctxt->input->cur;
1894 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1895 ((*in >= 0x41) && (*in <= 0x5A)) ||
1896 (*in == '_') || (*in == ':')) {
1897 in++;
1898 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1899 ((*in >= 0x41) && (*in <= 0x5A)) ||
1900 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001901 (*in == '_') || (*in == '-') ||
1902 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001903 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001904 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001905 count = in - ctxt->input->cur;
1906 ret = xmlStrndup(ctxt->input->cur, count);
1907 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00001908 ctxt->nbChars += count;
1909 ctxt->input->col += count;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00001910 if (ret == NULL) {
1911 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1912 ctxt->sax->error(ctxt->userData,
1913 "XML parser: out of memory\n");
1914 ctxt->errNo = XML_ERR_NO_MEMORY;
1915 ctxt->instate = XML_PARSER_EOF;
1916 ctxt->disableSAX = 1;
1917 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00001918 return(ret);
1919 }
1920 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001921 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001922}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001923
Daniel Veillard46de64e2002-05-29 08:21:33 +00001924/**
1925 * xmlParseNameAndCompare:
1926 * @ctxt: an XML parser context
1927 *
1928 * parse an XML name and compares for match
1929 * (specialized for endtag parsing)
1930 *
1931 *
1932 * Returns NULL for an illegal name, (xmlChar*) 1 for success
1933 * and the name for mismatch
1934 */
1935
Daniel Veillardf4862f02002-09-10 11:13:43 +00001936static xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00001937xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
1938 const xmlChar *cmp = other;
1939 const xmlChar *in;
1940 xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00001941
1942 GROW;
1943
1944 in = ctxt->input->cur;
1945 while (*in != 0 && *in == *cmp) {
1946 ++in;
1947 ++cmp;
1948 }
1949 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
1950 /* success */
1951 ctxt->input->cur = in;
1952 return (xmlChar*) 1;
1953 }
1954 /* failure (or end of input buffer), check with full function */
1955 ret = xmlParseName (ctxt);
1956 if (ret != 0 && xmlStrEqual (ret, other)) {
1957 xmlFree (ret);
1958 return (xmlChar*) 1;
1959 }
1960 return ret;
1961}
1962
Daniel Veillard76d66f42001-05-16 21:05:17 +00001963static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001964xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1965 xmlChar buf[XML_MAX_NAMELEN + 5];
1966 int len = 0, l;
1967 int c;
1968 int count = 0;
1969
1970 /*
1971 * Handler for more complex cases
1972 */
1973 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001974 c = CUR_CHAR(l);
1975 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1976 (!IS_LETTER(c) && (c != '_') &&
1977 (c != ':'))) {
1978 return(NULL);
1979 }
1980
1981 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1982 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1983 (c == '.') || (c == '-') ||
1984 (c == '_') || (c == ':') ||
1985 (IS_COMBINING(c)) ||
1986 (IS_EXTENDER(c)))) {
1987 if (count++ > 100) {
1988 count = 0;
1989 GROW;
1990 }
1991 COPY_BUF(l,buf,len,c);
1992 NEXTL(l);
1993 c = CUR_CHAR(l);
1994 if (len >= XML_MAX_NAMELEN) {
1995 /*
1996 * Okay someone managed to make a huge name, so he's ready to pay
1997 * for the processing speed.
1998 */
1999 xmlChar *buffer;
2000 int max = len * 2;
2001
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002002 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002003 if (buffer == NULL) {
2004 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2005 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00002006 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002007 return(NULL);
2008 }
2009 memcpy(buffer, buf, len);
2010 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
2011 (c == '.') || (c == '-') ||
2012 (c == '_') || (c == ':') ||
2013 (IS_COMBINING(c)) ||
2014 (IS_EXTENDER(c))) {
2015 if (count++ > 100) {
2016 count = 0;
2017 GROW;
2018 }
2019 if (len + 10 > max) {
2020 max *= 2;
2021 buffer = (xmlChar *) xmlRealloc(buffer,
2022 max * sizeof(xmlChar));
2023 if (buffer == NULL) {
2024 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2025 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00002026 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002027 return(NULL);
2028 }
2029 }
2030 COPY_BUF(l,buffer,len,c);
2031 NEXTL(l);
2032 c = CUR_CHAR(l);
2033 }
2034 buffer[len] = 0;
2035 return(buffer);
2036 }
2037 }
2038 return(xmlStrndup(buf, len));
2039}
2040
2041/**
2042 * xmlParseStringName:
2043 * @ctxt: an XML parser context
2044 * @str: a pointer to the string pointer (IN/OUT)
2045 *
2046 * parse an XML name.
2047 *
2048 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2049 * CombiningChar | Extender
2050 *
2051 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2052 *
2053 * [6] Names ::= Name (S Name)*
2054 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002055 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002056 * is updated to the current location in the string.
2057 */
2058
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002059static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002060xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2061 xmlChar buf[XML_MAX_NAMELEN + 5];
2062 const xmlChar *cur = *str;
2063 int len = 0, l;
2064 int c;
2065
2066 c = CUR_SCHAR(cur, l);
2067 if (!IS_LETTER(c) && (c != '_') &&
2068 (c != ':')) {
2069 return(NULL);
2070 }
2071
2072 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2073 (c == '.') || (c == '-') ||
2074 (c == '_') || (c == ':') ||
2075 (IS_COMBINING(c)) ||
2076 (IS_EXTENDER(c))) {
2077 COPY_BUF(l,buf,len,c);
2078 cur += l;
2079 c = CUR_SCHAR(cur, l);
2080 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2081 /*
2082 * Okay someone managed to make a huge name, so he's ready to pay
2083 * for the processing speed.
2084 */
2085 xmlChar *buffer;
2086 int max = len * 2;
2087
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002088 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002089 if (buffer == NULL) {
2090 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2091 ctxt->sax->error(ctxt->userData,
2092 "xmlParseStringName: out of memory\n");
2093 return(NULL);
2094 }
2095 memcpy(buffer, buf, len);
2096 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2097 (c == '.') || (c == '-') ||
2098 (c == '_') || (c == ':') ||
2099 (IS_COMBINING(c)) ||
2100 (IS_EXTENDER(c))) {
2101 if (len + 10 > max) {
2102 max *= 2;
2103 buffer = (xmlChar *) xmlRealloc(buffer,
2104 max * sizeof(xmlChar));
2105 if (buffer == NULL) {
2106 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2107 ctxt->sax->error(ctxt->userData,
2108 "xmlParseStringName: out of memory\n");
2109 return(NULL);
2110 }
2111 }
2112 COPY_BUF(l,buffer,len,c);
2113 cur += l;
2114 c = CUR_SCHAR(cur, l);
2115 }
2116 buffer[len] = 0;
2117 *str = cur;
2118 return(buffer);
2119 }
2120 }
2121 *str = cur;
2122 return(xmlStrndup(buf, len));
2123}
2124
2125/**
2126 * xmlParseNmtoken:
2127 * @ctxt: an XML parser context
2128 *
2129 * parse an XML Nmtoken.
2130 *
2131 * [7] Nmtoken ::= (NameChar)+
2132 *
2133 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2134 *
2135 * Returns the Nmtoken parsed or NULL
2136 */
2137
2138xmlChar *
2139xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2140 xmlChar buf[XML_MAX_NAMELEN + 5];
2141 int len = 0, l;
2142 int c;
2143 int count = 0;
2144
2145 GROW;
2146 c = CUR_CHAR(l);
2147
2148 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2149 (c == '.') || (c == '-') ||
2150 (c == '_') || (c == ':') ||
2151 (IS_COMBINING(c)) ||
2152 (IS_EXTENDER(c))) {
2153 if (count++ > 100) {
2154 count = 0;
2155 GROW;
2156 }
2157 COPY_BUF(l,buf,len,c);
2158 NEXTL(l);
2159 c = CUR_CHAR(l);
2160 if (len >= XML_MAX_NAMELEN) {
2161 /*
2162 * Okay someone managed to make a huge token, so he's ready to pay
2163 * for the processing speed.
2164 */
2165 xmlChar *buffer;
2166 int max = len * 2;
2167
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002168 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002169 if (buffer == NULL) {
2170 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2171 ctxt->sax->error(ctxt->userData,
2172 "xmlParseNmtoken: out of memory\n");
2173 return(NULL);
2174 }
2175 memcpy(buffer, buf, len);
2176 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2177 (c == '.') || (c == '-') ||
2178 (c == '_') || (c == ':') ||
2179 (IS_COMBINING(c)) ||
2180 (IS_EXTENDER(c))) {
2181 if (count++ > 100) {
2182 count = 0;
2183 GROW;
2184 }
2185 if (len + 10 > max) {
2186 max *= 2;
2187 buffer = (xmlChar *) xmlRealloc(buffer,
2188 max * sizeof(xmlChar));
2189 if (buffer == NULL) {
2190 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2191 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002192 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002193 return(NULL);
2194 }
2195 }
2196 COPY_BUF(l,buffer,len,c);
2197 NEXTL(l);
2198 c = CUR_CHAR(l);
2199 }
2200 buffer[len] = 0;
2201 return(buffer);
2202 }
2203 }
2204 if (len == 0)
2205 return(NULL);
2206 return(xmlStrndup(buf, len));
2207}
2208
2209/**
2210 * xmlParseEntityValue:
2211 * @ctxt: an XML parser context
2212 * @orig: if non-NULL store a copy of the original entity value
2213 *
2214 * parse a value for ENTITY declarations
2215 *
2216 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2217 * "'" ([^%&'] | PEReference | Reference)* "'"
2218 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002219 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002220 */
2221
2222xmlChar *
2223xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2224 xmlChar *buf = NULL;
2225 int len = 0;
2226 int size = XML_PARSER_BUFFER_SIZE;
2227 int c, l;
2228 xmlChar stop;
2229 xmlChar *ret = NULL;
2230 const xmlChar *cur = NULL;
2231 xmlParserInputPtr input;
2232
2233 if (RAW == '"') stop = '"';
2234 else if (RAW == '\'') stop = '\'';
2235 else {
2236 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2237 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2238 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2239 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002240 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002241 return(NULL);
2242 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002243 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002244 if (buf == NULL) {
2245 xmlGenericError(xmlGenericErrorContext,
2246 "malloc of %d byte failed\n", size);
2247 return(NULL);
2248 }
2249
2250 /*
2251 * The content of the entity definition is copied in a buffer.
2252 */
2253
2254 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2255 input = ctxt->input;
2256 GROW;
2257 NEXT;
2258 c = CUR_CHAR(l);
2259 /*
2260 * NOTE: 4.4.5 Included in Literal
2261 * When a parameter entity reference appears in a literal entity
2262 * value, ... a single or double quote character in the replacement
2263 * text is always treated as a normal data character and will not
2264 * terminate the literal.
2265 * In practice it means we stop the loop only when back at parsing
2266 * the initial entity and the quote is found
2267 */
2268 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2269 (ctxt->input != input))) {
2270 if (len + 5 >= size) {
2271 size *= 2;
2272 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2273 if (buf == NULL) {
2274 xmlGenericError(xmlGenericErrorContext,
2275 "realloc of %d byte failed\n", size);
2276 return(NULL);
2277 }
2278 }
2279 COPY_BUF(l,buf,len,c);
2280 NEXTL(l);
2281 /*
2282 * Pop-up of finished entities.
2283 */
2284 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2285 xmlPopInput(ctxt);
2286
2287 GROW;
2288 c = CUR_CHAR(l);
2289 if (c == 0) {
2290 GROW;
2291 c = CUR_CHAR(l);
2292 }
2293 }
2294 buf[len] = 0;
2295
2296 /*
2297 * Raise problem w.r.t. '&' and '%' being used in non-entities
2298 * reference constructs. Note Charref will be handled in
2299 * xmlStringDecodeEntities()
2300 */
2301 cur = buf;
2302 while (*cur != 0) { /* non input consuming */
2303 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2304 xmlChar *name;
2305 xmlChar tmp = *cur;
2306
2307 cur++;
2308 name = xmlParseStringName(ctxt, &cur);
2309 if ((name == NULL) || (*cur != ';')) {
2310 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2311 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2312 ctxt->sax->error(ctxt->userData,
2313 "EntityValue: '%c' forbidden except for entities references\n",
2314 tmp);
2315 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002316 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002317 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002318 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2319 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002320 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2321 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2322 ctxt->sax->error(ctxt->userData,
2323 "EntityValue: PEReferences forbidden in internal subset\n",
2324 tmp);
2325 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002326 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002327 }
2328 if (name != NULL)
2329 xmlFree(name);
2330 }
2331 cur++;
2332 }
2333
2334 /*
2335 * Then PEReference entities are substituted.
2336 */
2337 if (c != stop) {
2338 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2339 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2340 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2341 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002342 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002343 xmlFree(buf);
2344 } else {
2345 NEXT;
2346 /*
2347 * NOTE: 4.4.7 Bypassed
2348 * When a general entity reference appears in the EntityValue in
2349 * an entity declaration, it is bypassed and left as is.
2350 * so XML_SUBSTITUTE_REF is not set here.
2351 */
2352 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2353 0, 0, 0);
2354 if (orig != NULL)
2355 *orig = buf;
2356 else
2357 xmlFree(buf);
2358 }
2359
2360 return(ret);
2361}
2362
2363/**
2364 * xmlParseAttValue:
2365 * @ctxt: an XML parser context
2366 *
2367 * parse a value for an attribute
2368 * Note: the parser won't do substitution of entities here, this
2369 * will be handled later in xmlStringGetNodeList
2370 *
2371 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2372 * "'" ([^<&'] | Reference)* "'"
2373 *
2374 * 3.3.3 Attribute-Value Normalization:
2375 * Before the value of an attribute is passed to the application or
2376 * checked for validity, the XML processor must normalize it as follows:
2377 * - a character reference is processed by appending the referenced
2378 * character to the attribute value
2379 * - an entity reference is processed by recursively processing the
2380 * replacement text of the entity
2381 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2382 * appending #x20 to the normalized value, except that only a single
2383 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2384 * parsed entity or the literal entity value of an internal parsed entity
2385 * - other characters are processed by appending them to the normalized value
2386 * If the declared value is not CDATA, then the XML processor must further
2387 * process the normalized attribute value by discarding any leading and
2388 * trailing space (#x20) characters, and by replacing sequences of space
2389 * (#x20) characters by a single space (#x20) character.
2390 * All attributes for which no declaration has been read should be treated
2391 * by a non-validating parser as if declared CDATA.
2392 *
2393 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2394 */
2395
2396xmlChar *
Daniel Veillarde72c7562002-05-31 09:47:30 +00002397xmlParseAttValueComplex(xmlParserCtxtPtr ctxt);
2398
2399xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002400xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2401 xmlChar limit = 0;
Daniel Veillardf4862f02002-09-10 11:13:43 +00002402 const xmlChar *in = NULL;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002403 xmlChar *ret = NULL;
2404 SHRINK;
2405 GROW;
Daniel Veillarde645e8c2002-10-22 17:35:37 +00002406 in = (xmlChar *) CUR_PTR;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002407 if (*in != '"' && *in != '\'') {
2408 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2409 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2410 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2411 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002412 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillarde72c7562002-05-31 09:47:30 +00002413 return(NULL);
2414 }
2415 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2416 limit = *in;
2417 ++in;
2418
2419 while (*in != limit && *in >= 0x20 && *in <= 0x7f &&
2420 *in != '&' && *in != '<'
2421 ) {
2422 ++in;
2423 }
2424 if (*in != limit) {
2425 return xmlParseAttValueComplex(ctxt);
2426 }
2427 ++in;
2428 ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2);
2429 CUR_PTR = in;
2430 return ret;
2431}
2432
Daniel Veillard01c13b52002-12-10 15:19:08 +00002433/**
2434 * xmlParseAttValueComplex:
2435 * @ctxt: an XML parser context
2436 *
2437 * parse a value for an attribute, this is the fallback function
2438 * of xmlParseAttValue() when the attribute parsing requires handling
2439 * of non-ASCII characters.
2440 *
2441 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2442 */
Daniel Veillarde72c7562002-05-31 09:47:30 +00002443xmlChar *
2444xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) {
2445 xmlChar limit = 0;
2446 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002447 int len = 0;
2448 int buf_size = 0;
2449 int c, l;
2450 xmlChar *current = NULL;
2451 xmlEntityPtr ent;
2452
2453
2454 SHRINK;
2455 if (NXT(0) == '"') {
2456 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2457 limit = '"';
2458 NEXT;
2459 } else if (NXT(0) == '\'') {
2460 limit = '\'';
2461 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2462 NEXT;
2463 } else {
2464 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2465 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2466 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2467 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002468 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002469 return(NULL);
2470 }
2471
2472 /*
2473 * allocate a translation buffer.
2474 */
2475 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002476 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002477 if (buf == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +00002478 xmlGenericError(xmlGenericErrorContext,
2479 "xmlParseAttValue: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +00002480 return(NULL);
2481 }
2482
2483 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002484 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002485 */
2486 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002487 while ((NXT(0) != limit) && /* checked */
2488 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002489 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002490 if (c == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00002491 if (NXT(1) == '#') {
2492 int val = xmlParseCharRef(ctxt);
2493 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002494 if (ctxt->replaceEntities) {
2495 if (len > buf_size - 10) {
2496 growBuffer(buf);
2497 }
2498 buf[len++] = '&';
2499 } else {
2500 /*
2501 * The reparsing will be done in xmlStringGetNodeList()
2502 * called by the attribute() function in SAX.c
2503 */
2504 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002505
Daniel Veillard319a7422001-09-11 09:27:09 +00002506 if (len > buf_size - 10) {
2507 growBuffer(buf);
2508 }
2509 current = &buffer[0];
2510 while (*current != 0) { /* non input consuming */
2511 buf[len++] = *current++;
2512 }
Owen Taylor3473f882001-02-23 17:55:21 +00002513 }
2514 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002515 if (len > buf_size - 10) {
2516 growBuffer(buf);
2517 }
Owen Taylor3473f882001-02-23 17:55:21 +00002518 len += xmlCopyChar(0, &buf[len], val);
2519 }
2520 } else {
2521 ent = xmlParseEntityRef(ctxt);
2522 if ((ent != NULL) &&
2523 (ctxt->replaceEntities != 0)) {
2524 xmlChar *rep;
2525
2526 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2527 rep = xmlStringDecodeEntities(ctxt, ent->content,
2528 XML_SUBSTITUTE_REF, 0, 0, 0);
2529 if (rep != NULL) {
2530 current = rep;
2531 while (*current != 0) { /* non input consuming */
2532 buf[len++] = *current++;
2533 if (len > buf_size - 10) {
2534 growBuffer(buf);
2535 }
2536 }
2537 xmlFree(rep);
2538 }
2539 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002540 if (len > buf_size - 10) {
2541 growBuffer(buf);
2542 }
Owen Taylor3473f882001-02-23 17:55:21 +00002543 if (ent->content != NULL)
2544 buf[len++] = ent->content[0];
2545 }
2546 } else if (ent != NULL) {
2547 int i = xmlStrlen(ent->name);
2548 const xmlChar *cur = ent->name;
2549
2550 /*
2551 * This may look absurd but is needed to detect
2552 * entities problems
2553 */
2554 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2555 (ent->content != NULL)) {
2556 xmlChar *rep;
2557 rep = xmlStringDecodeEntities(ctxt, ent->content,
2558 XML_SUBSTITUTE_REF, 0, 0, 0);
2559 if (rep != NULL)
2560 xmlFree(rep);
2561 }
2562
2563 /*
2564 * Just output the reference
2565 */
2566 buf[len++] = '&';
2567 if (len > buf_size - i - 10) {
2568 growBuffer(buf);
2569 }
2570 for (;i > 0;i--)
2571 buf[len++] = *cur++;
2572 buf[len++] = ';';
2573 }
2574 }
2575 } else {
2576 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2577 COPY_BUF(l,buf,len,0x20);
2578 if (len > buf_size - 10) {
2579 growBuffer(buf);
2580 }
2581 } else {
2582 COPY_BUF(l,buf,len,c);
2583 if (len > buf_size - 10) {
2584 growBuffer(buf);
2585 }
2586 }
2587 NEXTL(l);
2588 }
2589 GROW;
2590 c = CUR_CHAR(l);
2591 }
2592 buf[len++] = 0;
2593 if (RAW == '<') {
2594 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2595 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2596 ctxt->sax->error(ctxt->userData,
2597 "Unescaped '<' not allowed in attributes values\n");
2598 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002599 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002600 } else if (RAW != limit) {
2601 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2603 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2604 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002605 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002606 } else
2607 NEXT;
2608 return(buf);
2609}
2610
2611/**
2612 * xmlParseSystemLiteral:
2613 * @ctxt: an XML parser context
2614 *
2615 * parse an XML Literal
2616 *
2617 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2618 *
2619 * Returns the SystemLiteral parsed or NULL
2620 */
2621
2622xmlChar *
2623xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2624 xmlChar *buf = NULL;
2625 int len = 0;
2626 int size = XML_PARSER_BUFFER_SIZE;
2627 int cur, l;
2628 xmlChar stop;
2629 int state = ctxt->instate;
2630 int count = 0;
2631
2632 SHRINK;
2633 if (RAW == '"') {
2634 NEXT;
2635 stop = '"';
2636 } else if (RAW == '\'') {
2637 NEXT;
2638 stop = '\'';
2639 } else {
2640 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2641 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2642 ctxt->sax->error(ctxt->userData,
2643 "SystemLiteral \" or ' expected\n");
2644 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002645 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002646 return(NULL);
2647 }
2648
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002649 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002650 if (buf == NULL) {
2651 xmlGenericError(xmlGenericErrorContext,
2652 "malloc of %d byte failed\n", size);
2653 return(NULL);
2654 }
2655 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2656 cur = CUR_CHAR(l);
2657 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2658 if (len + 5 >= size) {
2659 size *= 2;
2660 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2661 if (buf == NULL) {
2662 xmlGenericError(xmlGenericErrorContext,
2663 "realloc of %d byte failed\n", size);
2664 ctxt->instate = (xmlParserInputState) state;
2665 return(NULL);
2666 }
2667 }
2668 count++;
2669 if (count > 50) {
2670 GROW;
2671 count = 0;
2672 }
2673 COPY_BUF(l,buf,len,cur);
2674 NEXTL(l);
2675 cur = CUR_CHAR(l);
2676 if (cur == 0) {
2677 GROW;
2678 SHRINK;
2679 cur = CUR_CHAR(l);
2680 }
2681 }
2682 buf[len] = 0;
2683 ctxt->instate = (xmlParserInputState) state;
2684 if (!IS_CHAR(cur)) {
2685 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2686 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2687 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2688 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002689 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002690 } else {
2691 NEXT;
2692 }
2693 return(buf);
2694}
2695
2696/**
2697 * xmlParsePubidLiteral:
2698 * @ctxt: an XML parser context
2699 *
2700 * parse an XML public literal
2701 *
2702 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2703 *
2704 * Returns the PubidLiteral parsed or NULL.
2705 */
2706
2707xmlChar *
2708xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2709 xmlChar *buf = NULL;
2710 int len = 0;
2711 int size = XML_PARSER_BUFFER_SIZE;
2712 xmlChar cur;
2713 xmlChar stop;
2714 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002715 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002716
2717 SHRINK;
2718 if (RAW == '"') {
2719 NEXT;
2720 stop = '"';
2721 } else if (RAW == '\'') {
2722 NEXT;
2723 stop = '\'';
2724 } else {
2725 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2726 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2727 ctxt->sax->error(ctxt->userData,
2728 "SystemLiteral \" or ' expected\n");
2729 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002730 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002731 return(NULL);
2732 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002733 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002734 if (buf == NULL) {
2735 xmlGenericError(xmlGenericErrorContext,
2736 "malloc of %d byte failed\n", size);
2737 return(NULL);
2738 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002739 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002740 cur = CUR;
2741 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2742 if (len + 1 >= size) {
2743 size *= 2;
2744 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2745 if (buf == NULL) {
2746 xmlGenericError(xmlGenericErrorContext,
2747 "realloc of %d byte failed\n", size);
2748 return(NULL);
2749 }
2750 }
2751 buf[len++] = cur;
2752 count++;
2753 if (count > 50) {
2754 GROW;
2755 count = 0;
2756 }
2757 NEXT;
2758 cur = CUR;
2759 if (cur == 0) {
2760 GROW;
2761 SHRINK;
2762 cur = CUR;
2763 }
2764 }
2765 buf[len] = 0;
2766 if (cur != stop) {
2767 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2768 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2769 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2770 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002771 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002772 } else {
2773 NEXT;
2774 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002775 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002776 return(buf);
2777}
2778
Daniel Veillard48b2f892001-02-25 16:11:03 +00002779void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002780/**
2781 * xmlParseCharData:
2782 * @ctxt: an XML parser context
2783 * @cdata: int indicating whether we are within a CDATA section
2784 *
2785 * parse a CharData section.
2786 * if we are within a CDATA section ']]>' marks an end of section.
2787 *
2788 * The right angle bracket (>) may be represented using the string "&gt;",
2789 * and must, for compatibility, be escaped using "&gt;" or a character
2790 * reference when it appears in the string "]]>" in content, when that
2791 * string is not marking the end of a CDATA section.
2792 *
2793 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2794 */
2795
2796void
2797xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00002798 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002799 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002800 int line = ctxt->input->line;
2801 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002802
2803 SHRINK;
2804 GROW;
2805 /*
2806 * Accelerated common case where input don't need to be
2807 * modified before passing it to the handler.
2808 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00002809 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002810 in = ctxt->input->cur;
2811 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002812get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00002813 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2814 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002815 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002816 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002817 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002818 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002819 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002820 ctxt->input->line++;
2821 in++;
2822 }
2823 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002824 }
2825 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002826 if ((in[1] == ']') && (in[2] == '>')) {
2827 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2828 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2829 ctxt->sax->error(ctxt->userData,
2830 "Sequence ']]>' not allowed in content\n");
2831 ctxt->input->cur = in;
2832 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002833 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002834 return;
2835 }
2836 in++;
2837 goto get_more;
2838 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002839 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002840 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002841 if (IS_BLANK(*ctxt->input->cur)) {
2842 const xmlChar *tmp = ctxt->input->cur;
2843 ctxt->input->cur = in;
2844 if (areBlanks(ctxt, tmp, nbchar)) {
2845 if (ctxt->sax->ignorableWhitespace != NULL)
2846 ctxt->sax->ignorableWhitespace(ctxt->userData,
2847 tmp, nbchar);
2848 } else {
2849 if (ctxt->sax->characters != NULL)
2850 ctxt->sax->characters(ctxt->userData,
2851 tmp, nbchar);
2852 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002853 line = ctxt->input->line;
2854 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002855 } else {
2856 if (ctxt->sax->characters != NULL)
2857 ctxt->sax->characters(ctxt->userData,
2858 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002859 line = ctxt->input->line;
2860 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002861 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002862 }
2863 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002864 if (*in == 0xD) {
2865 in++;
2866 if (*in == 0xA) {
2867 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002868 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002869 ctxt->input->line++;
2870 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002871 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002872 in--;
2873 }
2874 if (*in == '<') {
2875 return;
2876 }
2877 if (*in == '&') {
2878 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002879 }
2880 SHRINK;
2881 GROW;
2882 in = ctxt->input->cur;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002883 } while ((*in >= 0x20) && (*in <= 0x7F));
Daniel Veillard48b2f892001-02-25 16:11:03 +00002884 nbchar = 0;
2885 }
Daniel Veillard50582112001-03-26 22:52:16 +00002886 ctxt->input->line = line;
2887 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002888 xmlParseCharDataComplex(ctxt, cdata);
2889}
2890
Daniel Veillard01c13b52002-12-10 15:19:08 +00002891/**
2892 * xmlParseCharDataComplex:
2893 * @ctxt: an XML parser context
2894 * @cdata: int indicating whether we are within a CDATA section
2895 *
2896 * parse a CharData section.this is the fallback function
2897 * of xmlParseCharData() when the parsing requires handling
2898 * of non-ASCII characters.
2899 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002900void
2901xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002902 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2903 int nbchar = 0;
2904 int cur, l;
2905 int count = 0;
2906
2907 SHRINK;
2908 GROW;
2909 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002910 while ((cur != '<') && /* checked */
2911 (cur != '&') &&
2912 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00002913 if ((cur == ']') && (NXT(1) == ']') &&
2914 (NXT(2) == '>')) {
2915 if (cdata) break;
2916 else {
2917 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2918 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2919 ctxt->sax->error(ctxt->userData,
2920 "Sequence ']]>' not allowed in content\n");
2921 /* Should this be relaxed ??? I see a "must here */
2922 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00002923 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002924 }
2925 }
2926 COPY_BUF(l,buf,nbchar,cur);
2927 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2928 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002929 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002930 */
2931 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2932 if (areBlanks(ctxt, buf, nbchar)) {
2933 if (ctxt->sax->ignorableWhitespace != NULL)
2934 ctxt->sax->ignorableWhitespace(ctxt->userData,
2935 buf, nbchar);
2936 } else {
2937 if (ctxt->sax->characters != NULL)
2938 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2939 }
2940 }
2941 nbchar = 0;
2942 }
2943 count++;
2944 if (count > 50) {
2945 GROW;
2946 count = 0;
2947 }
2948 NEXTL(l);
2949 cur = CUR_CHAR(l);
2950 }
2951 if (nbchar != 0) {
2952 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002953 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002954 */
2955 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2956 if (areBlanks(ctxt, buf, nbchar)) {
2957 if (ctxt->sax->ignorableWhitespace != NULL)
2958 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2959 } else {
2960 if (ctxt->sax->characters != NULL)
2961 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2962 }
2963 }
2964 }
2965}
2966
2967/**
2968 * xmlParseExternalID:
2969 * @ctxt: an XML parser context
2970 * @publicID: a xmlChar** receiving PubidLiteral
2971 * @strict: indicate whether we should restrict parsing to only
2972 * production [75], see NOTE below
2973 *
2974 * Parse an External ID or a Public ID
2975 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002976 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002977 * 'PUBLIC' S PubidLiteral S SystemLiteral
2978 *
2979 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2980 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2981 *
2982 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2983 *
2984 * Returns the function returns SystemLiteral and in the second
2985 * case publicID receives PubidLiteral, is strict is off
2986 * it is possible to return NULL and have publicID set.
2987 */
2988
2989xmlChar *
2990xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2991 xmlChar *URI = NULL;
2992
2993 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002994
2995 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002996 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2997 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2998 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2999 SKIP(6);
3000 if (!IS_BLANK(CUR)) {
3001 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3002 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3003 ctxt->sax->error(ctxt->userData,
3004 "Space required after 'SYSTEM'\n");
3005 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003006 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003007 }
3008 SKIP_BLANKS;
3009 URI = xmlParseSystemLiteral(ctxt);
3010 if (URI == NULL) {
3011 ctxt->errNo = XML_ERR_URI_REQUIRED;
3012 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3013 ctxt->sax->error(ctxt->userData,
3014 "xmlParseExternalID: SYSTEM, no URI\n");
3015 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003016 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003017 }
3018 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
3019 (NXT(2) == 'B') && (NXT(3) == 'L') &&
3020 (NXT(4) == 'I') && (NXT(5) == 'C')) {
3021 SKIP(6);
3022 if (!IS_BLANK(CUR)) {
3023 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3024 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3025 ctxt->sax->error(ctxt->userData,
3026 "Space required after 'PUBLIC'\n");
3027 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003028 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003029 }
3030 SKIP_BLANKS;
3031 *publicID = xmlParsePubidLiteral(ctxt);
3032 if (*publicID == NULL) {
3033 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
3034 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3035 ctxt->sax->error(ctxt->userData,
3036 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
3037 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003038 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003039 }
3040 if (strict) {
3041 /*
3042 * We don't handle [83] so "S SystemLiteral" is required.
3043 */
3044 if (!IS_BLANK(CUR)) {
3045 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3047 ctxt->sax->error(ctxt->userData,
3048 "Space required after the Public Identifier\n");
3049 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003050 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003051 }
3052 } else {
3053 /*
3054 * We handle [83] so we return immediately, if
3055 * "S SystemLiteral" is not detected. From a purely parsing
3056 * point of view that's a nice mess.
3057 */
3058 const xmlChar *ptr;
3059 GROW;
3060
3061 ptr = CUR_PTR;
3062 if (!IS_BLANK(*ptr)) return(NULL);
3063
3064 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3065 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3066 }
3067 SKIP_BLANKS;
3068 URI = xmlParseSystemLiteral(ctxt);
3069 if (URI == NULL) {
3070 ctxt->errNo = XML_ERR_URI_REQUIRED;
3071 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3072 ctxt->sax->error(ctxt->userData,
3073 "xmlParseExternalID: PUBLIC, no URI\n");
3074 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003075 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003076 }
3077 }
3078 return(URI);
3079}
3080
3081/**
3082 * xmlParseComment:
3083 * @ctxt: an XML parser context
3084 *
3085 * Skip an XML (SGML) comment <!-- .... -->
3086 * The spec says that "For compatibility, the string "--" (double-hyphen)
3087 * must not occur within comments. "
3088 *
3089 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3090 */
3091void
3092xmlParseComment(xmlParserCtxtPtr ctxt) {
3093 xmlChar *buf = NULL;
3094 int len;
3095 int size = XML_PARSER_BUFFER_SIZE;
3096 int q, ql;
3097 int r, rl;
3098 int cur, l;
3099 xmlParserInputState state;
3100 xmlParserInputPtr input = ctxt->input;
3101 int count = 0;
3102
3103 /*
3104 * Check that there is a comment right here.
3105 */
3106 if ((RAW != '<') || (NXT(1) != '!') ||
3107 (NXT(2) != '-') || (NXT(3) != '-')) return;
3108
3109 state = ctxt->instate;
3110 ctxt->instate = XML_PARSER_COMMENT;
3111 SHRINK;
3112 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003113 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003114 if (buf == NULL) {
3115 xmlGenericError(xmlGenericErrorContext,
3116 "malloc of %d byte failed\n", size);
3117 ctxt->instate = state;
3118 return;
3119 }
3120 q = CUR_CHAR(ql);
3121 NEXTL(ql);
3122 r = CUR_CHAR(rl);
3123 NEXTL(rl);
3124 cur = CUR_CHAR(l);
3125 len = 0;
3126 while (IS_CHAR(cur) && /* checked */
3127 ((cur != '>') ||
3128 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003129 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003130 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
3131 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3132 ctxt->sax->error(ctxt->userData,
3133 "Comment must not contain '--' (double-hyphen)`\n");
3134 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003135 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003136 }
3137 if (len + 5 >= size) {
3138 size *= 2;
3139 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3140 if (buf == NULL) {
3141 xmlGenericError(xmlGenericErrorContext,
3142 "realloc of %d byte failed\n", size);
3143 ctxt->instate = state;
3144 return;
3145 }
3146 }
3147 COPY_BUF(ql,buf,len,q);
3148 q = r;
3149 ql = rl;
3150 r = cur;
3151 rl = l;
3152
3153 count++;
3154 if (count > 50) {
3155 GROW;
3156 count = 0;
3157 }
3158 NEXTL(l);
3159 cur = CUR_CHAR(l);
3160 if (cur == 0) {
3161 SHRINK;
3162 GROW;
3163 cur = CUR_CHAR(l);
3164 }
3165 }
3166 buf[len] = 0;
3167 if (!IS_CHAR(cur)) {
3168 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
3169 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3170 ctxt->sax->error(ctxt->userData,
3171 "Comment not terminated \n<!--%.50s\n", buf);
3172 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003173 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003174 xmlFree(buf);
3175 } else {
3176 if (input != ctxt->input) {
3177 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3178 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3179 ctxt->sax->error(ctxt->userData,
3180"Comment doesn't start and stop in the same entity\n");
3181 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003182 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003183 }
3184 NEXT;
3185 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3186 (!ctxt->disableSAX))
3187 ctxt->sax->comment(ctxt->userData, buf);
3188 xmlFree(buf);
3189 }
3190 ctxt->instate = state;
3191}
3192
3193/**
3194 * xmlParsePITarget:
3195 * @ctxt: an XML parser context
3196 *
3197 * parse the name of a PI
3198 *
3199 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3200 *
3201 * Returns the PITarget name or NULL
3202 */
3203
3204xmlChar *
3205xmlParsePITarget(xmlParserCtxtPtr ctxt) {
3206 xmlChar *name;
3207
3208 name = xmlParseName(ctxt);
3209 if ((name != NULL) &&
3210 ((name[0] == 'x') || (name[0] == 'X')) &&
3211 ((name[1] == 'm') || (name[1] == 'M')) &&
3212 ((name[2] == 'l') || (name[2] == 'L'))) {
3213 int i;
3214 if ((name[0] == 'x') && (name[1] == 'm') &&
3215 (name[2] == 'l') && (name[3] == 0)) {
3216 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3217 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3218 ctxt->sax->error(ctxt->userData,
3219 "XML declaration allowed only at the start of the document\n");
3220 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003221 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003222 return(name);
3223 } else if (name[3] == 0) {
3224 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3225 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3226 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
3227 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003228 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003229 return(name);
3230 }
3231 for (i = 0;;i++) {
3232 if (xmlW3CPIs[i] == NULL) break;
3233 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3234 return(name);
3235 }
3236 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3237 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3238 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003239 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003240 }
3241 }
3242 return(name);
3243}
3244
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must be of the form: catalog = 'URL' or catalog = "URL",
 * with optional blanks around each token and nothing after the closing
 * quote.  A missing '=' makes the function return silently, any other
 * syntax problem raises an XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* the pseudo attribute name */
    for (; IS_BLANK(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;
    p += 7;

    /* the equal sign */
    for (; IS_BLANK(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    /* the quoted value */
    for (; IS_BLANK(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;
    p++;
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto syntax_error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK(*p); p++)
        ;
    if (*p != 0)
        goto syntax_error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

syntax_error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3307
Owen Taylor3473f882001-02-23 17:55:21 +00003308/**
3309 * xmlParsePI:
3310 * @ctxt: an XML parser context
3311 *
3312 * parse an XML Processing Instruction.
3313 *
3314 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3315 *
3316 * The processing is transfered to SAX once parsed.
3317 */
3318
3319void
3320xmlParsePI(xmlParserCtxtPtr ctxt) {
3321 xmlChar *buf = NULL;
3322 int len = 0;
3323 int size = XML_PARSER_BUFFER_SIZE;
3324 int cur, l;
3325 xmlChar *target;
3326 xmlParserInputState state;
3327 int count = 0;
3328
3329 if ((RAW == '<') && (NXT(1) == '?')) {
3330 xmlParserInputPtr input = ctxt->input;
3331 state = ctxt->instate;
3332 ctxt->instate = XML_PARSER_PI;
3333 /*
3334 * this is a Processing Instruction.
3335 */
3336 SKIP(2);
3337 SHRINK;
3338
3339 /*
3340 * Parse the target name and check for special support like
3341 * namespace.
3342 */
3343 target = xmlParsePITarget(ctxt);
3344 if (target != NULL) {
3345 if ((RAW == '?') && (NXT(1) == '>')) {
3346 if (input != ctxt->input) {
3347 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3348 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3349 ctxt->sax->error(ctxt->userData,
3350 "PI declaration doesn't start and stop in the same entity\n");
3351 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003352 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003353 }
3354 SKIP(2);
3355
3356 /*
3357 * SAX: PI detected.
3358 */
3359 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3360 (ctxt->sax->processingInstruction != NULL))
3361 ctxt->sax->processingInstruction(ctxt->userData,
3362 target, NULL);
3363 ctxt->instate = state;
3364 xmlFree(target);
3365 return;
3366 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003367 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003368 if (buf == NULL) {
3369 xmlGenericError(xmlGenericErrorContext,
3370 "malloc of %d byte failed\n", size);
3371 ctxt->instate = state;
3372 return;
3373 }
3374 cur = CUR;
3375 if (!IS_BLANK(cur)) {
3376 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3377 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3378 ctxt->sax->error(ctxt->userData,
3379 "xmlParsePI: PI %s space expected\n", target);
3380 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003381 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003382 }
3383 SKIP_BLANKS;
3384 cur = CUR_CHAR(l);
3385 while (IS_CHAR(cur) && /* checked */
3386 ((cur != '?') || (NXT(1) != '>'))) {
3387 if (len + 5 >= size) {
3388 size *= 2;
3389 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3390 if (buf == NULL) {
3391 xmlGenericError(xmlGenericErrorContext,
3392 "realloc of %d byte failed\n", size);
3393 ctxt->instate = state;
3394 return;
3395 }
3396 }
3397 count++;
3398 if (count > 50) {
3399 GROW;
3400 count = 0;
3401 }
3402 COPY_BUF(l,buf,len,cur);
3403 NEXTL(l);
3404 cur = CUR_CHAR(l);
3405 if (cur == 0) {
3406 SHRINK;
3407 GROW;
3408 cur = CUR_CHAR(l);
3409 }
3410 }
3411 buf[len] = 0;
3412 if (cur != '?') {
3413 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3414 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3415 ctxt->sax->error(ctxt->userData,
3416 "xmlParsePI: PI %s never end ...\n", target);
3417 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003418 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003419 } else {
3420 if (input != ctxt->input) {
3421 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3422 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3423 ctxt->sax->error(ctxt->userData,
3424 "PI declaration doesn't start and stop in the same entity\n");
3425 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003426 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003427 }
3428 SKIP(2);
3429
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003430#ifdef LIBXML_CATALOG_ENABLED
3431 if (((state == XML_PARSER_MISC) ||
3432 (state == XML_PARSER_START)) &&
3433 (xmlStrEqual(target, XML_CATALOG_PI))) {
3434 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3435 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3436 (allow == XML_CATA_ALLOW_ALL))
3437 xmlParseCatalogPI(ctxt, buf);
3438 }
3439#endif
3440
3441
Owen Taylor3473f882001-02-23 17:55:21 +00003442 /*
3443 * SAX: PI detected.
3444 */
3445 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3446 (ctxt->sax->processingInstruction != NULL))
3447 ctxt->sax->processingInstruction(ctxt->userData,
3448 target, buf);
3449 }
3450 xmlFree(buf);
3451 xmlFree(target);
3452 } else {
3453 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3454 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3455 ctxt->sax->error(ctxt->userData,
3456 "xmlParsePI : no target name\n");
3457 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003458 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003459 }
3460 ctxt->instate = state;
3461 }
3462}
3463
3464/**
3465 * xmlParseNotationDecl:
3466 * @ctxt: an XML parser context
3467 *
3468 * parse a notation declaration
3469 *
3470 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3471 *
3472 * Hence there is actually 3 choices:
3473 * 'PUBLIC' S PubidLiteral
3474 * 'PUBLIC' S PubidLiteral S SystemLiteral
3475 * and 'SYSTEM' S SystemLiteral
3476 *
3477 * See the NOTE on xmlParseExternalID().
3478 */
3479
3480void
3481xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3482 xmlChar *name;
3483 xmlChar *Pubid;
3484 xmlChar *Systemid;
3485
3486 if ((RAW == '<') && (NXT(1) == '!') &&
3487 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3488 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3489 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3490 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3491 xmlParserInputPtr input = ctxt->input;
3492 SHRINK;
3493 SKIP(10);
3494 if (!IS_BLANK(CUR)) {
3495 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3496 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3497 ctxt->sax->error(ctxt->userData,
3498 "Space required after '<!NOTATION'\n");
3499 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003500 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003501 return;
3502 }
3503 SKIP_BLANKS;
3504
Daniel Veillard76d66f42001-05-16 21:05:17 +00003505 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003506 if (name == NULL) {
3507 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3508 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3509 ctxt->sax->error(ctxt->userData,
3510 "NOTATION: Name expected here\n");
3511 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003512 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003513 return;
3514 }
3515 if (!IS_BLANK(CUR)) {
3516 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3518 ctxt->sax->error(ctxt->userData,
3519 "Space required after the NOTATION name'\n");
3520 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003521 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003522 return;
3523 }
3524 SKIP_BLANKS;
3525
3526 /*
3527 * Parse the IDs.
3528 */
3529 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3530 SKIP_BLANKS;
3531
3532 if (RAW == '>') {
3533 if (input != ctxt->input) {
3534 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3535 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3536 ctxt->sax->error(ctxt->userData,
3537"Notation declaration doesn't start and stop in the same entity\n");
3538 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003539 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003540 }
3541 NEXT;
3542 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3543 (ctxt->sax->notationDecl != NULL))
3544 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3545 } else {
3546 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3547 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3548 ctxt->sax->error(ctxt->userData,
3549 "'>' required to close NOTATION declaration\n");
3550 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003551 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003552 }
3553 xmlFree(name);
3554 if (Systemid != NULL) xmlFree(Systemid);
3555 if (Pubid != NULL) xmlFree(Pubid);
3556 }
3557}
3558
3559/**
3560 * xmlParseEntityDecl:
3561 * @ctxt: an XML parser context
3562 *
3563 * parse <!ENTITY declarations
3564 *
3565 * [70] EntityDecl ::= GEDecl | PEDecl
3566 *
3567 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3568 *
3569 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3570 *
3571 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3572 *
3573 * [74] PEDef ::= EntityValue | ExternalID
3574 *
3575 * [76] NDataDecl ::= S 'NDATA' S Name
3576 *
3577 * [ VC: Notation Declared ]
3578 * The Name must match the declared name of a notation.
3579 */
3580
3581void
3582xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3583 xmlChar *name = NULL;
3584 xmlChar *value = NULL;
3585 xmlChar *URI = NULL, *literal = NULL;
3586 xmlChar *ndata = NULL;
3587 int isParameter = 0;
3588 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003589 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003590
3591 GROW;
3592 if ((RAW == '<') && (NXT(1) == '!') &&
3593 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3594 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3595 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3596 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003597 SHRINK;
3598 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003599 skipped = SKIP_BLANKS;
3600 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003601 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3603 ctxt->sax->error(ctxt->userData,
3604 "Space required after '<!ENTITY'\n");
3605 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003606 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003607 }
Owen Taylor3473f882001-02-23 17:55:21 +00003608
3609 if (RAW == '%') {
3610 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003611 skipped = SKIP_BLANKS;
3612 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003613 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3615 ctxt->sax->error(ctxt->userData,
3616 "Space required after '%'\n");
3617 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003618 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003619 }
Owen Taylor3473f882001-02-23 17:55:21 +00003620 isParameter = 1;
3621 }
3622
Daniel Veillard76d66f42001-05-16 21:05:17 +00003623 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003624 if (name == NULL) {
3625 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3626 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3627 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3628 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003629 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003630 return;
3631 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003632 skipped = SKIP_BLANKS;
3633 if (skipped == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003634 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3635 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3636 ctxt->sax->error(ctxt->userData,
3637 "Space required after the entity name\n");
3638 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003639 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003640 }
Owen Taylor3473f882001-02-23 17:55:21 +00003641
Daniel Veillardf5582f12002-06-11 10:08:16 +00003642 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003643 /*
3644 * handle the various case of definitions...
3645 */
3646 if (isParameter) {
3647 if ((RAW == '"') || (RAW == '\'')) {
3648 value = xmlParseEntityValue(ctxt, &orig);
3649 if (value) {
3650 if ((ctxt->sax != NULL) &&
3651 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3652 ctxt->sax->entityDecl(ctxt->userData, name,
3653 XML_INTERNAL_PARAMETER_ENTITY,
3654 NULL, NULL, value);
3655 }
3656 } else {
3657 URI = xmlParseExternalID(ctxt, &literal, 1);
3658 if ((URI == NULL) && (literal == NULL)) {
3659 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3660 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3661 ctxt->sax->error(ctxt->userData,
3662 "Entity value required\n");
3663 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003664 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003665 }
3666 if (URI) {
3667 xmlURIPtr uri;
3668
3669 uri = xmlParseURI((const char *) URI);
3670 if (uri == NULL) {
3671 ctxt->errNo = XML_ERR_INVALID_URI;
3672 if ((ctxt->sax != NULL) &&
3673 (!ctxt->disableSAX) &&
3674 (ctxt->sax->error != NULL))
3675 ctxt->sax->error(ctxt->userData,
3676 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003677 /*
3678 * This really ought to be a well formedness error
3679 * but the XML Core WG decided otherwise c.f. issue
3680 * E26 of the XML erratas.
3681 */
Owen Taylor3473f882001-02-23 17:55:21 +00003682 } else {
3683 if (uri->fragment != NULL) {
3684 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3685 if ((ctxt->sax != NULL) &&
3686 (!ctxt->disableSAX) &&
3687 (ctxt->sax->error != NULL))
3688 ctxt->sax->error(ctxt->userData,
3689 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003690 /*
3691 * Okay this is foolish to block those but not
3692 * invalid URIs.
3693 */
Owen Taylor3473f882001-02-23 17:55:21 +00003694 ctxt->wellFormed = 0;
3695 } else {
3696 if ((ctxt->sax != NULL) &&
3697 (!ctxt->disableSAX) &&
3698 (ctxt->sax->entityDecl != NULL))
3699 ctxt->sax->entityDecl(ctxt->userData, name,
3700 XML_EXTERNAL_PARAMETER_ENTITY,
3701 literal, URI, NULL);
3702 }
3703 xmlFreeURI(uri);
3704 }
3705 }
3706 }
3707 } else {
3708 if ((RAW == '"') || (RAW == '\'')) {
3709 value = xmlParseEntityValue(ctxt, &orig);
3710 if ((ctxt->sax != NULL) &&
3711 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3712 ctxt->sax->entityDecl(ctxt->userData, name,
3713 XML_INTERNAL_GENERAL_ENTITY,
3714 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003715 /*
3716 * For expat compatibility in SAX mode.
3717 */
3718 if ((ctxt->myDoc == NULL) ||
3719 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3720 if (ctxt->myDoc == NULL) {
3721 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3722 }
3723 if (ctxt->myDoc->intSubset == NULL)
3724 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3725 BAD_CAST "fake", NULL, NULL);
3726
3727 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3728 NULL, NULL, value);
3729 }
Owen Taylor3473f882001-02-23 17:55:21 +00003730 } else {
3731 URI = xmlParseExternalID(ctxt, &literal, 1);
3732 if ((URI == NULL) && (literal == NULL)) {
3733 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3734 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3735 ctxt->sax->error(ctxt->userData,
3736 "Entity value required\n");
3737 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003738 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003739 }
3740 if (URI) {
3741 xmlURIPtr uri;
3742
3743 uri = xmlParseURI((const char *)URI);
3744 if (uri == NULL) {
3745 ctxt->errNo = XML_ERR_INVALID_URI;
3746 if ((ctxt->sax != NULL) &&
3747 (!ctxt->disableSAX) &&
3748 (ctxt->sax->error != NULL))
3749 ctxt->sax->error(ctxt->userData,
3750 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003751 /*
3752 * This really ought to be a well formedness error
3753 * but the XML Core WG decided otherwise c.f. issue
3754 * E26 of the XML erratas.
3755 */
Owen Taylor3473f882001-02-23 17:55:21 +00003756 } else {
3757 if (uri->fragment != NULL) {
3758 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3759 if ((ctxt->sax != NULL) &&
3760 (!ctxt->disableSAX) &&
3761 (ctxt->sax->error != NULL))
3762 ctxt->sax->error(ctxt->userData,
3763 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003764 /*
3765 * Okay this is foolish to block those but not
3766 * invalid URIs.
3767 */
Owen Taylor3473f882001-02-23 17:55:21 +00003768 ctxt->wellFormed = 0;
3769 }
3770 xmlFreeURI(uri);
3771 }
3772 }
3773 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3774 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3776 ctxt->sax->error(ctxt->userData,
3777 "Space required before 'NDATA'\n");
3778 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003779 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003780 }
3781 SKIP_BLANKS;
3782 if ((RAW == 'N') && (NXT(1) == 'D') &&
3783 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3784 (NXT(4) == 'A')) {
3785 SKIP(5);
3786 if (!IS_BLANK(CUR)) {
3787 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3788 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3789 ctxt->sax->error(ctxt->userData,
3790 "Space required after 'NDATA'\n");
3791 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003792 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003793 }
3794 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003795 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003796 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3797 (ctxt->sax->unparsedEntityDecl != NULL))
3798 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3799 literal, URI, ndata);
3800 } else {
3801 if ((ctxt->sax != NULL) &&
3802 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3803 ctxt->sax->entityDecl(ctxt->userData, name,
3804 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3805 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003806 /*
3807 * For expat compatibility in SAX mode.
                 * assuming the entity replacement was asked for
3809 */
3810 if ((ctxt->replaceEntities != 0) &&
3811 ((ctxt->myDoc == NULL) ||
3812 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3813 if (ctxt->myDoc == NULL) {
3814 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3815 }
3816
3817 if (ctxt->myDoc->intSubset == NULL)
3818 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3819 BAD_CAST "fake", NULL, NULL);
3820 entityDecl(ctxt, name,
3821 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3822 literal, URI, NULL);
3823 }
Owen Taylor3473f882001-02-23 17:55:21 +00003824 }
3825 }
3826 }
3827 SKIP_BLANKS;
3828 if (RAW != '>') {
3829 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3830 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3831 ctxt->sax->error(ctxt->userData,
3832 "xmlParseEntityDecl: entity %s not terminated\n", name);
3833 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003834 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003835 } else {
3836 if (input != ctxt->input) {
3837 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3838 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3839 ctxt->sax->error(ctxt->userData,
3840"Entity declaration doesn't start and stop in the same entity\n");
3841 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003842 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003843 }
3844 NEXT;
3845 }
3846 if (orig != NULL) {
3847 /*
3848 * Ugly mechanism to save the raw entity value.
3849 */
3850 xmlEntityPtr cur = NULL;
3851
3852 if (isParameter) {
3853 if ((ctxt->sax != NULL) &&
3854 (ctxt->sax->getParameterEntity != NULL))
3855 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3856 } else {
3857 if ((ctxt->sax != NULL) &&
3858 (ctxt->sax->getEntity != NULL))
3859 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003860 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3861 cur = getEntity(ctxt, name);
3862 }
Owen Taylor3473f882001-02-23 17:55:21 +00003863 }
3864 if (cur != NULL) {
3865 if (cur->orig != NULL)
3866 xmlFree(orig);
3867 else
3868 cur->orig = orig;
3869 } else
3870 xmlFree(orig);
3871 }
3872 if (name != NULL) xmlFree(name);
3873 if (value != NULL) xmlFree(value);
3874 if (URI != NULL) xmlFree(URI);
3875 if (literal != NULL) xmlFree(literal);
3876 if (ndata != NULL) xmlFree(ndata);
3877 }
3878}
3879
3880/**
3881 * xmlParseDefaultDecl:
3882 * @ctxt: an XML parser context
3883 * @value: Receive a possible fixed default value for the attribute
3884 *
3885 * Parse an attribute default declaration
3886 *
3887 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3888 *
3889 * [ VC: Required Attribute ]
3890 * if the default declaration is the keyword #REQUIRED, then the
3891 * attribute must be specified for all elements of the type in the
3892 * attribute-list declaration.
3893 *
3894 * [ VC: Attribute Default Legal ]
3895 * The declared default value must meet the lexical constraints of
3896 * the declared attribute type c.f. xmlValidateAttributeDecl()
3897 *
3898 * [ VC: Fixed Attribute Default ]
3899 * if an attribute has a default value declared with the #FIXED
3900 * keyword, instances of that attribute must match the default value.
3901 *
3902 * [ WFC: No < in Attribute Values ]
3903 * handled in xmlParseAttValue()
3904 *
3905 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3906 * or XML_ATTRIBUTE_FIXED.
3907 */
3908
3909int
3910xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3911 int val;
3912 xmlChar *ret;
3913
3914 *value = NULL;
3915 if ((RAW == '#') && (NXT(1) == 'R') &&
3916 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3917 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3918 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3919 (NXT(8) == 'D')) {
3920 SKIP(9);
3921 return(XML_ATTRIBUTE_REQUIRED);
3922 }
3923 if ((RAW == '#') && (NXT(1) == 'I') &&
3924 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3925 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3926 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3927 SKIP(8);
3928 return(XML_ATTRIBUTE_IMPLIED);
3929 }
3930 val = XML_ATTRIBUTE_NONE;
3931 if ((RAW == '#') && (NXT(1) == 'F') &&
3932 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3933 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3934 SKIP(6);
3935 val = XML_ATTRIBUTE_FIXED;
3936 if (!IS_BLANK(CUR)) {
3937 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3938 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3939 ctxt->sax->error(ctxt->userData,
3940 "Space required after '#FIXED'\n");
3941 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003942 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003943 }
3944 SKIP_BLANKS;
3945 }
3946 ret = xmlParseAttValue(ctxt);
3947 ctxt->instate = XML_PARSER_DTD;
3948 if (ret == NULL) {
3949 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3950 ctxt->sax->error(ctxt->userData,
3951 "Attribute default value declaration error\n");
3952 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003953 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003954 } else
3955 *value = ret;
3956 return(val);
3957}
3958
3959/**
3960 * xmlParseNotationType:
3961 * @ctxt: an XML parser context
3962 *
3963 * parse an Notation attribute type.
3964 *
3965 * Note: the leading 'NOTATION' S part has already being parsed...
3966 *
3967 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3968 *
3969 * [ VC: Notation Attributes ]
3970 * Values of this type must match one of the notation names included
3971 * in the declaration; all notation names in the declaration must be declared.
3972 *
3973 * Returns: the notation attribute tree built while parsing
3974 */
3975
3976xmlEnumerationPtr
3977xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3978 xmlChar *name;
3979 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3980
3981 if (RAW != '(') {
3982 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3983 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3984 ctxt->sax->error(ctxt->userData,
3985 "'(' required to start 'NOTATION'\n");
3986 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00003987 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003988 return(NULL);
3989 }
3990 SHRINK;
3991 do {
3992 NEXT;
3993 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003994 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003995 if (name == NULL) {
3996 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3997 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3998 ctxt->sax->error(ctxt->userData,
3999 "Name expected in NOTATION declaration\n");
4000 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004001 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004002 return(ret);
4003 }
4004 cur = xmlCreateEnumeration(name);
4005 xmlFree(name);
4006 if (cur == NULL) return(ret);
4007 if (last == NULL) ret = last = cur;
4008 else {
4009 last->next = cur;
4010 last = cur;
4011 }
4012 SKIP_BLANKS;
4013 } while (RAW == '|');
4014 if (RAW != ')') {
4015 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
4016 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4017 ctxt->sax->error(ctxt->userData,
4018 "')' required to finish NOTATION declaration\n");
4019 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004020 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004021 if ((last != NULL) && (last != ret))
4022 xmlFreeEnumeration(last);
4023 return(ret);
4024 }
4025 NEXT;
4026 return(ret);
4027}
4028
4029/**
4030 * xmlParseEnumerationType:
4031 * @ctxt: an XML parser context
4032 *
4033 * parse an Enumeration attribute type.
4034 *
4035 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4036 *
4037 * [ VC: Enumeration ]
4038 * Values of this type must match one of the Nmtoken tokens in
4039 * the declaration
4040 *
4041 * Returns: the enumeration attribute tree built while parsing
4042 */
4043
4044xmlEnumerationPtr
4045xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4046 xmlChar *name;
4047 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4048
4049 if (RAW != '(') {
4050 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
4051 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4052 ctxt->sax->error(ctxt->userData,
4053 "'(' required to start ATTLIST enumeration\n");
4054 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004055 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004056 return(NULL);
4057 }
4058 SHRINK;
4059 do {
4060 NEXT;
4061 SKIP_BLANKS;
4062 name = xmlParseNmtoken(ctxt);
4063 if (name == NULL) {
4064 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
4065 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4066 ctxt->sax->error(ctxt->userData,
4067 "NmToken expected in ATTLIST enumeration\n");
4068 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004069 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004070 return(ret);
4071 }
4072 cur = xmlCreateEnumeration(name);
4073 xmlFree(name);
4074 if (cur == NULL) return(ret);
4075 if (last == NULL) ret = last = cur;
4076 else {
4077 last->next = cur;
4078 last = cur;
4079 }
4080 SKIP_BLANKS;
4081 } while (RAW == '|');
4082 if (RAW != ')') {
4083 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
4084 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4085 ctxt->sax->error(ctxt->userData,
4086 "')' required to finish ATTLIST enumeration\n");
4087 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004088 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004089 return(ret);
4090 }
4091 NEXT;
4092 return(ret);
4093}
4094
4095/**
4096 * xmlParseEnumeratedType:
4097 * @ctxt: an XML parser context
4098 * @tree: the enumeration tree built while parsing
4099 *
4100 * parse an Enumerated attribute type.
4101 *
4102 * [57] EnumeratedType ::= NotationType | Enumeration
4103 *
4104 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4105 *
4106 *
4107 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4108 */
4109
4110int
4111xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4112 if ((RAW == 'N') && (NXT(1) == 'O') &&
4113 (NXT(2) == 'T') && (NXT(3) == 'A') &&
4114 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4115 (NXT(6) == 'O') && (NXT(7) == 'N')) {
4116 SKIP(8);
4117 if (!IS_BLANK(CUR)) {
4118 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4119 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4120 ctxt->sax->error(ctxt->userData,
4121 "Space required after 'NOTATION'\n");
4122 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004123 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004124 return(0);
4125 }
4126 SKIP_BLANKS;
4127 *tree = xmlParseNotationType(ctxt);
4128 if (*tree == NULL) return(0);
4129 return(XML_ATTRIBUTE_NOTATION);
4130 }
4131 *tree = xmlParseEnumerationType(ctxt);
4132 if (*tree == NULL) return(0);
4133 return(XML_ATTRIBUTE_ENUMERATION);
4134}
4135
4136/**
4137 * xmlParseAttributeType:
4138 * @ctxt: an XML parser context
4139 * @tree: the enumeration tree built while parsing
4140 *
4141 * parse the Attribute list def for an element
4142 *
4143 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4144 *
4145 * [55] StringType ::= 'CDATA'
4146 *
4147 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4148 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4149 *
4150 * Validity constraints for attribute values syntax are checked in
4151 * xmlValidateAttributeValue()
4152 *
4153 * [ VC: ID ]
4154 * Values of type ID must match the Name production. A name must not
4155 * appear more than once in an XML document as a value of this type;
4156 * i.e., ID values must uniquely identify the elements which bear them.
4157 *
4158 * [ VC: One ID per Element Type ]
4159 * No element type may have more than one ID attribute specified.
4160 *
4161 * [ VC: ID Attribute Default ]
4162 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4163 *
4164 * [ VC: IDREF ]
4165 * Values of type IDREF must match the Name production, and values
4166 * of type IDREFS must match Names; each IDREF Name must match the value
4167 * of an ID attribute on some element in the XML document; i.e. IDREF
4168 * values must match the value of some ID attribute.
4169 *
4170 * [ VC: Entity Name ]
4171 * Values of type ENTITY must match the Name production, values
4172 * of type ENTITIES must match Names; each Entity Name must match the
4173 * name of an unparsed entity declared in the DTD.
4174 *
4175 * [ VC: Name Token ]
4176 * Values of type NMTOKEN must match the Nmtoken production; values
4177 * of type NMTOKENS must match Nmtokens.
4178 *
4179 * Returns the attribute type
4180 */
4181int
4182xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4183 SHRINK;
4184 if ((RAW == 'C') && (NXT(1) == 'D') &&
4185 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4186 (NXT(4) == 'A')) {
4187 SKIP(5);
4188 return(XML_ATTRIBUTE_CDATA);
4189 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4190 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4191 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4192 SKIP(6);
4193 return(XML_ATTRIBUTE_IDREFS);
4194 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4195 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4196 (NXT(4) == 'F')) {
4197 SKIP(5);
4198 return(XML_ATTRIBUTE_IDREF);
4199 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4200 SKIP(2);
4201 return(XML_ATTRIBUTE_ID);
4202 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4203 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4204 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4205 SKIP(6);
4206 return(XML_ATTRIBUTE_ENTITY);
4207 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4208 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4209 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4210 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4211 SKIP(8);
4212 return(XML_ATTRIBUTE_ENTITIES);
4213 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4214 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4215 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4216 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4217 SKIP(8);
4218 return(XML_ATTRIBUTE_NMTOKENS);
4219 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4220 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4221 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4222 (NXT(6) == 'N')) {
4223 SKIP(7);
4224 return(XML_ATTRIBUTE_NMTOKEN);
4225 }
4226 return(xmlParseEnumeratedType(ctxt, tree));
4227}
4228
4229/**
4230 * xmlParseAttributeListDecl:
4231 * @ctxt: an XML parser context
4232 *
4233 * : parse the Attribute list def for an element
4234 *
4235 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4236 *
4237 * [53] AttDef ::= S Name S AttType S DefaultDecl
4238 *
4239 */
4240void
4241xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4242 xmlChar *elemName;
4243 xmlChar *attrName;
4244 xmlEnumerationPtr tree;
4245
4246 if ((RAW == '<') && (NXT(1) == '!') &&
4247 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4248 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4249 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4250 (NXT(8) == 'T')) {
4251 xmlParserInputPtr input = ctxt->input;
4252
4253 SKIP(9);
4254 if (!IS_BLANK(CUR)) {
4255 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4256 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4257 ctxt->sax->error(ctxt->userData,
4258 "Space required after '<!ATTLIST'\n");
4259 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004260 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004261 }
4262 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004263 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004264 if (elemName == NULL) {
4265 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4266 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4267 ctxt->sax->error(ctxt->userData,
4268 "ATTLIST: no name for Element\n");
4269 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004270 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004271 return;
4272 }
4273 SKIP_BLANKS;
4274 GROW;
4275 while (RAW != '>') {
4276 const xmlChar *check = CUR_PTR;
4277 int type;
4278 int def;
4279 xmlChar *defaultValue = NULL;
4280
4281 GROW;
4282 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004283 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004284 if (attrName == NULL) {
4285 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4286 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4287 ctxt->sax->error(ctxt->userData,
4288 "ATTLIST: no name for Attribute\n");
4289 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004290 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004291 break;
4292 }
4293 GROW;
4294 if (!IS_BLANK(CUR)) {
4295 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4296 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4297 ctxt->sax->error(ctxt->userData,
4298 "Space required after the attribute name\n");
4299 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004300 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004301 if (attrName != NULL)
4302 xmlFree(attrName);
4303 if (defaultValue != NULL)
4304 xmlFree(defaultValue);
4305 break;
4306 }
4307 SKIP_BLANKS;
4308
4309 type = xmlParseAttributeType(ctxt, &tree);
4310 if (type <= 0) {
4311 if (attrName != NULL)
4312 xmlFree(attrName);
4313 if (defaultValue != NULL)
4314 xmlFree(defaultValue);
4315 break;
4316 }
4317
4318 GROW;
4319 if (!IS_BLANK(CUR)) {
4320 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4321 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4322 ctxt->sax->error(ctxt->userData,
4323 "Space required after the attribute type\n");
4324 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004325 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004326 if (attrName != NULL)
4327 xmlFree(attrName);
4328 if (defaultValue != NULL)
4329 xmlFree(defaultValue);
4330 if (tree != NULL)
4331 xmlFreeEnumeration(tree);
4332 break;
4333 }
4334 SKIP_BLANKS;
4335
4336 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4337 if (def <= 0) {
4338 if (attrName != NULL)
4339 xmlFree(attrName);
4340 if (defaultValue != NULL)
4341 xmlFree(defaultValue);
4342 if (tree != NULL)
4343 xmlFreeEnumeration(tree);
4344 break;
4345 }
4346
4347 GROW;
4348 if (RAW != '>') {
4349 if (!IS_BLANK(CUR)) {
4350 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4351 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4352 ctxt->sax->error(ctxt->userData,
4353 "Space required after the attribute default value\n");
4354 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004355 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004356 if (attrName != NULL)
4357 xmlFree(attrName);
4358 if (defaultValue != NULL)
4359 xmlFree(defaultValue);
4360 if (tree != NULL)
4361 xmlFreeEnumeration(tree);
4362 break;
4363 }
4364 SKIP_BLANKS;
4365 }
4366 if (check == CUR_PTR) {
4367 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4368 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4369 ctxt->sax->error(ctxt->userData,
4370 "xmlParseAttributeListDecl: detected internal error\n");
4371 if (attrName != NULL)
4372 xmlFree(attrName);
4373 if (defaultValue != NULL)
4374 xmlFree(defaultValue);
4375 if (tree != NULL)
4376 xmlFreeEnumeration(tree);
4377 break;
4378 }
4379 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4380 (ctxt->sax->attributeDecl != NULL))
4381 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4382 type, def, defaultValue, tree);
4383 if (attrName != NULL)
4384 xmlFree(attrName);
4385 if (defaultValue != NULL)
4386 xmlFree(defaultValue);
4387 GROW;
4388 }
4389 if (RAW == '>') {
4390 if (input != ctxt->input) {
4391 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4392 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4393 ctxt->sax->error(ctxt->userData,
4394"Attribute list declaration doesn't start and stop in the same entity\n");
4395 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004396 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004397 }
4398 NEXT;
4399 }
4400
4401 xmlFree(elemName);
4402 }
4403}
4404
4405/**
4406 * xmlParseElementMixedContentDecl:
4407 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004408 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004409 *
4410 * parse the declaration for a Mixed Element content
4411 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4412 *
4413 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4414 * '(' S? '#PCDATA' S? ')'
4415 *
4416 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4417 *
4418 * [ VC: No Duplicate Types ]
4419 * The same name must not appear more than once in a single
4420 * mixed-content declaration.
4421 *
4422 * returns: the list of the xmlElementContentPtr describing the element choices
4423 */
4424xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004425xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004426 xmlElementContentPtr ret = NULL, cur = NULL, n;
4427 xmlChar *elem = NULL;
4428
4429 GROW;
4430 if ((RAW == '#') && (NXT(1) == 'P') &&
4431 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4432 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4433 (NXT(6) == 'A')) {
4434 SKIP(7);
4435 SKIP_BLANKS;
4436 SHRINK;
4437 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004438 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4439 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4440 if (ctxt->vctxt.error != NULL)
4441 ctxt->vctxt.error(ctxt->vctxt.userData,
4442"Element content declaration doesn't start and stop in the same entity\n");
4443 ctxt->valid = 0;
4444 }
Owen Taylor3473f882001-02-23 17:55:21 +00004445 NEXT;
4446 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4447 if (RAW == '*') {
4448 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4449 NEXT;
4450 }
4451 return(ret);
4452 }
4453 if ((RAW == '(') || (RAW == '|')) {
4454 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4455 if (ret == NULL) return(NULL);
4456 }
4457 while (RAW == '|') {
4458 NEXT;
4459 if (elem == NULL) {
4460 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4461 if (ret == NULL) return(NULL);
4462 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004463 if (cur != NULL)
4464 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004465 cur = ret;
4466 } else {
4467 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4468 if (n == NULL) return(NULL);
4469 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004470 if (n->c1 != NULL)
4471 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004472 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004473 if (n != NULL)
4474 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004475 cur = n;
4476 xmlFree(elem);
4477 }
4478 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004479 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004480 if (elem == NULL) {
4481 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4482 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4483 ctxt->sax->error(ctxt->userData,
4484 "xmlParseElementMixedContentDecl : Name expected\n");
4485 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004486 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004487 xmlFreeElementContent(cur);
4488 return(NULL);
4489 }
4490 SKIP_BLANKS;
4491 GROW;
4492 }
4493 if ((RAW == ')') && (NXT(1) == '*')) {
4494 if (elem != NULL) {
4495 cur->c2 = xmlNewElementContent(elem,
4496 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004497 if (cur->c2 != NULL)
4498 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004499 xmlFree(elem);
4500 }
4501 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004502 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4503 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4504 if (ctxt->vctxt.error != NULL)
4505 ctxt->vctxt.error(ctxt->vctxt.userData,
4506"Element content declaration doesn't start and stop in the same entity\n");
4507 ctxt->valid = 0;
4508 }
Owen Taylor3473f882001-02-23 17:55:21 +00004509 SKIP(2);
4510 } else {
4511 if (elem != NULL) xmlFree(elem);
4512 xmlFreeElementContent(ret);
4513 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4514 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4515 ctxt->sax->error(ctxt->userData,
4516 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4517 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004518 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004519 return(NULL);
4520 }
4521
4522 } else {
4523 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4524 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4525 ctxt->sax->error(ctxt->userData,
4526 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4527 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004528 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004529 }
4530 return(ret);
4531}
4532
4533/**
4534 * xmlParseElementChildrenContentDecl:
4535 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004536 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004537 *
 * parse the declaration for an element's children content model
4539 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4540 *
4541 *
4542 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4543 *
4544 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4545 *
4546 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4547 *
4548 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4549 *
4550 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4551 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004552 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004553 * opening or closing parentheses in a choice, seq, or Mixed
4554 * construct is contained in the replacement text for a parameter
4555 * entity, both must be contained in the same replacement text. For
4556 * interoperability, if a parameter-entity reference appears in a
4557 * choice, seq, or Mixed construct, its replacement text should not
4558 * be empty, and neither the first nor last non-blank character of
4559 * the replacement text should be a connector (| or ,).
4560 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004561 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004562 * hierarchy.
4563 */
4564xmlElementContentPtr
Owen Taylor3473f882001-02-23 17:55:21 +00004565xmlParseElementChildrenContentDecl
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004566(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004567 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4568 xmlChar *elem;
4569 xmlChar type = 0;
4570
4571 SKIP_BLANKS;
4572 GROW;
4573 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004574 xmlParserInputPtr input = ctxt->input;
4575
Owen Taylor3473f882001-02-23 17:55:21 +00004576 /* Recurse on first child */
4577 NEXT;
4578 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004579 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004580 SKIP_BLANKS;
4581 GROW;
4582 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004583 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004584 if (elem == NULL) {
4585 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4586 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4587 ctxt->sax->error(ctxt->userData,
4588 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4589 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004590 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004591 return(NULL);
4592 }
4593 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004594 if (cur == NULL) {
4595 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4596 ctxt->sax->error(ctxt->userData,
4597 "xmlParseElementChildrenContentDecl : out of memory\n");
4598 ctxt->errNo = XML_ERR_NO_MEMORY;
4599 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
4600 xmlFree(elem);
4601 return(NULL);
4602 }
Owen Taylor3473f882001-02-23 17:55:21 +00004603 GROW;
4604 if (RAW == '?') {
4605 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4606 NEXT;
4607 } else if (RAW == '*') {
4608 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4609 NEXT;
4610 } else if (RAW == '+') {
4611 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4612 NEXT;
4613 } else {
4614 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4615 }
4616 xmlFree(elem);
4617 GROW;
4618 }
4619 SKIP_BLANKS;
4620 SHRINK;
4621 while (RAW != ')') {
4622 /*
4623 * Each loop we parse one separator and one element.
4624 */
4625 if (RAW == ',') {
4626 if (type == 0) type = CUR;
4627
4628 /*
4629 * Detect "Name | Name , Name" error
4630 */
4631 else if (type != CUR) {
4632 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4633 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4634 ctxt->sax->error(ctxt->userData,
4635 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4636 type);
4637 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004638 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004639 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004640 xmlFreeElementContent(last);
4641 if (ret != NULL)
4642 xmlFreeElementContent(ret);
4643 return(NULL);
4644 }
4645 NEXT;
4646
4647 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4648 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004649 if ((last != NULL) && (last != ret))
4650 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004651 xmlFreeElementContent(ret);
4652 return(NULL);
4653 }
4654 if (last == NULL) {
4655 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004656 if (ret != NULL)
4657 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004658 ret = cur = op;
4659 } else {
4660 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004661 if (op != NULL)
4662 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004663 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004664 if (last != NULL)
4665 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004666 cur =op;
4667 last = NULL;
4668 }
4669 } else if (RAW == '|') {
4670 if (type == 0) type = CUR;
4671
4672 /*
4673 * Detect "Name , Name | Name" error
4674 */
4675 else if (type != CUR) {
4676 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4677 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4678 ctxt->sax->error(ctxt->userData,
4679 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4680 type);
4681 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004682 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004683 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004684 xmlFreeElementContent(last);
4685 if (ret != NULL)
4686 xmlFreeElementContent(ret);
4687 return(NULL);
4688 }
4689 NEXT;
4690
4691 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4692 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004693 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004694 xmlFreeElementContent(last);
4695 if (ret != NULL)
4696 xmlFreeElementContent(ret);
4697 return(NULL);
4698 }
4699 if (last == NULL) {
4700 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004701 if (ret != NULL)
4702 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004703 ret = cur = op;
4704 } else {
4705 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004706 if (op != NULL)
4707 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004708 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004709 if (last != NULL)
4710 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004711 cur =op;
4712 last = NULL;
4713 }
4714 } else {
4715 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4716 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4717 ctxt->sax->error(ctxt->userData,
4718 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4719 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004720 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004721 if (ret != NULL)
4722 xmlFreeElementContent(ret);
4723 return(NULL);
4724 }
4725 GROW;
4726 SKIP_BLANKS;
4727 GROW;
4728 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004729 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004730 /* Recurse on second child */
4731 NEXT;
4732 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004733 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004734 SKIP_BLANKS;
4735 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004736 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004737 if (elem == NULL) {
4738 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4739 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4740 ctxt->sax->error(ctxt->userData,
4741 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4742 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004743 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004744 if (ret != NULL)
4745 xmlFreeElementContent(ret);
4746 return(NULL);
4747 }
4748 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4749 xmlFree(elem);
4750 if (RAW == '?') {
4751 last->ocur = XML_ELEMENT_CONTENT_OPT;
4752 NEXT;
4753 } else if (RAW == '*') {
4754 last->ocur = XML_ELEMENT_CONTENT_MULT;
4755 NEXT;
4756 } else if (RAW == '+') {
4757 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4758 NEXT;
4759 } else {
4760 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4761 }
4762 }
4763 SKIP_BLANKS;
4764 GROW;
4765 }
4766 if ((cur != NULL) && (last != NULL)) {
4767 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004768 if (last != NULL)
4769 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004770 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004771 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4772 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4773 if (ctxt->vctxt.error != NULL)
4774 ctxt->vctxt.error(ctxt->vctxt.userData,
4775"Element content declaration doesn't start and stop in the same entity\n");
4776 ctxt->valid = 0;
4777 }
Owen Taylor3473f882001-02-23 17:55:21 +00004778 NEXT;
4779 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004780 if (ret != NULL)
4781 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004782 NEXT;
4783 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004784 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004785 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004786 cur = ret;
4787 /*
4788 * Some normalization:
4789 * (a | b* | c?)* == (a | b | c)*
4790 */
4791 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4792 if ((cur->c1 != NULL) &&
4793 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4794 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4795 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4796 if ((cur->c2 != NULL) &&
4797 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4798 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4799 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4800 cur = cur->c2;
4801 }
4802 }
Owen Taylor3473f882001-02-23 17:55:21 +00004803 NEXT;
4804 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004805 if (ret != NULL) {
4806 int found = 0;
4807
Daniel Veillarde470df72001-04-18 21:41:07 +00004808 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004809 /*
4810 * Some normalization:
4811 * (a | b*)+ == (a | b)*
4812 * (a | b?)+ == (a | b)*
4813 */
4814 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4815 if ((cur->c1 != NULL) &&
4816 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4817 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4818 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4819 found = 1;
4820 }
4821 if ((cur->c2 != NULL) &&
4822 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4823 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4824 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4825 found = 1;
4826 }
4827 cur = cur->c2;
4828 }
4829 if (found)
4830 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4831 }
Owen Taylor3473f882001-02-23 17:55:21 +00004832 NEXT;
4833 }
4834 return(ret);
4835}
4836
4837/**
4838 * xmlParseElementContentDecl:
4839 * @ctxt: an XML parser context
4840 * @name: the name of the element being defined.
4841 * @result: the Element Content pointer will be stored here if any
4842 *
4843 * parse the declaration for an Element content either Mixed or Children,
4844 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4845 *
4846 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4847 *
4848 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4849 */
4850
4851int
4852xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4853 xmlElementContentPtr *result) {
4854
4855 xmlElementContentPtr tree = NULL;
4856 xmlParserInputPtr input = ctxt->input;
4857 int res;
4858
4859 *result = NULL;
4860
4861 if (RAW != '(') {
4862 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4863 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4864 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004865 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004866 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004867 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004868 return(-1);
4869 }
4870 NEXT;
4871 GROW;
4872 SKIP_BLANKS;
4873 if ((RAW == '#') && (NXT(1) == 'P') &&
4874 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4875 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4876 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004877 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004878 res = XML_ELEMENT_TYPE_MIXED;
4879 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004880 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004881 res = XML_ELEMENT_TYPE_ELEMENT;
4882 }
Owen Taylor3473f882001-02-23 17:55:21 +00004883 SKIP_BLANKS;
4884 *result = tree;
4885 return(res);
4886}
4887
4888/**
4889 * xmlParseElementDecl:
4890 * @ctxt: an XML parser context
4891 *
4892 * parse an Element declaration.
4893 *
4894 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4895 *
4896 * [ VC: Unique Element Type Declaration ]
4897 * No element type may be declared more than once
4898 *
4899 * Returns the type of the element, or -1 in case of error
4900 */
4901int
4902xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4903 xmlChar *name;
4904 int ret = -1;
4905 xmlElementContentPtr content = NULL;
4906
4907 GROW;
4908 if ((RAW == '<') && (NXT(1) == '!') &&
4909 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4910 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4911 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4912 (NXT(8) == 'T')) {
4913 xmlParserInputPtr input = ctxt->input;
4914
4915 SKIP(9);
4916 if (!IS_BLANK(CUR)) {
4917 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4918 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4919 ctxt->sax->error(ctxt->userData,
4920 "Space required after 'ELEMENT'\n");
4921 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004922 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004923 }
4924 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004925 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004926 if (name == NULL) {
4927 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4928 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4929 ctxt->sax->error(ctxt->userData,
4930 "xmlParseElementDecl: no name for Element\n");
4931 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004932 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004933 return(-1);
4934 }
4935 while ((RAW == 0) && (ctxt->inputNr > 1))
4936 xmlPopInput(ctxt);
4937 if (!IS_BLANK(CUR)) {
4938 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4939 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4940 ctxt->sax->error(ctxt->userData,
4941 "Space required after the element name\n");
4942 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004943 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004944 }
4945 SKIP_BLANKS;
4946 if ((RAW == 'E') && (NXT(1) == 'M') &&
4947 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4948 (NXT(4) == 'Y')) {
4949 SKIP(5);
4950 /*
4951 * Element must always be empty.
4952 */
4953 ret = XML_ELEMENT_TYPE_EMPTY;
4954 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4955 (NXT(2) == 'Y')) {
4956 SKIP(3);
4957 /*
4958 * Element is a generic container.
4959 */
4960 ret = XML_ELEMENT_TYPE_ANY;
4961 } else if (RAW == '(') {
4962 ret = xmlParseElementContentDecl(ctxt, name, &content);
4963 } else {
4964 /*
4965 * [ WFC: PEs in Internal Subset ] error handling.
4966 */
4967 if ((RAW == '%') && (ctxt->external == 0) &&
4968 (ctxt->inputNr == 1)) {
4969 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4970 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4971 ctxt->sax->error(ctxt->userData,
4972 "PEReference: forbidden within markup decl in internal subset\n");
4973 } else {
4974 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4975 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4976 ctxt->sax->error(ctxt->userData,
4977 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4978 }
4979 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004980 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004981 if (name != NULL) xmlFree(name);
4982 return(-1);
4983 }
4984
4985 SKIP_BLANKS;
4986 /*
4987 * Pop-up of finished entities.
4988 */
4989 while ((RAW == 0) && (ctxt->inputNr > 1))
4990 xmlPopInput(ctxt);
4991 SKIP_BLANKS;
4992
4993 if (RAW != '>') {
4994 ctxt->errNo = XML_ERR_GT_REQUIRED;
4995 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4996 ctxt->sax->error(ctxt->userData,
4997 "xmlParseElementDecl: expected '>' at the end\n");
4998 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00004999 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005000 } else {
5001 if (input != ctxt->input) {
5002 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5003 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5004 ctxt->sax->error(ctxt->userData,
5005"Element declaration doesn't start and stop in the same entity\n");
5006 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005007 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005008 }
5009
5010 NEXT;
5011 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5012 (ctxt->sax->elementDecl != NULL))
5013 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5014 content);
5015 }
5016 if (content != NULL) {
5017 xmlFreeElementContent(content);
5018 }
5019 if (name != NULL) {
5020 xmlFree(name);
5021 }
5022 }
5023 return(ret);
5024}
5025
5026/**
Owen Taylor3473f882001-02-23 17:55:21 +00005027 * xmlParseConditionalSections
5028 * @ctxt: an XML parser context
5029 *
5030 * [61] conditionalSect ::= includeSect | ignoreSect
5031 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5032 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5033 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5034 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5035 */
5036
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005037static void
Owen Taylor3473f882001-02-23 17:55:21 +00005038xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5039 SKIP(3);
5040 SKIP_BLANKS;
5041 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
5042 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
5043 (NXT(6) == 'E')) {
5044 SKIP(7);
5045 SKIP_BLANKS;
5046 if (RAW != '[') {
5047 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5048 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5049 ctxt->sax->error(ctxt->userData,
5050 "XML conditional section '[' expected\n");
5051 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005052 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005053 } else {
5054 NEXT;
5055 }
5056 if (xmlParserDebugEntities) {
5057 if ((ctxt->input != NULL) && (ctxt->input->filename))
5058 xmlGenericError(xmlGenericErrorContext,
5059 "%s(%d): ", ctxt->input->filename,
5060 ctxt->input->line);
5061 xmlGenericError(xmlGenericErrorContext,
5062 "Entering INCLUDE Conditional Section\n");
5063 }
5064
5065 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5066 (NXT(2) != '>'))) {
5067 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005068 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005069
5070 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5071 xmlParseConditionalSections(ctxt);
5072 } else if (IS_BLANK(CUR)) {
5073 NEXT;
5074 } else if (RAW == '%') {
5075 xmlParsePEReference(ctxt);
5076 } else
5077 xmlParseMarkupDecl(ctxt);
5078
5079 /*
5080 * Pop-up of finished entities.
5081 */
5082 while ((RAW == 0) && (ctxt->inputNr > 1))
5083 xmlPopInput(ctxt);
5084
Daniel Veillardfdc91562002-07-01 21:52:03 +00005085 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005086 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5087 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5088 ctxt->sax->error(ctxt->userData,
5089 "Content error in the external subset\n");
5090 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005091 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005092 break;
5093 }
5094 }
5095 if (xmlParserDebugEntities) {
5096 if ((ctxt->input != NULL) && (ctxt->input->filename))
5097 xmlGenericError(xmlGenericErrorContext,
5098 "%s(%d): ", ctxt->input->filename,
5099 ctxt->input->line);
5100 xmlGenericError(xmlGenericErrorContext,
5101 "Leaving INCLUDE Conditional Section\n");
5102 }
5103
5104 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
5105 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
5106 int state;
5107 int instate;
5108 int depth = 0;
5109
5110 SKIP(6);
5111 SKIP_BLANKS;
5112 if (RAW != '[') {
5113 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5114 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5115 ctxt->sax->error(ctxt->userData,
5116 "XML conditional section '[' expected\n");
5117 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005118 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005119 } else {
5120 NEXT;
5121 }
5122 if (xmlParserDebugEntities) {
5123 if ((ctxt->input != NULL) && (ctxt->input->filename))
5124 xmlGenericError(xmlGenericErrorContext,
5125 "%s(%d): ", ctxt->input->filename,
5126 ctxt->input->line);
5127 xmlGenericError(xmlGenericErrorContext,
5128 "Entering IGNORE Conditional Section\n");
5129 }
5130
5131 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005132 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005133 * But disable SAX event generating DTD building in the meantime
5134 */
5135 state = ctxt->disableSAX;
5136 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005137 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005138 ctxt->instate = XML_PARSER_IGNORE;
5139
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005140 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005141 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5142 depth++;
5143 SKIP(3);
5144 continue;
5145 }
5146 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5147 if (--depth >= 0) SKIP(3);
5148 continue;
5149 }
5150 NEXT;
5151 continue;
5152 }
5153
5154 ctxt->disableSAX = state;
5155 ctxt->instate = instate;
5156
5157 if (xmlParserDebugEntities) {
5158 if ((ctxt->input != NULL) && (ctxt->input->filename))
5159 xmlGenericError(xmlGenericErrorContext,
5160 "%s(%d): ", ctxt->input->filename,
5161 ctxt->input->line);
5162 xmlGenericError(xmlGenericErrorContext,
5163 "Leaving IGNORE Conditional Section\n");
5164 }
5165
5166 } else {
5167 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
5168 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5169 ctxt->sax->error(ctxt->userData,
5170 "XML conditional section INCLUDE or IGNORE keyword expected\n");
5171 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005172 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005173 }
5174
5175 if (RAW == 0)
5176 SHRINK;
5177
5178 if (RAW == 0) {
5179 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
5180 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5181 ctxt->sax->error(ctxt->userData,
5182 "XML conditional section not closed\n");
5183 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005184 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005185 } else {
5186 SKIP(3);
5187 }
5188}
5189
5190/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005191 * xmlParseMarkupDecl:
5192 * @ctxt: an XML parser context
5193 *
5194 * parse Markup declarations
5195 *
5196 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5197 * NotationDecl | PI | Comment
5198 *
5199 * [ VC: Proper Declaration/PE Nesting ]
5200 * Parameter-entity replacement text must be properly nested with
5201 * markup declarations. That is to say, if either the first character
5202 * or the last character of a markup declaration (markupdecl above) is
5203 * contained in the replacement text for a parameter-entity reference,
5204 * both must be contained in the same replacement text.
5205 *
5206 * [ WFC: PEs in Internal Subset ]
5207 * In the internal DTD subset, parameter-entity references can occur
5208 * only where markup declarations can occur, not within markup declarations.
5209 * (This does not apply to references that occur in external parameter
5210 * entities or to the external subset.)
5211 */
5212void
5213xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5214 GROW;
5215 xmlParseElementDecl(ctxt);
5216 xmlParseAttributeListDecl(ctxt);
5217 xmlParseEntityDecl(ctxt);
5218 xmlParseNotationDecl(ctxt);
5219 xmlParsePI(ctxt);
5220 xmlParseComment(ctxt);
5221 /*
5222 * This is only for internal subset. On external entities,
5223 * the replacement is done before parsing stage
5224 */
5225 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5226 xmlParsePEReference(ctxt);
5227
5228 /*
5229 * Conditional sections are allowed from entities included
5230 * by PE References in the internal subset.
5231 */
5232 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5233 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5234 xmlParseConditionalSections(ctxt);
5235 }
5236 }
5237
5238 ctxt->instate = XML_PARSER_DTD;
5239}
5240
5241/**
5242 * xmlParseTextDecl:
5243 * @ctxt: an XML parser context
5244 *
5245 * parse an XML declaration header for external entities
5246 *
5247 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5248 *
5249 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5250 */
5251
5252void
5253xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5254 xmlChar *version;
5255
5256 /*
5257 * We know that '<?xml' is here.
5258 */
5259 if ((RAW == '<') && (NXT(1) == '?') &&
5260 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5261 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5262 SKIP(5);
5263 } else {
5264 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5265 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5266 ctxt->sax->error(ctxt->userData,
5267 "Text declaration '<?xml' required\n");
5268 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005269 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005270
5271 return;
5272 }
5273
5274 if (!IS_BLANK(CUR)) {
5275 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5276 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5277 ctxt->sax->error(ctxt->userData,
5278 "Space needed after '<?xml'\n");
5279 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005280 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005281 }
5282 SKIP_BLANKS;
5283
5284 /*
5285 * We may have the VersionInfo here.
5286 */
5287 version = xmlParseVersionInfo(ctxt);
5288 if (version == NULL)
5289 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005290 else {
5291 if (!IS_BLANK(CUR)) {
5292 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5293 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5294 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5295 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005296 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard401c2112002-01-07 16:54:10 +00005297 }
5298 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005299 ctxt->input->version = version;
5300
5301 /*
5302 * We must have the encoding declaration
5303 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005304 xmlParseEncodingDecl(ctxt);
5305 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5306 /*
5307 * The XML REC instructs us to stop parsing right here
5308 */
5309 return;
5310 }
5311
5312 SKIP_BLANKS;
5313 if ((RAW == '?') && (NXT(1) == '>')) {
5314 SKIP(2);
5315 } else if (RAW == '>') {
5316 /* Deprecated old WD ... */
5317 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5318 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5319 ctxt->sax->error(ctxt->userData,
5320 "XML declaration must end-up with '?>'\n");
5321 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005322 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005323 NEXT;
5324 } else {
5325 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5326 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5327 ctxt->sax->error(ctxt->userData,
5328 "parsing XML declaration: '?>' expected\n");
5329 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005330 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005331 MOVETO_ENDTAG(CUR_PTR);
5332 NEXT;
5333 }
5334}
5335
5336/**
Owen Taylor3473f882001-02-23 17:55:21 +00005337 * xmlParseExternalSubset:
5338 * @ctxt: an XML parser context
5339 * @ExternalID: the external identifier
5340 * @SystemID: the system identifier (or URL)
5341 *
5342 * parse Markup declarations from an external subset
5343 *
5344 * [30] extSubset ::= textDecl? extSubsetDecl
5345 *
5346 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5347 */
5348void
5349xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5350 const xmlChar *SystemID) {
5351 GROW;
5352 if ((RAW == '<') && (NXT(1) == '?') &&
5353 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5354 (NXT(4) == 'l')) {
5355 xmlParseTextDecl(ctxt);
5356 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5357 /*
5358 * The XML REC instructs us to stop parsing right here
5359 */
5360 ctxt->instate = XML_PARSER_EOF;
5361 return;
5362 }
5363 }
5364 if (ctxt->myDoc == NULL) {
5365 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5366 }
5367 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5368 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5369
5370 ctxt->instate = XML_PARSER_DTD;
5371 ctxt->external = 1;
5372 while (((RAW == '<') && (NXT(1) == '?')) ||
5373 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005374 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005375 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005376 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005377
5378 GROW;
5379 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5380 xmlParseConditionalSections(ctxt);
5381 } else if (IS_BLANK(CUR)) {
5382 NEXT;
5383 } else if (RAW == '%') {
5384 xmlParsePEReference(ctxt);
5385 } else
5386 xmlParseMarkupDecl(ctxt);
5387
5388 /*
5389 * Pop-up of finished entities.
5390 */
5391 while ((RAW == 0) && (ctxt->inputNr > 1))
5392 xmlPopInput(ctxt);
5393
Daniel Veillardfdc91562002-07-01 21:52:03 +00005394 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005395 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5396 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5397 ctxt->sax->error(ctxt->userData,
5398 "Content error in the external subset\n");
5399 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005400 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005401 break;
5402 }
5403 }
5404
5405 if (RAW != 0) {
5406 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5407 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5408 ctxt->sax->error(ctxt->userData,
5409 "Extra content at the end of the document\n");
5410 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005411 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005412 }
5413
5414}
5415
5416/**
5417 * xmlParseReference:
5418 * @ctxt: an XML parser context
5419 *
5420 * parse and handle entity references in content, depending on the SAX
5421 * interface, this may end-up in a call to character() if this is a
5422 * CharRef, a predefined entity, if there is no reference() callback.
5423 * or if the parser was asked to switch to that mode.
5424 *
5425 * [67] Reference ::= EntityRef | CharRef
5426 */
5427void
5428xmlParseReference(xmlParserCtxtPtr ctxt) {
5429 xmlEntityPtr ent;
5430 xmlChar *val;
5431 if (RAW != '&') return;
5432
5433 if (NXT(1) == '#') {
5434 int i = 0;
5435 xmlChar out[10];
5436 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005437 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005438
5439 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5440 /*
5441 * So we are using non-UTF-8 buffers
5442 * Check that the char fit on 8bits, if not
5443 * generate a CharRef.
5444 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005445 if (value <= 0xFF) {
5446 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005447 out[1] = 0;
5448 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5449 (!ctxt->disableSAX))
5450 ctxt->sax->characters(ctxt->userData, out, 1);
5451 } else {
5452 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005453 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005454 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005455 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005456 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5457 (!ctxt->disableSAX))
5458 ctxt->sax->reference(ctxt->userData, out);
5459 }
5460 } else {
5461 /*
5462 * Just encode the value in UTF-8
5463 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005464 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005465 out[i] = 0;
5466 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5467 (!ctxt->disableSAX))
5468 ctxt->sax->characters(ctxt->userData, out, i);
5469 }
5470 } else {
5471 ent = xmlParseEntityRef(ctxt);
5472 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005473 if (!ctxt->wellFormed)
5474 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005475 if ((ent->name != NULL) &&
5476 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5477 xmlNodePtr list = NULL;
5478 int ret;
5479
5480
5481 /*
5482 * The first reference to the entity trigger a parsing phase
5483 * where the ent->children is filled with the result from
5484 * the parsing.
5485 */
5486 if (ent->children == NULL) {
5487 xmlChar *value;
5488 value = ent->content;
5489
5490 /*
5491 * Check that this entity is well formed
5492 */
5493 if ((value != NULL) &&
5494 (value[1] == 0) && (value[0] == '<') &&
5495 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5496 /*
5497 * DONE: get definite answer on this !!!
5498 * Lots of entity decls are used to declare a single
5499 * char
5500 * <!ENTITY lt "<">
5501 * Which seems to be valid since
5502 * 2.4: The ampersand character (&) and the left angle
5503 * bracket (<) may appear in their literal form only
5504 * when used ... They are also legal within the literal
5505 * entity value of an internal entity declaration;i
5506 * see "4.3.2 Well-Formed Parsed Entities".
5507 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5508 * Looking at the OASIS test suite and James Clark
5509 * tests, this is broken. However the XML REC uses
5510 * it. Is the XML REC not well-formed ????
5511 * This is a hack to avoid this problem
5512 *
5513 * ANSWER: since lt gt amp .. are already defined,
5514 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005515 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005516 * is lousy but acceptable.
5517 */
5518 list = xmlNewDocText(ctxt->myDoc, value);
5519 if (list != NULL) {
5520 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5521 (ent->children == NULL)) {
5522 ent->children = list;
5523 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005524 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005525 list->parent = (xmlNodePtr) ent;
5526 } else {
5527 xmlFreeNodeList(list);
5528 }
5529 } else if (list != NULL) {
5530 xmlFreeNodeList(list);
5531 }
5532 } else {
5533 /*
5534 * 4.3.2: An internal general parsed entity is well-formed
5535 * if its replacement text matches the production labeled
5536 * content.
5537 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005538
5539 void *user_data;
5540 /*
5541 * This is a bit hackish but this seems the best
5542 * way to make sure both SAX and DOM entity support
5543 * behaves okay.
5544 */
5545 if (ctxt->userData == ctxt)
5546 user_data = NULL;
5547 else
5548 user_data = ctxt->userData;
5549
Owen Taylor3473f882001-02-23 17:55:21 +00005550 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5551 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005552 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5553 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005554 ctxt->depth--;
5555 } else if (ent->etype ==
5556 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5557 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005558 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005559 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005560 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005561 ctxt->depth--;
5562 } else {
5563 ret = -1;
5564 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5565 ctxt->sax->error(ctxt->userData,
5566 "Internal: invalid entity type\n");
5567 }
5568 if (ret == XML_ERR_ENTITY_LOOP) {
5569 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5570 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5571 ctxt->sax->error(ctxt->userData,
5572 "Detected entity reference loop\n");
5573 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005574 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005575 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005576 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005577 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5578 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005579 (ent->children == NULL)) {
5580 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005581 if (ctxt->replaceEntities) {
5582 /*
5583 * Prune it directly in the generated document
5584 * except for single text nodes.
5585 */
5586 if ((list->type == XML_TEXT_NODE) &&
5587 (list->next == NULL)) {
5588 list->parent = (xmlNodePtr) ent;
5589 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005590 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005591 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005592 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005593 while (list != NULL) {
5594 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005595 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005596 if (list->next == NULL)
5597 ent->last = list;
5598 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005599 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005600 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005601 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5602 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005603 }
5604 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005605 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005606 while (list != NULL) {
5607 list->parent = (xmlNodePtr) ent;
5608 if (list->next == NULL)
5609 ent->last = list;
5610 list = list->next;
5611 }
Owen Taylor3473f882001-02-23 17:55:21 +00005612 }
5613 } else {
5614 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005615 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005616 }
5617 } else if (ret > 0) {
5618 ctxt->errNo = ret;
5619 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5620 ctxt->sax->error(ctxt->userData,
5621 "Entity value required\n");
5622 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005623 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005624 } else if (list != NULL) {
5625 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005626 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005627 }
5628 }
5629 }
5630 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5631 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5632 /*
5633 * Create a node.
5634 */
5635 ctxt->sax->reference(ctxt->userData, ent->name);
5636 return;
5637 } else if (ctxt->replaceEntities) {
5638 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5639 /*
5640 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005641 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005642 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005643 */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005644 if ((list == NULL) && (ent->owner == 0)) {
5645 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005646 cur = ent->children;
5647 while (cur != NULL) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005648 nw = xmlCopyNode(cur, 1);
5649 if (nw != NULL) {
5650 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005651 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005652 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005653 }
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005654 xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005655 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005656 if (cur == ent->last)
5657 break;
5658 cur = cur->next;
5659 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005660 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005661 xmlAddEntityReference(ent, firstChild, nw);
5662 } else if (list == NULL) {
5663 xmlNodePtr nw = NULL, cur, next, last,
5664 firstChild = NULL;
5665 /*
5666 * Copy the entity child list and make it the new
5667 * entity child list. The goal is to make sure any
5668 * ID or REF referenced will be the one from the
5669 * document content and not the entity copy.
5670 */
5671 cur = ent->children;
5672 ent->children = NULL;
5673 last = ent->last;
5674 ent->last = NULL;
5675 while (cur != NULL) {
5676 next = cur->next;
5677 cur->next = NULL;
5678 cur->parent = NULL;
5679 nw = xmlCopyNode(cur, 1);
5680 if (nw != NULL) {
5681 nw->_private = cur->_private;
5682 if (firstChild == NULL){
5683 firstChild = cur;
5684 }
5685 xmlAddChild((xmlNodePtr) ent, nw);
5686 xmlAddChild(ctxt->node, cur);
5687 }
5688 if (cur == last)
5689 break;
5690 cur = next;
5691 }
5692 ent->owner = 1;
5693 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5694 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005695 } else {
5696 /*
5697 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005698 * node with a possible previous text one which
5699 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005700 */
5701 if (ent->children->type == XML_TEXT_NODE)
5702 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5703 if ((ent->last != ent->children) &&
5704 (ent->last->type == XML_TEXT_NODE))
5705 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5706 xmlAddChildList(ctxt->node, ent->children);
5707 }
5708
Owen Taylor3473f882001-02-23 17:55:21 +00005709 /*
5710 * This is to avoid a nasty side effect, see
5711 * characters() in SAX.c
5712 */
5713 ctxt->nodemem = 0;
5714 ctxt->nodelen = 0;
5715 return;
5716 } else {
5717 /*
5718 * Probably running in SAX mode
5719 */
5720 xmlParserInputPtr input;
5721
5722 input = xmlNewEntityInputStream(ctxt, ent);
5723 xmlPushInput(ctxt, input);
5724 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5725 (RAW == '<') && (NXT(1) == '?') &&
5726 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5727 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5728 xmlParseTextDecl(ctxt);
5729 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5730 /*
5731 * The XML REC instructs us to stop parsing right here
5732 */
5733 ctxt->instate = XML_PARSER_EOF;
5734 return;
5735 }
5736 if (input->standalone == 1) {
5737 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5738 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5739 ctxt->sax->error(ctxt->userData,
5740 "external parsed entities cannot be standalone\n");
5741 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005742 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005743 }
5744 }
5745 return;
5746 }
5747 }
5748 } else {
5749 val = ent->content;
5750 if (val == NULL) return;
5751 /*
5752 * inline the entity.
5753 */
5754 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5755 (!ctxt->disableSAX))
5756 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5757 }
5758 }
5759}
5760
5761/**
5762 * xmlParseEntityRef:
5763 * @ctxt: an XML parser context
5764 *
5765 * parse ENTITY references declarations
5766 *
5767 * [68] EntityRef ::= '&' Name ';'
5768 *
5769 * [ WFC: Entity Declared ]
5770 * In a document without any DTD, a document with only an internal DTD
5771 * subset which contains no parameter entity references, or a document
5772 * with "standalone='yes'", the Name given in the entity reference
5773 * must match that in an entity declaration, except that well-formed
5774 * documents need not declare any of the following entities: amp, lt,
5775 * gt, apos, quot. The declaration of a parameter entity must precede
5776 * any reference to it. Similarly, the declaration of a general entity
5777 * must precede any reference to it which appears in a default value in an
5778 * attribute-list declaration. Note that if entities are declared in the
5779 * external subset or in external parameter entities, a non-validating
5780 * processor is not obligated to read and process their declarations;
5781 * for such documents, the rule that an entity must be declared is a
5782 * well-formedness constraint only if standalone='yes'.
5783 *
5784 * [ WFC: Parsed Entity ]
5785 * An entity reference must not contain the name of an unparsed entity
5786 *
5787 * Returns the xmlEntityPtr if found, or NULL otherwise.
5788 */
5789xmlEntityPtr
5790xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5791 xmlChar *name;
5792 xmlEntityPtr ent = NULL;
5793
5794 GROW;
5795
5796 if (RAW == '&') {
5797 NEXT;
5798 name = xmlParseName(ctxt);
5799 if (name == NULL) {
5800 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5801 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5802 ctxt->sax->error(ctxt->userData,
5803 "xmlParseEntityRef: no name\n");
5804 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005805 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005806 } else {
5807 if (RAW == ';') {
5808 NEXT;
5809 /*
5810 * Ask first SAX for entity resolution, otherwise try the
5811 * predefined set.
5812 */
5813 if (ctxt->sax != NULL) {
5814 if (ctxt->sax->getEntity != NULL)
5815 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005816 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00005817 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005818 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
5819 (ctxt->userData==ctxt)) {
Daniel Veillard5997aca2002-03-18 18:36:20 +00005820 ent = getEntity(ctxt, name);
5821 }
Owen Taylor3473f882001-02-23 17:55:21 +00005822 }
5823 /*
5824 * [ WFC: Entity Declared ]
5825 * In a document without any DTD, a document with only an
5826 * internal DTD subset which contains no parameter entity
5827 * references, or a document with "standalone='yes'", the
5828 * Name given in the entity reference must match that in an
5829 * entity declaration, except that well-formed documents
5830 * need not declare any of the following entities: amp, lt,
5831 * gt, apos, quot.
5832 * The declaration of a parameter entity must precede any
5833 * reference to it.
5834 * Similarly, the declaration of a general entity must
5835 * precede any reference to it which appears in a default
5836 * value in an attribute-list declaration. Note that if
5837 * entities are declared in the external subset or in
5838 * external parameter entities, a non-validating processor
5839 * is not obligated to read and process their declarations;
5840 * for such documents, the rule that an entity must be
5841 * declared is a well-formedness constraint only if
5842 * standalone='yes'.
5843 */
5844 if (ent == NULL) {
5845 if ((ctxt->standalone == 1) ||
5846 ((ctxt->hasExternalSubset == 0) &&
5847 (ctxt->hasPErefs == 0))) {
5848 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5849 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5850 ctxt->sax->error(ctxt->userData,
5851 "Entity '%s' not defined\n", name);
5852 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005853 ctxt->valid = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005854 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005855 } else {
5856 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005857 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005858 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005859 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005860 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005861 }
5862 }
5863
5864 /*
5865 * [ WFC: Parsed Entity ]
5866 * An entity reference must not contain the name of an
5867 * unparsed entity
5868 */
5869 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5870 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5871 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5872 ctxt->sax->error(ctxt->userData,
5873 "Entity reference to unparsed entity %s\n", name);
5874 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005875 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005876 }
5877
5878 /*
5879 * [ WFC: No External Entity References ]
5880 * Attribute values cannot contain direct or indirect
5881 * entity references to external entities.
5882 */
5883 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5884 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5885 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5886 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5887 ctxt->sax->error(ctxt->userData,
5888 "Attribute references external entity '%s'\n", name);
5889 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005890 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005891 }
5892 /*
5893 * [ WFC: No < in Attribute Values ]
5894 * The replacement text of any entity referred to directly or
5895 * indirectly in an attribute value (other than "&lt;") must
5896 * not contain a <.
5897 */
5898 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5899 (ent != NULL) &&
5900 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5901 (ent->content != NULL) &&
5902 (xmlStrchr(ent->content, '<'))) {
5903 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5904 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5905 ctxt->sax->error(ctxt->userData,
5906 "'<' in entity '%s' is not allowed in attributes values\n", name);
5907 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005908 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005909 }
5910
5911 /*
5912 * Internal check, no parameter entities here ...
5913 */
5914 else {
5915 switch (ent->etype) {
5916 case XML_INTERNAL_PARAMETER_ENTITY:
5917 case XML_EXTERNAL_PARAMETER_ENTITY:
5918 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5919 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5920 ctxt->sax->error(ctxt->userData,
5921 "Attempt to reference the parameter entity '%s'\n", name);
5922 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005923 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005924 break;
5925 default:
5926 break;
5927 }
5928 }
5929
5930 /*
5931 * [ WFC: No Recursion ]
5932 * A parsed entity must not contain a recursive reference
5933 * to itself, either directly or indirectly.
5934 * Done somewhere else
5935 */
5936
5937 } else {
5938 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5939 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5940 ctxt->sax->error(ctxt->userData,
5941 "xmlParseEntityRef: expecting ';'\n");
5942 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005943 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005944 }
5945 xmlFree(name);
5946 }
5947 }
5948 return(ent);
5949}
5950
5951/**
5952 * xmlParseStringEntityRef:
5953 * @ctxt: an XML parser context
5954 * @str: a pointer to an index in the string
5955 *
5956 * parse ENTITY references declarations, but this version parses it from
5957 * a string value.
5958 *
5959 * [68] EntityRef ::= '&' Name ';'
5960 *
5961 * [ WFC: Entity Declared ]
5962 * In a document without any DTD, a document with only an internal DTD
5963 * subset which contains no parameter entity references, or a document
5964 * with "standalone='yes'", the Name given in the entity reference
5965 * must match that in an entity declaration, except that well-formed
5966 * documents need not declare any of the following entities: amp, lt,
5967 * gt, apos, quot. The declaration of a parameter entity must precede
5968 * any reference to it. Similarly, the declaration of a general entity
5969 * must precede any reference to it which appears in a default value in an
5970 * attribute-list declaration. Note that if entities are declared in the
5971 * external subset or in external parameter entities, a non-validating
5972 * processor is not obligated to read and process their declarations;
5973 * for such documents, the rule that an entity must be declared is a
5974 * well-formedness constraint only if standalone='yes'.
5975 *
5976 * [ WFC: Parsed Entity ]
5977 * An entity reference must not contain the name of an unparsed entity
5978 *
5979 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5980 * is updated to the current location in the string.
5981 */
5982xmlEntityPtr
5983xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5984 xmlChar *name;
5985 const xmlChar *ptr;
5986 xmlChar cur;
5987 xmlEntityPtr ent = NULL;
5988
5989 if ((str == NULL) || (*str == NULL))
5990 return(NULL);
5991 ptr = *str;
5992 cur = *ptr;
5993 if (cur == '&') {
5994 ptr++;
5995 cur = *ptr;
5996 name = xmlParseStringName(ctxt, &ptr);
5997 if (name == NULL) {
5998 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5999 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6000 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00006001 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006002 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006003 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006004 } else {
6005 if (*ptr == ';') {
6006 ptr++;
6007 /*
6008 * Ask first SAX for entity resolution, otherwise try the
6009 * predefined set.
6010 */
6011 if (ctxt->sax != NULL) {
6012 if (ctxt->sax->getEntity != NULL)
6013 ent = ctxt->sax->getEntity(ctxt->userData, name);
6014 if (ent == NULL)
6015 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006016 if ((ent == NULL) && (ctxt->userData==ctxt)) {
6017 ent = getEntity(ctxt, name);
6018 }
Owen Taylor3473f882001-02-23 17:55:21 +00006019 }
6020 /*
6021 * [ WFC: Entity Declared ]
6022 * In a document without any DTD, a document with only an
6023 * internal DTD subset which contains no parameter entity
6024 * references, or a document with "standalone='yes'", the
6025 * Name given in the entity reference must match that in an
6026 * entity declaration, except that well-formed documents
6027 * need not declare any of the following entities: amp, lt,
6028 * gt, apos, quot.
6029 * The declaration of a parameter entity must precede any
6030 * reference to it.
6031 * Similarly, the declaration of a general entity must
6032 * precede any reference to it which appears in a default
6033 * value in an attribute-list declaration. Note that if
6034 * entities are declared in the external subset or in
6035 * external parameter entities, a non-validating processor
6036 * is not obligated to read and process their declarations;
6037 * for such documents, the rule that an entity must be
6038 * declared is a well-formedness constraint only if
6039 * standalone='yes'.
6040 */
6041 if (ent == NULL) {
6042 if ((ctxt->standalone == 1) ||
6043 ((ctxt->hasExternalSubset == 0) &&
6044 (ctxt->hasPErefs == 0))) {
6045 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6047 ctxt->sax->error(ctxt->userData,
6048 "Entity '%s' not defined\n", name);
6049 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006050 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006051 } else {
6052 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
6053 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6054 ctxt->sax->warning(ctxt->userData,
6055 "Entity '%s' not defined\n", name);
6056 }
6057 }
6058
6059 /*
6060 * [ WFC: Parsed Entity ]
6061 * An entity reference must not contain the name of an
6062 * unparsed entity
6063 */
6064 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6065 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
6066 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6067 ctxt->sax->error(ctxt->userData,
6068 "Entity reference to unparsed entity %s\n", name);
6069 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006070 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006071 }
6072
6073 /*
6074 * [ WFC: No External Entity References ]
6075 * Attribute values cannot contain direct or indirect
6076 * entity references to external entities.
6077 */
6078 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6079 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6080 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6081 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6082 ctxt->sax->error(ctxt->userData,
6083 "Attribute references external entity '%s'\n", name);
6084 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006085 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006086 }
6087 /*
6088 * [ WFC: No < in Attribute Values ]
6089 * The replacement text of any entity referred to directly or
6090 * indirectly in an attribute value (other than "&lt;") must
6091 * not contain a <.
6092 */
6093 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6094 (ent != NULL) &&
6095 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6096 (ent->content != NULL) &&
6097 (xmlStrchr(ent->content, '<'))) {
6098 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6099 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6100 ctxt->sax->error(ctxt->userData,
6101 "'<' in entity '%s' is not allowed in attributes values\n", name);
6102 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006103 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006104 }
6105
6106 /*
6107 * Internal check, no parameter entities here ...
6108 */
6109 else {
6110 switch (ent->etype) {
6111 case XML_INTERNAL_PARAMETER_ENTITY:
6112 case XML_EXTERNAL_PARAMETER_ENTITY:
6113 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6114 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6115 ctxt->sax->error(ctxt->userData,
6116 "Attempt to reference the parameter entity '%s'\n", name);
6117 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006118 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006119 break;
6120 default:
6121 break;
6122 }
6123 }
6124
6125 /*
6126 * [ WFC: No Recursion ]
6127 * A parsed entity must not contain a recursive reference
6128 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006129 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006130 */
6131
6132 } else {
6133 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6134 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6135 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00006136 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006137 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006138 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006139 }
6140 xmlFree(name);
6141 }
6142 }
6143 *str = ptr;
6144 return(ent);
6145}
6146
6147/**
6148 * xmlParsePEReference:
6149 * @ctxt: an XML parser context
6150 *
6151 * parse PEReference declarations
6152 * The entity content is handled directly by pushing it's content as
6153 * a new input stream.
6154 *
6155 * [69] PEReference ::= '%' Name ';'
6156 *
6157 * [ WFC: No Recursion ]
6158 * A parsed entity must not contain a recursive
6159 * reference to itself, either directly or indirectly.
6160 *
6161 * [ WFC: Entity Declared ]
6162 * In a document without any DTD, a document with only an internal DTD
6163 * subset which contains no parameter entity references, or a document
6164 * with "standalone='yes'", ... ... The declaration of a parameter
6165 * entity must precede any reference to it...
6166 *
6167 * [ VC: Entity Declared ]
6168 * In a document with an external subset or external parameter entities
6169 * with "standalone='no'", ... ... The declaration of a parameter entity
6170 * must precede any reference to it...
6171 *
6172 * [ WFC: In DTD ]
6173 * Parameter-entity references may only appear in the DTD.
6174 * NOTE: misleading but this is handled.
6175 */
6176void
6177xmlParsePEReference(xmlParserCtxtPtr ctxt) {
6178 xmlChar *name;
6179 xmlEntityPtr entity = NULL;
6180 xmlParserInputPtr input;
6181
6182 if (RAW == '%') {
6183 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006184 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006185 if (name == NULL) {
6186 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6187 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6188 ctxt->sax->error(ctxt->userData,
6189 "xmlParsePEReference: no name\n");
6190 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006191 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006192 } else {
6193 if (RAW == ';') {
6194 NEXT;
6195 if ((ctxt->sax != NULL) &&
6196 (ctxt->sax->getParameterEntity != NULL))
6197 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6198 name);
6199 if (entity == NULL) {
6200 /*
6201 * [ WFC: Entity Declared ]
6202 * In a document without any DTD, a document with only an
6203 * internal DTD subset which contains no parameter entity
6204 * references, or a document with "standalone='yes'", ...
6205 * ... The declaration of a parameter entity must precede
6206 * any reference to it...
6207 */
6208 if ((ctxt->standalone == 1) ||
6209 ((ctxt->hasExternalSubset == 0) &&
6210 (ctxt->hasPErefs == 0))) {
6211 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6212 if ((!ctxt->disableSAX) &&
6213 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6214 ctxt->sax->error(ctxt->userData,
6215 "PEReference: %%%s; not found\n", name);
6216 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006217 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006218 } else {
6219 /*
6220 * [ VC: Entity Declared ]
6221 * In a document with an external subset or external
6222 * parameter entities with "standalone='no'", ...
6223 * ... The declaration of a parameter entity must precede
6224 * any reference to it...
6225 */
6226 if ((!ctxt->disableSAX) &&
6227 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6228 ctxt->sax->warning(ctxt->userData,
6229 "PEReference: %%%s; not found\n", name);
6230 ctxt->valid = 0;
6231 }
6232 } else {
6233 /*
6234 * Internal checking in case the entity quest barfed
6235 */
6236 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6237 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6238 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6239 ctxt->sax->warning(ctxt->userData,
6240 "Internal: %%%s; is not a parameter entity\n", name);
Daniel Veillardf5582f12002-06-11 10:08:16 +00006241 } else if (ctxt->input->free != deallocblankswrapper) {
6242 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6243 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00006244 } else {
6245 /*
6246 * TODO !!!
6247 * handle the extra spaces added before and after
6248 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6249 */
6250 input = xmlNewEntityInputStream(ctxt, entity);
6251 xmlPushInput(ctxt, input);
6252 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6253 (RAW == '<') && (NXT(1) == '?') &&
6254 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6255 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6256 xmlParseTextDecl(ctxt);
6257 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6258 /*
6259 * The XML REC instructs us to stop parsing
6260 * right here
6261 */
6262 ctxt->instate = XML_PARSER_EOF;
6263 xmlFree(name);
6264 return;
6265 }
6266 }
Owen Taylor3473f882001-02-23 17:55:21 +00006267 }
6268 }
6269 ctxt->hasPErefs = 1;
6270 } else {
6271 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6272 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6273 ctxt->sax->error(ctxt->userData,
6274 "xmlParsePEReference: expecting ';'\n");
6275 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006276 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006277 }
6278 xmlFree(name);
6279 }
6280 }
6281}
6282
6283/**
6284 * xmlParseStringPEReference:
6285 * @ctxt: an XML parser context
6286 * @str: a pointer to an index in the string
6287 *
6288 * parse PEReference declarations
6289 *
6290 * [69] PEReference ::= '%' Name ';'
6291 *
6292 * [ WFC: No Recursion ]
6293 * A parsed entity must not contain a recursive
6294 * reference to itself, either directly or indirectly.
6295 *
6296 * [ WFC: Entity Declared ]
6297 * In a document without any DTD, a document with only an internal DTD
6298 * subset which contains no parameter entity references, or a document
6299 * with "standalone='yes'", ... ... The declaration of a parameter
6300 * entity must precede any reference to it...
6301 *
6302 * [ VC: Entity Declared ]
6303 * In a document with an external subset or external parameter entities
6304 * with "standalone='no'", ... ... The declaration of a parameter entity
6305 * must precede any reference to it...
6306 *
6307 * [ WFC: In DTD ]
6308 * Parameter-entity references may only appear in the DTD.
6309 * NOTE: misleading but this is handled.
6310 *
6311 * Returns the string of the entity content.
6312 * str is updated to the current value of the index
6313 */
6314xmlEntityPtr
6315xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6316 const xmlChar *ptr;
6317 xmlChar cur;
6318 xmlChar *name;
6319 xmlEntityPtr entity = NULL;
6320
6321 if ((str == NULL) || (*str == NULL)) return(NULL);
6322 ptr = *str;
6323 cur = *ptr;
6324 if (cur == '%') {
6325 ptr++;
6326 cur = *ptr;
6327 name = xmlParseStringName(ctxt, &ptr);
6328 if (name == NULL) {
6329 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6331 ctxt->sax->error(ctxt->userData,
6332 "xmlParseStringPEReference: no name\n");
6333 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006334 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006335 } else {
6336 cur = *ptr;
6337 if (cur == ';') {
6338 ptr++;
6339 cur = *ptr;
6340 if ((ctxt->sax != NULL) &&
6341 (ctxt->sax->getParameterEntity != NULL))
6342 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6343 name);
6344 if (entity == NULL) {
6345 /*
6346 * [ WFC: Entity Declared ]
6347 * In a document without any DTD, a document with only an
6348 * internal DTD subset which contains no parameter entity
6349 * references, or a document with "standalone='yes'", ...
6350 * ... The declaration of a parameter entity must precede
6351 * any reference to it...
6352 */
6353 if ((ctxt->standalone == 1) ||
6354 ((ctxt->hasExternalSubset == 0) &&
6355 (ctxt->hasPErefs == 0))) {
6356 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6357 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6358 ctxt->sax->error(ctxt->userData,
6359 "PEReference: %%%s; not found\n", name);
6360 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006361 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006362 } else {
6363 /*
6364 * [ VC: Entity Declared ]
6365 * In a document with an external subset or external
6366 * parameter entities with "standalone='no'", ...
6367 * ... The declaration of a parameter entity must
6368 * precede any reference to it...
6369 */
6370 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6371 ctxt->sax->warning(ctxt->userData,
6372 "PEReference: %%%s; not found\n", name);
6373 ctxt->valid = 0;
6374 }
6375 } else {
6376 /*
6377 * Internal checking in case the entity quest barfed
6378 */
6379 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6380 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6381 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6382 ctxt->sax->warning(ctxt->userData,
6383 "Internal: %%%s; is not a parameter entity\n", name);
6384 }
6385 }
6386 ctxt->hasPErefs = 1;
6387 } else {
6388 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6389 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6390 ctxt->sax->error(ctxt->userData,
6391 "xmlParseStringPEReference: expecting ';'\n");
6392 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006393 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006394 }
6395 xmlFree(name);
6396 }
6397 }
6398 *str = ptr;
6399 return(entity);
6400}
6401
6402/**
6403 * xmlParseDocTypeDecl:
6404 * @ctxt: an XML parser context
6405 *
6406 * parse a DOCTYPE declaration
6407 *
6408 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6409 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6410 *
6411 * [ VC: Root Element Type ]
6412 * The Name in the document type declaration must match the element
6413 * type of the root element.
6414 */
6415
6416void
6417xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6418 xmlChar *name = NULL;
6419 xmlChar *ExternalID = NULL;
6420 xmlChar *URI = NULL;
6421
6422 /*
6423 * We know that '<!DOCTYPE' has been detected.
6424 */
6425 SKIP(9);
6426
6427 SKIP_BLANKS;
6428
6429 /*
6430 * Parse the DOCTYPE name.
6431 */
6432 name = xmlParseName(ctxt);
6433 if (name == NULL) {
6434 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6435 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6436 ctxt->sax->error(ctxt->userData,
6437 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6438 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006439 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006440 }
6441 ctxt->intSubName = name;
6442
6443 SKIP_BLANKS;
6444
6445 /*
6446 * Check for SystemID and ExternalID
6447 */
6448 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6449
6450 if ((URI != NULL) || (ExternalID != NULL)) {
6451 ctxt->hasExternalSubset = 1;
6452 }
6453 ctxt->extSubURI = URI;
6454 ctxt->extSubSystem = ExternalID;
6455
6456 SKIP_BLANKS;
6457
6458 /*
6459 * Create and update the internal subset.
6460 */
6461 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6462 (!ctxt->disableSAX))
6463 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6464
6465 /*
6466 * Is there any internal subset declarations ?
6467 * they are handled separately in xmlParseInternalSubset()
6468 */
6469 if (RAW == '[')
6470 return;
6471
6472 /*
6473 * We should be at the end of the DOCTYPE declaration.
6474 */
6475 if (RAW != '>') {
6476 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6477 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006478 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006479 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006480 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006481 }
6482 NEXT;
6483}
6484
6485/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006486 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006487 * @ctxt: an XML parser context
6488 *
6489 * parse the internal subset declaration
6490 *
6491 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6492 */
6493
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006494static void
Owen Taylor3473f882001-02-23 17:55:21 +00006495xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6496 /*
6497 * Is there any DTD definition ?
6498 */
6499 if (RAW == '[') {
6500 ctxt->instate = XML_PARSER_DTD;
6501 NEXT;
6502 /*
6503 * Parse the succession of Markup declarations and
6504 * PEReferences.
6505 * Subsequence (markupdecl | PEReference | S)*
6506 */
6507 while (RAW != ']') {
6508 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006509 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006510
6511 SKIP_BLANKS;
6512 xmlParseMarkupDecl(ctxt);
6513 xmlParsePEReference(ctxt);
6514
6515 /*
6516 * Pop-up of finished entities.
6517 */
6518 while ((RAW == 0) && (ctxt->inputNr > 1))
6519 xmlPopInput(ctxt);
6520
6521 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6522 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6523 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6524 ctxt->sax->error(ctxt->userData,
6525 "xmlParseInternalSubset: error detected in Markup declaration\n");
6526 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006527 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006528 break;
6529 }
6530 }
6531 if (RAW == ']') {
6532 NEXT;
6533 SKIP_BLANKS;
6534 }
6535 }
6536
6537 /*
6538 * We should be at the end of the DOCTYPE declaration.
6539 */
6540 if (RAW != '>') {
6541 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6542 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006543 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006544 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006545 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006546 }
6547 NEXT;
6548}
6549
6550/**
6551 * xmlParseAttribute:
6552 * @ctxt: an XML parser context
6553 * @value: a xmlChar ** used to store the value of the attribute
6554 *
6555 * parse an attribute
6556 *
6557 * [41] Attribute ::= Name Eq AttValue
6558 *
6559 * [ WFC: No External Entity References ]
6560 * Attribute values cannot contain direct or indirect entity references
6561 * to external entities.
6562 *
6563 * [ WFC: No < in Attribute Values ]
6564 * The replacement text of any entity referred to directly or indirectly in
6565 * an attribute value (other than "&lt;") must not contain a <.
6566 *
6567 * [ VC: Attribute Value Type ]
6568 * The attribute must have been declared; the value must be of the type
6569 * declared for it.
6570 *
6571 * [25] Eq ::= S? '=' S?
6572 *
6573 * With namespace:
6574 *
6575 * [NS 11] Attribute ::= QName Eq AttValue
6576 *
6577 * Also the case QName == xmlns:??? is handled independently as a namespace
6578 * definition.
6579 *
6580 * Returns the attribute name, and the value in *value.
6581 */
6582
6583xmlChar *
6584xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6585 xmlChar *name, *val;
6586
6587 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006588 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006589 name = xmlParseName(ctxt);
6590 if (name == NULL) {
6591 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6592 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6593 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6594 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006595 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006596 return(NULL);
6597 }
6598
6599 /*
6600 * read the value
6601 */
6602 SKIP_BLANKS;
6603 if (RAW == '=') {
6604 NEXT;
6605 SKIP_BLANKS;
6606 val = xmlParseAttValue(ctxt);
6607 ctxt->instate = XML_PARSER_CONTENT;
6608 } else {
6609 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6611 ctxt->sax->error(ctxt->userData,
6612 "Specification mandate value for attribute %s\n", name);
6613 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006614 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006615 xmlFree(name);
6616 return(NULL);
6617 }
6618
6619 /*
6620 * Check that xml:lang conforms to the specification
6621 * No more registered as an error, just generate a warning now
6622 * since this was deprecated in XML second edition
6623 */
6624 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6625 if (!xmlCheckLanguageID(val)) {
6626 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6627 ctxt->sax->warning(ctxt->userData,
6628 "Malformed value for xml:lang : %s\n", val);
6629 }
6630 }
6631
6632 /*
6633 * Check that xml:space conforms to the specification
6634 */
6635 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6636 if (xmlStrEqual(val, BAD_CAST "default"))
6637 *(ctxt->space) = 0;
6638 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6639 *(ctxt->space) = 1;
6640 else {
6641 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6642 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6643 ctxt->sax->error(ctxt->userData,
Daniel Veillard642104e2003-03-26 16:32:05 +00006644"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006645 val);
6646 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006647 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006648 }
6649 }
6650
6651 *value = val;
6652 return(name);
6653}
6654
6655/**
6656 * xmlParseStartTag:
6657 * @ctxt: an XML parser context
6658 *
6659 * parse a start of tag either for rule element or
6660 * EmptyElement. In both case we don't parse the tag closing chars.
6661 *
6662 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6663 *
6664 * [ WFC: Unique Att Spec ]
6665 * No attribute name may appear more than once in the same start-tag or
6666 * empty-element tag.
6667 *
6668 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6669 *
6670 * [ WFC: Unique Att Spec ]
6671 * No attribute name may appear more than once in the same start-tag or
6672 * empty-element tag.
6673 *
6674 * With namespace:
6675 *
6676 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6677 *
6678 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6679 *
6680 * Returns the element name parsed
6681 */
6682
6683xmlChar *
6684xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6685 xmlChar *name;
6686 xmlChar *attname;
6687 xmlChar *attvalue;
6688 const xmlChar **atts = NULL;
6689 int nbatts = 0;
6690 int maxatts = 0;
6691 int i;
6692
6693 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006694 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006695
6696 name = xmlParseName(ctxt);
6697 if (name == NULL) {
6698 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6699 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6700 ctxt->sax->error(ctxt->userData,
6701 "xmlParseStartTag: invalid element name\n");
6702 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006703 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006704 return(NULL);
6705 }
6706
6707 /*
6708 * Now parse the attributes, it ends up with the ending
6709 *
6710 * (S Attribute)* S?
6711 */
6712 SKIP_BLANKS;
6713 GROW;
6714
Daniel Veillard21a0f912001-02-25 19:54:14 +00006715 while ((RAW != '>') &&
6716 ((RAW != '/') || (NXT(1) != '>')) &&
6717 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006718 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006719 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006720
6721 attname = xmlParseAttribute(ctxt, &attvalue);
6722 if ((attname != NULL) && (attvalue != NULL)) {
6723 /*
6724 * [ WFC: Unique Att Spec ]
6725 * No attribute name may appear more than once in the same
6726 * start-tag or empty-element tag.
6727 */
6728 for (i = 0; i < nbatts;i += 2) {
6729 if (xmlStrEqual(atts[i], attname)) {
6730 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6731 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6732 ctxt->sax->error(ctxt->userData,
6733 "Attribute %s redefined\n",
6734 attname);
6735 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006736 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006737 xmlFree(attname);
6738 xmlFree(attvalue);
6739 goto failed;
6740 }
6741 }
6742
6743 /*
6744 * Add the pair to atts
6745 */
6746 if (atts == NULL) {
6747 maxatts = 10;
6748 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6749 if (atts == NULL) {
6750 xmlGenericError(xmlGenericErrorContext,
6751 "malloc of %ld byte failed\n",
6752 maxatts * (long)sizeof(xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006753 if (attname != NULL)
6754 xmlFree(attname);
6755 if (attvalue != NULL)
6756 xmlFree(attvalue);
6757 ctxt->errNo = XML_ERR_NO_MEMORY;
6758 ctxt->instate = XML_PARSER_EOF;
6759 ctxt->disableSAX = 1;
6760 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006761 }
6762 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006763 const xmlChar **n;
6764
Owen Taylor3473f882001-02-23 17:55:21 +00006765 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006766 n = (const xmlChar **) xmlRealloc((void *) atts,
Owen Taylor3473f882001-02-23 17:55:21 +00006767 maxatts * sizeof(xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006768 if (n == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006769 xmlGenericError(xmlGenericErrorContext,
6770 "realloc of %ld byte failed\n",
6771 maxatts * (long)sizeof(xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006772 if (attname != NULL)
6773 xmlFree(attname);
6774 if (attvalue != NULL)
6775 xmlFree(attvalue);
6776 ctxt->errNo = XML_ERR_NO_MEMORY;
6777 ctxt->instate = XML_PARSER_EOF;
6778 ctxt->disableSAX = 1;
6779 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006780 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006781 atts = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006782 }
6783 atts[nbatts++] = attname;
6784 atts[nbatts++] = attvalue;
6785 atts[nbatts] = NULL;
6786 atts[nbatts + 1] = NULL;
6787 } else {
6788 if (attname != NULL)
6789 xmlFree(attname);
6790 if (attvalue != NULL)
6791 xmlFree(attvalue);
6792 }
6793
6794failed:
6795
Daniel Veillard3772de32002-12-17 10:31:45 +00006796 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006797 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6798 break;
6799 if (!IS_BLANK(RAW)) {
6800 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6801 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6802 ctxt->sax->error(ctxt->userData,
6803 "attributes construct error\n");
6804 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006805 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006806 }
6807 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006808 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6809 (attname == NULL) && (attvalue == NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006810 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6811 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6812 ctxt->sax->error(ctxt->userData,
6813 "xmlParseStartTag: problem parsing attributes\n");
6814 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006815 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006816 break;
6817 }
6818 GROW;
6819 }
6820
6821 /*
6822 * SAX: Start of Element !
6823 */
6824 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6825 (!ctxt->disableSAX))
6826 ctxt->sax->startElement(ctxt->userData, name, atts);
6827
6828 if (atts != NULL) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006829 for (i = 0;i < nbatts;i++)
6830 if (atts[i] != NULL)
6831 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006832 xmlFree((void *) atts);
6833 }
6834 return(name);
6835}
6836
6837/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006838 * xmlParseEndTagInternal:
Owen Taylor3473f882001-02-23 17:55:21 +00006839 * @ctxt: an XML parser context
6840 *
6841 * parse an end of tag
6842 *
6843 * [42] ETag ::= '</' Name S? '>'
6844 *
6845 * With namespace
6846 *
6847 * [NS 9] ETag ::= '</' QName S? '>'
6848 */
6849
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006850static void
6851xmlParseEndTagInternal(xmlParserCtxtPtr ctxt, int line) {
Owen Taylor3473f882001-02-23 17:55:21 +00006852 xmlChar *name;
6853 xmlChar *oldname;
6854
6855 GROW;
6856 if ((RAW != '<') || (NXT(1) != '/')) {
6857 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6858 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6859 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6860 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006861 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006862 return;
6863 }
6864 SKIP(2);
6865
Daniel Veillard46de64e2002-05-29 08:21:33 +00006866 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006867
6868 /*
6869 * We should definitely be at the ending "S? '>'" part
6870 */
6871 GROW;
6872 SKIP_BLANKS;
6873 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6874 ctxt->errNo = XML_ERR_GT_REQUIRED;
6875 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6876 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6877 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006878 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006879 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006880 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006881
6882 /*
6883 * [ WFC: Element Type Match ]
6884 * The Name in an element's end-tag must match the element type in the
6885 * start-tag.
6886 *
6887 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006888 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00006889 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6890 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00006891 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006892 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006893 "Opening and ending tag mismatch: %s line %d and %s\n",
6894 ctxt->name, line, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00006895 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006896 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006897 "Ending tag error for: %s line %d\n", ctxt->name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00006898 }
6899
6900 }
6901 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006902 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006903 if (name != NULL)
6904 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00006905 }
6906
6907 /*
6908 * SAX: End of Tag
6909 */
6910 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6911 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006912 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006913
Owen Taylor3473f882001-02-23 17:55:21 +00006914 oldname = namePop(ctxt);
6915 spacePop(ctxt);
6916 if (oldname != NULL) {
6917#ifdef DEBUG_STACK
6918 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6919#endif
6920 xmlFree(oldname);
6921 }
6922 return;
6923}
6924
6925/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006926 * xmlParseEndTag:
6927 * @ctxt: an XML parser context
6928 *
6929 * parse an end of tag
6930 *
6931 * [42] ETag ::= '</' Name S? '>'
6932 *
6933 * With namespace
6934 *
6935 * [NS 9] ETag ::= '</' QName S? '>'
6936 */
6937
6938void
6939xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6940 xmlParseEndTagInternal(ctxt, 0);
6941}
6942
6943/**
Owen Taylor3473f882001-02-23 17:55:21 +00006944 * xmlParseCDSect:
6945 * @ctxt: an XML parser context
6946 *
6947 * Parse escaped pure raw content.
6948 *
6949 * [18] CDSect ::= CDStart CData CDEnd
6950 *
6951 * [19] CDStart ::= '<![CDATA['
6952 *
6953 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6954 *
6955 * [21] CDEnd ::= ']]>'
6956 */
6957void
6958xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6959 xmlChar *buf = NULL;
6960 int len = 0;
6961 int size = XML_PARSER_BUFFER_SIZE;
6962 int r, rl;
6963 int s, sl;
6964 int cur, l;
6965 int count = 0;
6966
6967 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6968 (NXT(2) == '[') && (NXT(3) == 'C') &&
6969 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6970 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6971 (NXT(8) == '[')) {
6972 SKIP(9);
6973 } else
6974 return;
6975
6976 ctxt->instate = XML_PARSER_CDATA_SECTION;
6977 r = CUR_CHAR(rl);
6978 if (!IS_CHAR(r)) {
6979 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6980 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6981 ctxt->sax->error(ctxt->userData,
6982 "CData section not finished\n");
6983 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006984 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006985 ctxt->instate = XML_PARSER_CONTENT;
6986 return;
6987 }
6988 NEXTL(rl);
6989 s = CUR_CHAR(sl);
6990 if (!IS_CHAR(s)) {
6991 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6992 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6993 ctxt->sax->error(ctxt->userData,
6994 "CData section not finished\n");
6995 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006996 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006997 ctxt->instate = XML_PARSER_CONTENT;
6998 return;
6999 }
7000 NEXTL(sl);
7001 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007002 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007003 if (buf == NULL) {
7004 xmlGenericError(xmlGenericErrorContext,
7005 "malloc of %d byte failed\n", size);
7006 return;
7007 }
7008 while (IS_CHAR(cur) &&
7009 ((r != ']') || (s != ']') || (cur != '>'))) {
7010 if (len + 5 >= size) {
7011 size *= 2;
7012 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7013 if (buf == NULL) {
7014 xmlGenericError(xmlGenericErrorContext,
7015 "realloc of %d byte failed\n", size);
7016 return;
7017 }
7018 }
7019 COPY_BUF(rl,buf,len,r);
7020 r = s;
7021 rl = sl;
7022 s = cur;
7023 sl = l;
7024 count++;
7025 if (count > 50) {
7026 GROW;
7027 count = 0;
7028 }
7029 NEXTL(l);
7030 cur = CUR_CHAR(l);
7031 }
7032 buf[len] = 0;
7033 ctxt->instate = XML_PARSER_CONTENT;
7034 if (cur != '>') {
7035 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
7036 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7037 ctxt->sax->error(ctxt->userData,
7038 "CData section not finished\n%.50s\n", buf);
7039 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007040 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007041 xmlFree(buf);
7042 return;
7043 }
7044 NEXTL(l);
7045
7046 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007047 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00007048 */
7049 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
7050 if (ctxt->sax->cdataBlock != NULL)
7051 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00007052 else if (ctxt->sax->characters != NULL)
7053 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00007054 }
7055 xmlFree(buf);
7056}
7057
7058/**
7059 * xmlParseContent:
7060 * @ctxt: an XML parser context
7061 *
7062 * Parse a content:
7063 *
7064 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
7065 */
7066
7067void
7068xmlParseContent(xmlParserCtxtPtr ctxt) {
7069 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00007070 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007071 ((RAW != '<') || (NXT(1) != '/'))) {
7072 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007073 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00007074 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00007075
7076 /*
Owen Taylor3473f882001-02-23 17:55:21 +00007077 * First case : a Processing Instruction.
7078 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00007079 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007080 xmlParsePI(ctxt);
7081 }
7082
7083 /*
7084 * Second case : a CDSection
7085 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007086 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007087 (NXT(2) == '[') && (NXT(3) == 'C') &&
7088 (NXT(4) == 'D') && (NXT(5) == 'A') &&
7089 (NXT(6) == 'T') && (NXT(7) == 'A') &&
7090 (NXT(8) == '[')) {
7091 xmlParseCDSect(ctxt);
7092 }
7093
7094 /*
7095 * Third case : a comment
7096 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007097 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007098 (NXT(2) == '-') && (NXT(3) == '-')) {
7099 xmlParseComment(ctxt);
7100 ctxt->instate = XML_PARSER_CONTENT;
7101 }
7102
7103 /*
7104 * Fourth case : a sub-element.
7105 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007106 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00007107 xmlParseElement(ctxt);
7108 }
7109
7110 /*
7111 * Fifth case : a reference. If if has not been resolved,
7112 * parsing returns it's Name, create the node
7113 */
7114
Daniel Veillard21a0f912001-02-25 19:54:14 +00007115 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00007116 xmlParseReference(ctxt);
7117 }
7118
7119 /*
7120 * Last case, text. Note that References are handled directly.
7121 */
7122 else {
7123 xmlParseCharData(ctxt, 0);
7124 }
7125
7126 GROW;
7127 /*
7128 * Pop-up of finished entities.
7129 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007130 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007131 xmlPopInput(ctxt);
7132 SHRINK;
7133
Daniel Veillardfdc91562002-07-01 21:52:03 +00007134 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007135 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
7136 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7137 ctxt->sax->error(ctxt->userData,
7138 "detected an error in element content\n");
7139 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007140 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007141 ctxt->instate = XML_PARSER_EOF;
7142 break;
7143 }
7144 }
7145}
7146
7147/**
7148 * xmlParseElement:
7149 * @ctxt: an XML parser context
7150 *
7151 * parse an XML element, this is highly recursive
7152 *
7153 * [39] element ::= EmptyElemTag | STag content ETag
7154 *
7155 * [ WFC: Element Type Match ]
7156 * The Name in an element's end-tag must match the element type in the
7157 * start-tag.
7158 *
7159 * [ VC: Element Valid ]
7160 * An element is valid if there is a declaration matching elementdecl
7161 * where the Name matches the element type and one of the following holds:
7162 * - The declaration matches EMPTY and the element has no content.
7163 * - The declaration matches children and the sequence of child elements
7164 * belongs to the language generated by the regular expression in the
7165 * content model, with optional white space (characters matching the
7166 * nonterminal S) between each pair of child elements.
7167 * - The declaration matches Mixed and the content consists of character
7168 * data and child elements whose types match names in the content model.
7169 * - The declaration matches ANY, and the types of any child elements have
7170 * been declared.
7171 */
7172
7173void
7174xmlParseElement(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00007175 xmlChar *name;
7176 xmlChar *oldname;
7177 xmlParserNodeInfo node_info;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007178 int line;
Owen Taylor3473f882001-02-23 17:55:21 +00007179 xmlNodePtr ret;
7180
7181 /* Capture start position */
7182 if (ctxt->record_info) {
7183 node_info.begin_pos = ctxt->input->consumed +
7184 (CUR_PTR - ctxt->input->base);
7185 node_info.begin_line = ctxt->input->line;
7186 }
7187
7188 if (ctxt->spaceNr == 0)
7189 spacePush(ctxt, -1);
7190 else
7191 spacePush(ctxt, *ctxt->space);
7192
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007193 line = ctxt->input->line;
Owen Taylor3473f882001-02-23 17:55:21 +00007194 name = xmlParseStartTag(ctxt);
7195 if (name == NULL) {
7196 spacePop(ctxt);
7197 return;
7198 }
7199 namePush(ctxt, name);
7200 ret = ctxt->node;
7201
7202 /*
7203 * [ VC: Root Element Type ]
7204 * The Name in the document type declaration must match the element
7205 * type of the root element.
7206 */
7207 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7208 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7209 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7210
7211 /*
7212 * Check for an Empty Element.
7213 */
7214 if ((RAW == '/') && (NXT(1) == '>')) {
7215 SKIP(2);
7216 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7217 (!ctxt->disableSAX))
7218 ctxt->sax->endElement(ctxt->userData, name);
7219 oldname = namePop(ctxt);
7220 spacePop(ctxt);
7221 if (oldname != NULL) {
7222#ifdef DEBUG_STACK
7223 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7224#endif
7225 xmlFree(oldname);
7226 }
7227 if ( ret != NULL && ctxt->record_info ) {
7228 node_info.end_pos = ctxt->input->consumed +
7229 (CUR_PTR - ctxt->input->base);
7230 node_info.end_line = ctxt->input->line;
7231 node_info.node = ret;
7232 xmlParserAddNodeInfo(ctxt, &node_info);
7233 }
7234 return;
7235 }
7236 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007237 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007238 } else {
7239 ctxt->errNo = XML_ERR_GT_REQUIRED;
7240 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7241 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007242 "Couldn't find end of Start Tag %s line %d\n",
7243 name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00007244 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007245 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007246
7247 /*
7248 * end of parsing of this node.
7249 */
7250 nodePop(ctxt);
7251 oldname = namePop(ctxt);
7252 spacePop(ctxt);
7253 if (oldname != NULL) {
7254#ifdef DEBUG_STACK
7255 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7256#endif
7257 xmlFree(oldname);
7258 }
7259
7260 /*
7261 * Capture end position and add node
7262 */
7263 if ( ret != NULL && ctxt->record_info ) {
7264 node_info.end_pos = ctxt->input->consumed +
7265 (CUR_PTR - ctxt->input->base);
7266 node_info.end_line = ctxt->input->line;
7267 node_info.node = ret;
7268 xmlParserAddNodeInfo(ctxt, &node_info);
7269 }
7270 return;
7271 }
7272
7273 /*
7274 * Parse the content of the element:
7275 */
7276 xmlParseContent(ctxt);
7277 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00007278 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00007279 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7280 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007281 "Premature end of data in tag %s line %d\n", name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00007282 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007283 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007284
7285 /*
7286 * end of parsing of this node.
7287 */
7288 nodePop(ctxt);
7289 oldname = namePop(ctxt);
7290 spacePop(ctxt);
7291 if (oldname != NULL) {
7292#ifdef DEBUG_STACK
7293 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7294#endif
7295 xmlFree(oldname);
7296 }
7297 return;
7298 }
7299
7300 /*
7301 * parse the end of tag: '</' should be here.
7302 */
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007303 xmlParseEndTagInternal(ctxt, line);
Owen Taylor3473f882001-02-23 17:55:21 +00007304
7305 /*
7306 * Capture end position and add node
7307 */
7308 if ( ret != NULL && ctxt->record_info ) {
7309 node_info.end_pos = ctxt->input->consumed +
7310 (CUR_PTR - ctxt->input->base);
7311 node_info.end_line = ctxt->input->line;
7312 node_info.node = ret;
7313 xmlParserAddNodeInfo(ctxt, &node_info);
7314 }
7315}
7316
7317/**
7318 * xmlParseVersionNum:
7319 * @ctxt: an XML parser context
7320 *
7321 * parse the XML version value.
7322 *
7323 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7324 *
7325 * Returns the string giving the XML version number, or NULL
7326 */
7327xmlChar *
7328xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7329 xmlChar *buf = NULL;
7330 int len = 0;
7331 int size = 10;
7332 xmlChar cur;
7333
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007334 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007335 if (buf == NULL) {
7336 xmlGenericError(xmlGenericErrorContext,
7337 "malloc of %d byte failed\n", size);
7338 return(NULL);
7339 }
7340 cur = CUR;
7341 while (((cur >= 'a') && (cur <= 'z')) ||
7342 ((cur >= 'A') && (cur <= 'Z')) ||
7343 ((cur >= '0') && (cur <= '9')) ||
7344 (cur == '_') || (cur == '.') ||
7345 (cur == ':') || (cur == '-')) {
7346 if (len + 1 >= size) {
7347 size *= 2;
7348 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7349 if (buf == NULL) {
7350 xmlGenericError(xmlGenericErrorContext,
7351 "realloc of %d byte failed\n", size);
7352 return(NULL);
7353 }
7354 }
7355 buf[len++] = cur;
7356 NEXT;
7357 cur=CUR;
7358 }
7359 buf[len] = 0;
7360 return(buf);
7361}
7362
7363/**
7364 * xmlParseVersionInfo:
7365 * @ctxt: an XML parser context
7366 *
7367 * parse the XML version.
7368 *
7369 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7370 *
7371 * [25] Eq ::= S? '=' S?
7372 *
7373 * Returns the version string, e.g. "1.0"
7374 */
7375
7376xmlChar *
7377xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7378 xmlChar *version = NULL;
7379 const xmlChar *q;
7380
7381 if ((RAW == 'v') && (NXT(1) == 'e') &&
7382 (NXT(2) == 'r') && (NXT(3) == 's') &&
7383 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7384 (NXT(6) == 'n')) {
7385 SKIP(7);
7386 SKIP_BLANKS;
7387 if (RAW != '=') {
7388 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7389 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7390 ctxt->sax->error(ctxt->userData,
7391 "xmlParseVersionInfo : expected '='\n");
7392 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007393 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007394 return(NULL);
7395 }
7396 NEXT;
7397 SKIP_BLANKS;
7398 if (RAW == '"') {
7399 NEXT;
7400 q = CUR_PTR;
7401 version = xmlParseVersionNum(ctxt);
7402 if (RAW != '"') {
7403 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7404 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7405 ctxt->sax->error(ctxt->userData,
7406 "String not closed\n%.50s\n", q);
7407 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007408 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007409 } else
7410 NEXT;
7411 } else if (RAW == '\''){
7412 NEXT;
7413 q = CUR_PTR;
7414 version = xmlParseVersionNum(ctxt);
7415 if (RAW != '\'') {
7416 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7417 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7418 ctxt->sax->error(ctxt->userData,
7419 "String not closed\n%.50s\n", q);
7420 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007421 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007422 } else
7423 NEXT;
7424 } else {
7425 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7426 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7427 ctxt->sax->error(ctxt->userData,
7428 "xmlParseVersionInfo : expected ' or \"\n");
7429 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007430 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007431 }
7432 }
7433 return(version);
7434}
7435
7436/**
7437 * xmlParseEncName:
7438 * @ctxt: an XML parser context
7439 *
7440 * parse the XML encoding name
7441 *
7442 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7443 *
7444 * Returns the encoding name value or NULL
7445 */
7446xmlChar *
7447xmlParseEncName(xmlParserCtxtPtr ctxt) {
7448 xmlChar *buf = NULL;
7449 int len = 0;
7450 int size = 10;
7451 xmlChar cur;
7452
7453 cur = CUR;
7454 if (((cur >= 'a') && (cur <= 'z')) ||
7455 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007456 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007457 if (buf == NULL) {
7458 xmlGenericError(xmlGenericErrorContext,
7459 "malloc of %d byte failed\n", size);
7460 return(NULL);
7461 }
7462
7463 buf[len++] = cur;
7464 NEXT;
7465 cur = CUR;
7466 while (((cur >= 'a') && (cur <= 'z')) ||
7467 ((cur >= 'A') && (cur <= 'Z')) ||
7468 ((cur >= '0') && (cur <= '9')) ||
7469 (cur == '.') || (cur == '_') ||
7470 (cur == '-')) {
7471 if (len + 1 >= size) {
7472 size *= 2;
7473 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7474 if (buf == NULL) {
7475 xmlGenericError(xmlGenericErrorContext,
7476 "realloc of %d byte failed\n", size);
7477 return(NULL);
7478 }
7479 }
7480 buf[len++] = cur;
7481 NEXT;
7482 cur = CUR;
7483 if (cur == 0) {
7484 SHRINK;
7485 GROW;
7486 cur = CUR;
7487 }
7488 }
7489 buf[len] = 0;
7490 } else {
7491 ctxt->errNo = XML_ERR_ENCODING_NAME;
7492 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7493 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7494 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007495 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007496 }
7497 return(buf);
7498}
7499
7500/**
7501 * xmlParseEncodingDecl:
7502 * @ctxt: an XML parser context
7503 *
7504 * parse the XML encoding declaration
7505 *
7506 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7507 *
7508 * this setups the conversion filters.
7509 *
7510 * Returns the encoding value or NULL
7511 */
7512
7513xmlChar *
7514xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7515 xmlChar *encoding = NULL;
7516 const xmlChar *q;
7517
7518 SKIP_BLANKS;
7519 if ((RAW == 'e') && (NXT(1) == 'n') &&
7520 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7521 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7522 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7523 SKIP(8);
7524 SKIP_BLANKS;
7525 if (RAW != '=') {
7526 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7527 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7528 ctxt->sax->error(ctxt->userData,
7529 "xmlParseEncodingDecl : expected '='\n");
7530 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007531 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007532 return(NULL);
7533 }
7534 NEXT;
7535 SKIP_BLANKS;
7536 if (RAW == '"') {
7537 NEXT;
7538 q = CUR_PTR;
7539 encoding = xmlParseEncName(ctxt);
7540 if (RAW != '"') {
7541 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7542 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7543 ctxt->sax->error(ctxt->userData,
7544 "String not closed\n%.50s\n", q);
7545 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007546 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007547 } else
7548 NEXT;
7549 } else if (RAW == '\''){
7550 NEXT;
7551 q = CUR_PTR;
7552 encoding = xmlParseEncName(ctxt);
7553 if (RAW != '\'') {
7554 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7555 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7556 ctxt->sax->error(ctxt->userData,
7557 "String not closed\n%.50s\n", q);
7558 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007559 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007560 } else
7561 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007562 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007563 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7564 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7565 ctxt->sax->error(ctxt->userData,
7566 "xmlParseEncodingDecl : expected ' or \"\n");
7567 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007568 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007569 }
7570 if (encoding != NULL) {
7571 xmlCharEncoding enc;
7572 xmlCharEncodingHandlerPtr handler;
7573
7574 if (ctxt->input->encoding != NULL)
7575 xmlFree((xmlChar *) ctxt->input->encoding);
7576 ctxt->input->encoding = encoding;
7577
7578 enc = xmlParseCharEncoding((const char *) encoding);
7579 /*
7580 * registered set of known encodings
7581 */
7582 if (enc != XML_CHAR_ENCODING_ERROR) {
7583 xmlSwitchEncoding(ctxt, enc);
7584 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
Daniel Veillard46d6c442002-04-09 16:10:39 +00007585 ctxt->input->encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007586 xmlFree(encoding);
7587 return(NULL);
7588 }
7589 } else {
7590 /*
7591 * fallback for unknown encodings
7592 */
7593 handler = xmlFindCharEncodingHandler((const char *) encoding);
7594 if (handler != NULL) {
7595 xmlSwitchToEncoding(ctxt, handler);
7596 } else {
7597 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7598 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7599 ctxt->sax->error(ctxt->userData,
7600 "Unsupported encoding %s\n", encoding);
7601 return(NULL);
7602 }
7603 }
7604 }
7605 }
7606 return(encoding);
7607}
7608
7609/**
7610 * xmlParseSDDecl:
7611 * @ctxt: an XML parser context
7612 *
7613 * parse the XML standalone declaration
7614 *
7615 * [32] SDDecl ::= S 'standalone' Eq
7616 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7617 *
7618 * [ VC: Standalone Document Declaration ]
7619 * TODO The standalone document declaration must have the value "no"
7620 * if any external markup declarations contain declarations of:
7621 * - attributes with default values, if elements to which these
7622 * attributes apply appear in the document without specifications
7623 * of values for these attributes, or
7624 * - entities (other than amp, lt, gt, apos, quot), if references
7625 * to those entities appear in the document, or
7626 * - attributes with values subject to normalization, where the
7627 * attribute appears in the document with a value which will change
7628 * as a result of normalization, or
7629 * - element types with element content, if white space occurs directly
7630 * within any instance of those types.
7631 *
7632 * Returns 1 if standalone, 0 otherwise
7633 */
7634
7635int
7636xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7637 int standalone = -1;
7638
7639 SKIP_BLANKS;
7640 if ((RAW == 's') && (NXT(1) == 't') &&
7641 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7642 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7643 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7644 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7645 SKIP(10);
7646 SKIP_BLANKS;
7647 if (RAW != '=') {
7648 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7650 ctxt->sax->error(ctxt->userData,
7651 "XML standalone declaration : expected '='\n");
7652 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007653 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007654 return(standalone);
7655 }
7656 NEXT;
7657 SKIP_BLANKS;
7658 if (RAW == '\''){
7659 NEXT;
7660 if ((RAW == 'n') && (NXT(1) == 'o')) {
7661 standalone = 0;
7662 SKIP(2);
7663 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7664 (NXT(2) == 's')) {
7665 standalone = 1;
7666 SKIP(3);
7667 } else {
7668 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7669 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7670 ctxt->sax->error(ctxt->userData,
7671 "standalone accepts only 'yes' or 'no'\n");
7672 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007673 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007674 }
7675 if (RAW != '\'') {
7676 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7677 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7678 ctxt->sax->error(ctxt->userData, "String not closed\n");
7679 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007680 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007681 } else
7682 NEXT;
7683 } else if (RAW == '"'){
7684 NEXT;
7685 if ((RAW == 'n') && (NXT(1) == 'o')) {
7686 standalone = 0;
7687 SKIP(2);
7688 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7689 (NXT(2) == 's')) {
7690 standalone = 1;
7691 SKIP(3);
7692 } else {
7693 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7694 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7695 ctxt->sax->error(ctxt->userData,
7696 "standalone accepts only 'yes' or 'no'\n");
7697 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007698 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007699 }
7700 if (RAW != '"') {
7701 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7702 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7703 ctxt->sax->error(ctxt->userData, "String not closed\n");
7704 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007705 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007706 } else
7707 NEXT;
7708 } else {
7709 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7710 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7711 ctxt->sax->error(ctxt->userData,
7712 "Standalone value not found\n");
7713 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007714 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007715 }
7716 }
7717 return(standalone);
7718}
7719
7720/**
7721 * xmlParseXMLDecl:
7722 * @ctxt: an XML parser context
7723 *
7724 * parse an XML declaration header
7725 *
7726 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7727 */
7728
7729void
7730xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7731 xmlChar *version;
7732
7733 /*
7734 * We know that '<?xml' is here.
7735 */
7736 SKIP(5);
7737
7738 if (!IS_BLANK(RAW)) {
7739 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7740 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7741 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7742 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007743 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007744 }
7745 SKIP_BLANKS;
7746
7747 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007748 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007749 */
7750 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007751 if (version == NULL) {
7752 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7753 ctxt->sax->error(ctxt->userData,
7754 "Malformed declaration expecting version\n");
7755 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007756 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard19840942001-11-29 16:11:38 +00007757 } else {
7758 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7759 /*
7760 * TODO: Blueberry should be detected here
7761 */
7762 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7763 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7764 version);
7765 }
7766 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007767 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007768 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007769 }
Owen Taylor3473f882001-02-23 17:55:21 +00007770
7771 /*
7772 * We may have the encoding declaration
7773 */
7774 if (!IS_BLANK(RAW)) {
7775 if ((RAW == '?') && (NXT(1) == '>')) {
7776 SKIP(2);
7777 return;
7778 }
7779 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7780 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7781 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7782 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007783 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007784 }
7785 xmlParseEncodingDecl(ctxt);
7786 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7787 /*
7788 * The XML REC instructs us to stop parsing right here
7789 */
7790 return;
7791 }
7792
7793 /*
7794 * We may have the standalone status.
7795 */
7796 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7797 if ((RAW == '?') && (NXT(1) == '>')) {
7798 SKIP(2);
7799 return;
7800 }
7801 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7802 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7803 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7804 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007805 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007806 }
7807 SKIP_BLANKS;
7808 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7809
7810 SKIP_BLANKS;
7811 if ((RAW == '?') && (NXT(1) == '>')) {
7812 SKIP(2);
7813 } else if (RAW == '>') {
7814 /* Deprecated old WD ... */
7815 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7816 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7817 ctxt->sax->error(ctxt->userData,
7818 "XML declaration must end-up with '?>'\n");
7819 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007820 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007821 NEXT;
7822 } else {
7823 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7824 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7825 ctxt->sax->error(ctxt->userData,
7826 "parsing XML declaration: '?>' expected\n");
7827 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007828 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007829 MOVETO_ENDTAG(CUR_PTR);
7830 NEXT;
7831 }
7832}
7833
7834/**
7835 * xmlParseMisc:
7836 * @ctxt: an XML parser context
7837 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007838 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007839 *
7840 * [27] Misc ::= Comment | PI | S
7841 */
7842
7843void
7844xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00007845 while (((RAW == '<') && (NXT(1) == '?')) ||
7846 ((RAW == '<') && (NXT(1) == '!') &&
7847 (NXT(2) == '-') && (NXT(3) == '-')) ||
7848 IS_BLANK(CUR)) {
7849 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007850 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00007851 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007852 NEXT;
7853 } else
7854 xmlParseComment(ctxt);
7855 }
7856}
7857
7858/**
7859 * xmlParseDocument:
7860 * @ctxt: an XML parser context
7861 *
7862 * parse an XML document (and build a tree if using the standard SAX
7863 * interface).
7864 *
7865 * [1] document ::= prolog element Misc*
7866 *
7867 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7868 *
7869 * Returns 0, -1 in case of error. the parser context is augmented
7870 * as a result of the parsing.
7871 */
7872
7873int
7874xmlParseDocument(xmlParserCtxtPtr ctxt) {
7875 xmlChar start[4];
7876 xmlCharEncoding enc;
7877
7878 xmlInitParser();
7879
7880 GROW;
7881
7882 /*
7883 * SAX: beginning of the document processing.
7884 */
7885 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7886 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7887
Daniel Veillard50f34372001-08-03 12:06:36 +00007888 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007889 /*
7890 * Get the 4 first bytes and decode the charset
7891 * if enc != XML_CHAR_ENCODING_NONE
7892 * plug some encoding conversion routines.
7893 */
7894 start[0] = RAW;
7895 start[1] = NXT(1);
7896 start[2] = NXT(2);
7897 start[3] = NXT(3);
7898 enc = xmlDetectCharEncoding(start, 4);
7899 if (enc != XML_CHAR_ENCODING_NONE) {
7900 xmlSwitchEncoding(ctxt, enc);
7901 }
Owen Taylor3473f882001-02-23 17:55:21 +00007902 }
7903
7904
7905 if (CUR == 0) {
7906 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7907 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7908 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7909 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007910 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007911 }
7912
7913 /*
7914 * Check for the XMLDecl in the Prolog.
7915 */
7916 GROW;
7917 if ((RAW == '<') && (NXT(1) == '?') &&
7918 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7919 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7920
7921 /*
7922 * Note that we will switch encoding on the fly.
7923 */
7924 xmlParseXMLDecl(ctxt);
7925 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7926 /*
7927 * The XML REC instructs us to stop parsing right here
7928 */
7929 return(-1);
7930 }
7931 ctxt->standalone = ctxt->input->standalone;
7932 SKIP_BLANKS;
7933 } else {
7934 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7935 }
7936 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7937 ctxt->sax->startDocument(ctxt->userData);
7938
7939 /*
7940 * The Misc part of the Prolog
7941 */
7942 GROW;
7943 xmlParseMisc(ctxt);
7944
7945 /*
7946 * Then possibly doc type declaration(s) and more Misc
7947 * (doctypedecl Misc*)?
7948 */
7949 GROW;
7950 if ((RAW == '<') && (NXT(1) == '!') &&
7951 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7952 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7953 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7954 (NXT(8) == 'E')) {
7955
7956 ctxt->inSubset = 1;
7957 xmlParseDocTypeDecl(ctxt);
7958 if (RAW == '[') {
7959 ctxt->instate = XML_PARSER_DTD;
7960 xmlParseInternalSubset(ctxt);
7961 }
7962
7963 /*
7964 * Create and update the external subset.
7965 */
7966 ctxt->inSubset = 2;
7967 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7968 (!ctxt->disableSAX))
7969 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7970 ctxt->extSubSystem, ctxt->extSubURI);
7971 ctxt->inSubset = 0;
7972
7973
7974 ctxt->instate = XML_PARSER_PROLOG;
7975 xmlParseMisc(ctxt);
7976 }
7977
7978 /*
7979 * Time to start parsing the tree itself
7980 */
7981 GROW;
7982 if (RAW != '<') {
7983 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7984 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7985 ctxt->sax->error(ctxt->userData,
7986 "Start tag expected, '<' not found\n");
7987 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007988 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007989 ctxt->instate = XML_PARSER_EOF;
7990 } else {
7991 ctxt->instate = XML_PARSER_CONTENT;
7992 xmlParseElement(ctxt);
7993 ctxt->instate = XML_PARSER_EPILOG;
7994
7995
7996 /*
7997 * The Misc part at the end
7998 */
7999 xmlParseMisc(ctxt);
8000
Daniel Veillard561b7f82002-03-20 21:55:57 +00008001 if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00008002 ctxt->errNo = XML_ERR_DOCUMENT_END;
8003 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8004 ctxt->sax->error(ctxt->userData,
8005 "Extra content at the end of the document\n");
8006 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008007 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008008 }
8009 ctxt->instate = XML_PARSER_EOF;
8010 }
8011
8012 /*
8013 * SAX: end of the document processing.
8014 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008015 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008016 ctxt->sax->endDocument(ctxt->userData);
8017
Daniel Veillard5997aca2002-03-18 18:36:20 +00008018 /*
8019 * Remove locally kept entity definitions if the tree was not built
8020 */
8021 if ((ctxt->myDoc != NULL) &&
8022 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8023 xmlFreeDoc(ctxt->myDoc);
8024 ctxt->myDoc = NULL;
8025 }
8026
Daniel Veillardc7612992002-02-17 22:47:37 +00008027 if (! ctxt->wellFormed) {
8028 ctxt->valid = 0;
8029 return(-1);
8030 }
Owen Taylor3473f882001-02-23 17:55:21 +00008031 return(0);
8032}
8033
8034/**
8035 * xmlParseExtParsedEnt:
8036 * @ctxt: an XML parser context
8037 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008038 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008039 * An external general parsed entity is well-formed if it matches the
8040 * production labeled extParsedEnt.
8041 *
8042 * [78] extParsedEnt ::= TextDecl? content
8043 *
8044 * Returns 0, -1 in case of error. the parser context is augmented
8045 * as a result of the parsing.
8046 */
8047
8048int
8049xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8050 xmlChar start[4];
8051 xmlCharEncoding enc;
8052
8053 xmlDefaultSAXHandlerInit();
8054
8055 GROW;
8056
8057 /*
8058 * SAX: beginning of the document processing.
8059 */
8060 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8061 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8062
8063 /*
8064 * Get the 4 first bytes and decode the charset
8065 * if enc != XML_CHAR_ENCODING_NONE
8066 * plug some encoding conversion routines.
8067 */
8068 start[0] = RAW;
8069 start[1] = NXT(1);
8070 start[2] = NXT(2);
8071 start[3] = NXT(3);
8072 enc = xmlDetectCharEncoding(start, 4);
8073 if (enc != XML_CHAR_ENCODING_NONE) {
8074 xmlSwitchEncoding(ctxt, enc);
8075 }
8076
8077
8078 if (CUR == 0) {
8079 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8080 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8081 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8082 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008083 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008084 }
8085
8086 /*
8087 * Check for the XMLDecl in the Prolog.
8088 */
8089 GROW;
8090 if ((RAW == '<') && (NXT(1) == '?') &&
8091 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8092 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8093
8094 /*
8095 * Note that we will switch encoding on the fly.
8096 */
8097 xmlParseXMLDecl(ctxt);
8098 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8099 /*
8100 * The XML REC instructs us to stop parsing right here
8101 */
8102 return(-1);
8103 }
8104 SKIP_BLANKS;
8105 } else {
8106 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8107 }
8108 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8109 ctxt->sax->startDocument(ctxt->userData);
8110
8111 /*
8112 * Doing validity checking on chunk doesn't make sense
8113 */
8114 ctxt->instate = XML_PARSER_CONTENT;
8115 ctxt->validate = 0;
8116 ctxt->loadsubset = 0;
8117 ctxt->depth = 0;
8118
8119 xmlParseContent(ctxt);
8120
8121 if ((RAW == '<') && (NXT(1) == '/')) {
8122 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8123 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8124 ctxt->sax->error(ctxt->userData,
8125 "chunk is not well balanced\n");
8126 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008127 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008128 } else if (RAW != 0) {
8129 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8130 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8131 ctxt->sax->error(ctxt->userData,
8132 "extra content at the end of well balanced chunk\n");
8133 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008134 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008135 }
8136
8137 /*
8138 * SAX: end of the document processing.
8139 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008140 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008141 ctxt->sax->endDocument(ctxt->userData);
8142
8143 if (! ctxt->wellFormed) return(-1);
8144 return(0);
8145}
8146
8147/************************************************************************
8148 * *
8149 * Progressive parsing interfaces *
8150 * *
8151 ************************************************************************/
8152
8153/**
8154 * xmlParseLookupSequence:
8155 * @ctxt: an XML parser context
8156 * @first: the first char to lookup
8157 * @next: the next char to lookup or zero
8158 * @third: the next char to lookup or zero
8159 *
8160 * Try to find if a sequence (first, next, third) or just (first next) or
8161 * (first) is available in the input stream.
8162 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8163 * to avoid rescanning sequences of bytes, it DOES change the state of the
8164 * parser, do not use liberally.
8165 *
8166 * Returns the index to the current parsing point if the full sequence
8167 * is available, -1 otherwise.
8168 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008169static int
Owen Taylor3473f882001-02-23 17:55:21 +00008170xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8171 xmlChar next, xmlChar third) {
8172 int base, len;
8173 xmlParserInputPtr in;
8174 const xmlChar *buf;
8175
8176 in = ctxt->input;
8177 if (in == NULL) return(-1);
8178 base = in->cur - in->base;
8179 if (base < 0) return(-1);
8180 if (ctxt->checkIndex > base)
8181 base = ctxt->checkIndex;
8182 if (in->buf == NULL) {
8183 buf = in->base;
8184 len = in->length;
8185 } else {
8186 buf = in->buf->buffer->content;
8187 len = in->buf->buffer->use;
8188 }
8189 /* take into account the sequence length */
8190 if (third) len -= 2;
8191 else if (next) len --;
8192 for (;base < len;base++) {
8193 if (buf[base] == first) {
8194 if (third != 0) {
8195 if ((buf[base + 1] != next) ||
8196 (buf[base + 2] != third)) continue;
8197 } else if (next != 0) {
8198 if (buf[base + 1] != next) continue;
8199 }
8200 ctxt->checkIndex = 0;
8201#ifdef DEBUG_PUSH
8202 if (next == 0)
8203 xmlGenericError(xmlGenericErrorContext,
8204 "PP: lookup '%c' found at %d\n",
8205 first, base);
8206 else if (third == 0)
8207 xmlGenericError(xmlGenericErrorContext,
8208 "PP: lookup '%c%c' found at %d\n",
8209 first, next, base);
8210 else
8211 xmlGenericError(xmlGenericErrorContext,
8212 "PP: lookup '%c%c%c' found at %d\n",
8213 first, next, third, base);
8214#endif
8215 return(base - (in->cur - in->base));
8216 }
8217 }
8218 ctxt->checkIndex = base;
8219#ifdef DEBUG_PUSH
8220 if (next == 0)
8221 xmlGenericError(xmlGenericErrorContext,
8222 "PP: lookup '%c' failed\n", first);
8223 else if (third == 0)
8224 xmlGenericError(xmlGenericErrorContext,
8225 "PP: lookup '%c%c' failed\n", first, next);
8226 else
8227 xmlGenericError(xmlGenericErrorContext,
8228 "PP: lookup '%c%c%c' failed\n", first, next, third);
8229#endif
8230 return(-1);
8231}
8232
8233/**
Daniel Veillarda880b122003-04-21 21:36:41 +00008234 * xmlParseGetLasts:
8235 * @ctxt: an XML parser context
8236 * @lastlt: pointer to store the last '<' from the input
8237 * @lastgt: pointer to store the last '>' from the input
8238 *
8239 * Lookup the last < and > in the current chunk
8240 */
8241static void
8242xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
8243 const xmlChar **lastgt) {
8244 const xmlChar *tmp;
8245
8246 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
8247 xmlGenericError(xmlGenericErrorContext,
8248 "Internal error: xmlParseGetLasts\n");
8249 return;
8250 }
8251 if ((ctxt->progressive == 1) && (ctxt->inputNr == 1)) {
8252 tmp = ctxt->input->end;
8253 tmp--;
8254 while ((tmp >= ctxt->input->base) && (*tmp != '<') &&
8255 (*tmp != '>')) tmp--;
8256 if (tmp < ctxt->input->base) {
8257 *lastlt = NULL;
8258 *lastgt = NULL;
8259 } else if (*tmp == '<') {
8260 *lastlt = tmp;
8261 tmp--;
8262 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
8263 if (tmp < ctxt->input->base)
8264 *lastgt = NULL;
8265 else
8266 *lastgt = tmp;
8267 } else {
8268 *lastgt = tmp;
8269 tmp--;
8270 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
8271 if (tmp < ctxt->input->base)
8272 *lastlt = NULL;
8273 else
8274 *lastlt = tmp;
8275 }
8276
8277 } else {
8278 *lastlt = NULL;
8279 *lastgt = NULL;
8280 }
8281}
8282/**
Owen Taylor3473f882001-02-23 17:55:21 +00008283 * xmlParseTryOrFinish:
8284 * @ctxt: an XML parser context
8285 * @terminate: last chunk indicator
8286 *
8287 * Try to progress on parsing
8288 *
8289 * Returns zero if no parsing was possible
8290 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008291static int
Owen Taylor3473f882001-02-23 17:55:21 +00008292xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8293 int ret = 0;
8294 int avail;
8295 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00008296 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00008297
8298#ifdef DEBUG_PUSH
8299 switch (ctxt->instate) {
8300 case XML_PARSER_EOF:
8301 xmlGenericError(xmlGenericErrorContext,
8302 "PP: try EOF\n"); break;
8303 case XML_PARSER_START:
8304 xmlGenericError(xmlGenericErrorContext,
8305 "PP: try START\n"); break;
8306 case XML_PARSER_MISC:
8307 xmlGenericError(xmlGenericErrorContext,
8308 "PP: try MISC\n");break;
8309 case XML_PARSER_COMMENT:
8310 xmlGenericError(xmlGenericErrorContext,
8311 "PP: try COMMENT\n");break;
8312 case XML_PARSER_PROLOG:
8313 xmlGenericError(xmlGenericErrorContext,
8314 "PP: try PROLOG\n");break;
8315 case XML_PARSER_START_TAG:
8316 xmlGenericError(xmlGenericErrorContext,
8317 "PP: try START_TAG\n");break;
8318 case XML_PARSER_CONTENT:
8319 xmlGenericError(xmlGenericErrorContext,
8320 "PP: try CONTENT\n");break;
8321 case XML_PARSER_CDATA_SECTION:
8322 xmlGenericError(xmlGenericErrorContext,
8323 "PP: try CDATA_SECTION\n");break;
8324 case XML_PARSER_END_TAG:
8325 xmlGenericError(xmlGenericErrorContext,
8326 "PP: try END_TAG\n");break;
8327 case XML_PARSER_ENTITY_DECL:
8328 xmlGenericError(xmlGenericErrorContext,
8329 "PP: try ENTITY_DECL\n");break;
8330 case XML_PARSER_ENTITY_VALUE:
8331 xmlGenericError(xmlGenericErrorContext,
8332 "PP: try ENTITY_VALUE\n");break;
8333 case XML_PARSER_ATTRIBUTE_VALUE:
8334 xmlGenericError(xmlGenericErrorContext,
8335 "PP: try ATTRIBUTE_VALUE\n");break;
8336 case XML_PARSER_DTD:
8337 xmlGenericError(xmlGenericErrorContext,
8338 "PP: try DTD\n");break;
8339 case XML_PARSER_EPILOG:
8340 xmlGenericError(xmlGenericErrorContext,
8341 "PP: try EPILOG\n");break;
8342 case XML_PARSER_PI:
8343 xmlGenericError(xmlGenericErrorContext,
8344 "PP: try PI\n");break;
8345 case XML_PARSER_IGNORE:
8346 xmlGenericError(xmlGenericErrorContext,
8347 "PP: try IGNORE\n");break;
8348 }
8349#endif
8350
Daniel Veillarda880b122003-04-21 21:36:41 +00008351 if (ctxt->input->cur - ctxt->input->base > 4096) {
8352 xmlSHRINK(ctxt);
8353 ctxt->checkIndex = 0;
8354 }
8355 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00008356
Daniel Veillarda880b122003-04-21 21:36:41 +00008357 while (1) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008358 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
8359 return(0);
8360
8361
Owen Taylor3473f882001-02-23 17:55:21 +00008362 /*
8363 * Pop-up of finished entities.
8364 */
8365 while ((RAW == 0) && (ctxt->inputNr > 1))
8366 xmlPopInput(ctxt);
8367
8368 if (ctxt->input ==NULL) break;
8369 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00008370 avail = ctxt->input->length -
8371 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008372 else {
8373 /*
8374 * If we are operating on converted input, try to flush
8375 * remainng chars to avoid them stalling in the non-converted
8376 * buffer.
8377 */
8378 if ((ctxt->input->buf->raw != NULL) &&
8379 (ctxt->input->buf->raw->use > 0)) {
8380 int base = ctxt->input->base -
8381 ctxt->input->buf->buffer->content;
8382 int current = ctxt->input->cur - ctxt->input->base;
8383
8384 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8385 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8386 ctxt->input->cur = ctxt->input->base + current;
8387 ctxt->input->end =
8388 &ctxt->input->buf->buffer->content[
8389 ctxt->input->buf->buffer->use];
8390 }
8391 avail = ctxt->input->buf->buffer->use -
8392 (ctxt->input->cur - ctxt->input->base);
8393 }
Owen Taylor3473f882001-02-23 17:55:21 +00008394 if (avail < 1)
8395 goto done;
8396 switch (ctxt->instate) {
8397 case XML_PARSER_EOF:
8398 /*
8399 * Document parsing is done !
8400 */
8401 goto done;
8402 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008403 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8404 xmlChar start[4];
8405 xmlCharEncoding enc;
8406
8407 /*
8408 * Very first chars read from the document flow.
8409 */
8410 if (avail < 4)
8411 goto done;
8412
8413 /*
8414 * Get the 4 first bytes and decode the charset
8415 * if enc != XML_CHAR_ENCODING_NONE
8416 * plug some encoding conversion routines.
8417 */
8418 start[0] = RAW;
8419 start[1] = NXT(1);
8420 start[2] = NXT(2);
8421 start[3] = NXT(3);
8422 enc = xmlDetectCharEncoding(start, 4);
8423 if (enc != XML_CHAR_ENCODING_NONE) {
8424 xmlSwitchEncoding(ctxt, enc);
8425 }
8426 break;
8427 }
Owen Taylor3473f882001-02-23 17:55:21 +00008428
8429 cur = ctxt->input->cur[0];
8430 next = ctxt->input->cur[1];
8431 if (cur == 0) {
8432 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8433 ctxt->sax->setDocumentLocator(ctxt->userData,
8434 &xmlDefaultSAXLocator);
8435 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8436 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8437 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8438 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008439 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008440 ctxt->instate = XML_PARSER_EOF;
8441#ifdef DEBUG_PUSH
8442 xmlGenericError(xmlGenericErrorContext,
8443 "PP: entering EOF\n");
8444#endif
8445 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8446 ctxt->sax->endDocument(ctxt->userData);
8447 goto done;
8448 }
8449 if ((cur == '<') && (next == '?')) {
8450 /* PI or XML decl */
8451 if (avail < 5) return(ret);
8452 if ((!terminate) &&
8453 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8454 return(ret);
8455 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8456 ctxt->sax->setDocumentLocator(ctxt->userData,
8457 &xmlDefaultSAXLocator);
8458 if ((ctxt->input->cur[2] == 'x') &&
8459 (ctxt->input->cur[3] == 'm') &&
8460 (ctxt->input->cur[4] == 'l') &&
8461 (IS_BLANK(ctxt->input->cur[5]))) {
8462 ret += 5;
8463#ifdef DEBUG_PUSH
8464 xmlGenericError(xmlGenericErrorContext,
8465 "PP: Parsing XML Decl\n");
8466#endif
8467 xmlParseXMLDecl(ctxt);
8468 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8469 /*
8470 * The XML REC instructs us to stop parsing right
8471 * here
8472 */
8473 ctxt->instate = XML_PARSER_EOF;
8474 return(0);
8475 }
8476 ctxt->standalone = ctxt->input->standalone;
8477 if ((ctxt->encoding == NULL) &&
8478 (ctxt->input->encoding != NULL))
8479 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8480 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8481 (!ctxt->disableSAX))
8482 ctxt->sax->startDocument(ctxt->userData);
8483 ctxt->instate = XML_PARSER_MISC;
8484#ifdef DEBUG_PUSH
8485 xmlGenericError(xmlGenericErrorContext,
8486 "PP: entering MISC\n");
8487#endif
8488 } else {
8489 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8490 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8491 (!ctxt->disableSAX))
8492 ctxt->sax->startDocument(ctxt->userData);
8493 ctxt->instate = XML_PARSER_MISC;
8494#ifdef DEBUG_PUSH
8495 xmlGenericError(xmlGenericErrorContext,
8496 "PP: entering MISC\n");
8497#endif
8498 }
8499 } else {
8500 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8501 ctxt->sax->setDocumentLocator(ctxt->userData,
8502 &xmlDefaultSAXLocator);
8503 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8504 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8505 (!ctxt->disableSAX))
8506 ctxt->sax->startDocument(ctxt->userData);
8507 ctxt->instate = XML_PARSER_MISC;
8508#ifdef DEBUG_PUSH
8509 xmlGenericError(xmlGenericErrorContext,
8510 "PP: entering MISC\n");
8511#endif
8512 }
8513 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00008514 case XML_PARSER_START_TAG: {
8515 xmlChar *name, *oldname;
8516
8517 if ((avail < 2) && (ctxt->inputNr == 1))
8518 goto done;
8519 cur = ctxt->input->cur[0];
8520 if (cur != '<') {
8521 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8522 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8523 ctxt->sax->error(ctxt->userData,
8524 "Start tag expect, '<' not found\n");
8525 ctxt->wellFormed = 0;
8526 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8527 ctxt->instate = XML_PARSER_EOF;
8528#ifdef DEBUG_PUSH
8529 xmlGenericError(xmlGenericErrorContext,
8530 "PP: entering EOF\n");
8531#endif
8532 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8533 ctxt->sax->endDocument(ctxt->userData);
8534 goto done;
8535 }
8536 if (!terminate) {
8537 if (ctxt->progressive) {
8538 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
8539 goto done;
8540 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
8541 goto done;
8542 }
8543 }
8544 if (ctxt->spaceNr == 0)
8545 spacePush(ctxt, -1);
8546 else
8547 spacePush(ctxt, *ctxt->space);
8548 name = xmlParseStartTag(ctxt);
8549 if (name == NULL) {
8550 spacePop(ctxt);
8551 ctxt->instate = XML_PARSER_EOF;
8552#ifdef DEBUG_PUSH
8553 xmlGenericError(xmlGenericErrorContext,
8554 "PP: entering EOF\n");
8555#endif
8556 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8557 ctxt->sax->endDocument(ctxt->userData);
8558 goto done;
8559 }
8560 namePush(ctxt, name);
8561
8562 /*
8563 * [ VC: Root Element Type ]
8564 * The Name in the document type declaration must match
8565 * the element type of the root element.
8566 */
8567 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8568 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8569 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8570
8571 /*
8572 * Check for an Empty Element.
8573 */
8574 if ((RAW == '/') && (NXT(1) == '>')) {
8575 SKIP(2);
8576 if ((ctxt->sax != NULL) &&
8577 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8578 ctxt->sax->endElement(ctxt->userData, name);
8579 oldname = namePop(ctxt);
8580 spacePop(ctxt);
8581 if (oldname != NULL) {
8582#ifdef DEBUG_STACK
8583 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8584#endif
8585 xmlFree(oldname);
8586 }
8587 if (ctxt->name == NULL) {
8588 ctxt->instate = XML_PARSER_EPILOG;
8589#ifdef DEBUG_PUSH
8590 xmlGenericError(xmlGenericErrorContext,
8591 "PP: entering EPILOG\n");
8592#endif
8593 } else {
8594 ctxt->instate = XML_PARSER_CONTENT;
8595#ifdef DEBUG_PUSH
8596 xmlGenericError(xmlGenericErrorContext,
8597 "PP: entering CONTENT\n");
8598#endif
8599 }
8600 break;
8601 }
8602 if (RAW == '>') {
8603 NEXT;
8604 } else {
8605 ctxt->errNo = XML_ERR_GT_REQUIRED;
8606 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8607 ctxt->sax->error(ctxt->userData,
8608 "Couldn't find end of Start Tag %s\n",
8609 name);
8610 ctxt->wellFormed = 0;
8611 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8612
8613 /*
8614 * end of parsing of this node.
8615 */
8616 nodePop(ctxt);
8617 oldname = namePop(ctxt);
8618 spacePop(ctxt);
8619 if (oldname != NULL) {
8620#ifdef DEBUG_STACK
8621 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8622#endif
8623 xmlFree(oldname);
8624 }
8625 }
8626 ctxt->instate = XML_PARSER_CONTENT;
8627#ifdef DEBUG_PUSH
8628 xmlGenericError(xmlGenericErrorContext,
8629 "PP: entering CONTENT\n");
8630#endif
8631 break;
8632 }
8633 case XML_PARSER_CONTENT: {
8634 const xmlChar *test;
8635 unsigned int cons;
8636 if ((avail < 2) && (ctxt->inputNr == 1))
8637 goto done;
8638 cur = ctxt->input->cur[0];
8639 next = ctxt->input->cur[1];
8640
8641 test = CUR_PTR;
8642 cons = ctxt->input->consumed;
8643 if ((cur == '<') && (next == '/')) {
8644 ctxt->instate = XML_PARSER_END_TAG;
8645#ifdef DEBUG_PUSH
8646 xmlGenericError(xmlGenericErrorContext,
8647 "PP: entering END_TAG\n");
8648#endif
8649 break;
8650 } else if ((cur == '<') && (next == '?')) {
8651 if ((!terminate) &&
8652 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8653 goto done;
8654#ifdef DEBUG_PUSH
8655 xmlGenericError(xmlGenericErrorContext,
8656 "PP: Parsing PI\n");
8657#endif
8658 xmlParsePI(ctxt);
8659 } else if ((cur == '<') && (next != '!')) {
8660 ctxt->instate = XML_PARSER_START_TAG;
8661#ifdef DEBUG_PUSH
8662 xmlGenericError(xmlGenericErrorContext,
8663 "PP: entering START_TAG\n");
8664#endif
8665 break;
8666 } else if ((cur == '<') && (next == '!') &&
8667 (ctxt->input->cur[2] == '-') &&
8668 (ctxt->input->cur[3] == '-')) {
8669 if ((!terminate) &&
8670 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8671 goto done;
8672#ifdef DEBUG_PUSH
8673 xmlGenericError(xmlGenericErrorContext,
8674 "PP: Parsing Comment\n");
8675#endif
8676 xmlParseComment(ctxt);
8677 ctxt->instate = XML_PARSER_CONTENT;
8678 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8679 (ctxt->input->cur[2] == '[') &&
8680 (ctxt->input->cur[3] == 'C') &&
8681 (ctxt->input->cur[4] == 'D') &&
8682 (ctxt->input->cur[5] == 'A') &&
8683 (ctxt->input->cur[6] == 'T') &&
8684 (ctxt->input->cur[7] == 'A') &&
8685 (ctxt->input->cur[8] == '[')) {
8686 SKIP(9);
8687 ctxt->instate = XML_PARSER_CDATA_SECTION;
8688#ifdef DEBUG_PUSH
8689 xmlGenericError(xmlGenericErrorContext,
8690 "PP: entering CDATA_SECTION\n");
8691#endif
8692 break;
8693 } else if ((cur == '<') && (next == '!') &&
8694 (avail < 9)) {
8695 goto done;
8696 } else if (cur == '&') {
8697 if ((!terminate) &&
8698 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8699 goto done;
8700#ifdef DEBUG_PUSH
8701 xmlGenericError(xmlGenericErrorContext,
8702 "PP: Parsing Reference\n");
8703#endif
8704 xmlParseReference(ctxt);
8705 } else {
8706 /* TODO Avoid the extra copy, handle directly !!! */
8707 /*
8708 * Goal of the following test is:
8709 * - minimize calls to the SAX 'character' callback
8710 * when they are mergeable
8711 * - handle an problem for isBlank when we only parse
8712 * a sequence of blank chars and the next one is
8713 * not available to check against '<' presence.
8714 * - tries to homogenize the differences in SAX
8715 * callbacks between the push and pull versions
8716 * of the parser.
8717 */
8718 if ((ctxt->inputNr == 1) &&
8719 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8720 if (!terminate) {
8721 if (ctxt->progressive) {
8722 if ((lastlt == NULL) ||
8723 (ctxt->input->cur > lastlt))
8724 goto done;
8725 } else if (xmlParseLookupSequence(ctxt,
8726 '<', 0, 0) < 0) {
8727 goto done;
8728 }
8729 }
8730 }
8731 ctxt->checkIndex = 0;
8732#ifdef DEBUG_PUSH
8733 xmlGenericError(xmlGenericErrorContext,
8734 "PP: Parsing char data\n");
8735#endif
8736 xmlParseCharData(ctxt, 0);
8737 }
8738 /*
8739 * Pop-up of finished entities.
8740 */
8741 while ((RAW == 0) && (ctxt->inputNr > 1))
8742 xmlPopInput(ctxt);
8743 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
8744 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8745 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8746 ctxt->sax->error(ctxt->userData,
8747 "detected an error in element content\n");
8748 ctxt->wellFormed = 0;
8749 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8750 ctxt->instate = XML_PARSER_EOF;
8751 break;
8752 }
8753 break;
8754 }
8755 case XML_PARSER_END_TAG:
8756 if (avail < 2)
8757 goto done;
8758 if (!terminate) {
8759 if (ctxt->progressive) {
8760 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
8761 goto done;
8762 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
8763 goto done;
8764 }
8765 }
8766 xmlParseEndTag(ctxt);
8767 if (ctxt->name == NULL) {
8768 ctxt->instate = XML_PARSER_EPILOG;
8769#ifdef DEBUG_PUSH
8770 xmlGenericError(xmlGenericErrorContext,
8771 "PP: entering EPILOG\n");
8772#endif
8773 } else {
8774 ctxt->instate = XML_PARSER_CONTENT;
8775#ifdef DEBUG_PUSH
8776 xmlGenericError(xmlGenericErrorContext,
8777 "PP: entering CONTENT\n");
8778#endif
8779 }
8780 break;
8781 case XML_PARSER_CDATA_SECTION: {
8782 /*
8783 * The Push mode need to have the SAX callback for
8784 * cdataBlock merge back contiguous callbacks.
8785 */
8786 int base;
8787
8788 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8789 if (base < 0) {
8790 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8791 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8792 if (ctxt->sax->cdataBlock != NULL)
8793 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8794 XML_PARSER_BIG_BUFFER_SIZE);
8795 }
8796 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8797 ctxt->checkIndex = 0;
8798 }
8799 goto done;
8800 } else {
8801 if ((ctxt->sax != NULL) && (base > 0) &&
8802 (!ctxt->disableSAX)) {
8803 if (ctxt->sax->cdataBlock != NULL)
8804 ctxt->sax->cdataBlock(ctxt->userData,
8805 ctxt->input->cur, base);
8806 }
8807 SKIP(base + 3);
8808 ctxt->checkIndex = 0;
8809 ctxt->instate = XML_PARSER_CONTENT;
8810#ifdef DEBUG_PUSH
8811 xmlGenericError(xmlGenericErrorContext,
8812 "PP: entering CONTENT\n");
8813#endif
8814 }
8815 break;
8816 }
Owen Taylor3473f882001-02-23 17:55:21 +00008817 case XML_PARSER_MISC:
8818 SKIP_BLANKS;
8819 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00008820 avail = ctxt->input->length -
8821 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00008822 else
Daniel Veillarda880b122003-04-21 21:36:41 +00008823 avail = ctxt->input->buf->buffer->use -
8824 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00008825 if (avail < 2)
8826 goto done;
8827 cur = ctxt->input->cur[0];
8828 next = ctxt->input->cur[1];
8829 if ((cur == '<') && (next == '?')) {
8830 if ((!terminate) &&
8831 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8832 goto done;
8833#ifdef DEBUG_PUSH
8834 xmlGenericError(xmlGenericErrorContext,
8835 "PP: Parsing PI\n");
8836#endif
8837 xmlParsePI(ctxt);
8838 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00008839 (ctxt->input->cur[2] == '-') &&
8840 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008841 if ((!terminate) &&
8842 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8843 goto done;
8844#ifdef DEBUG_PUSH
8845 xmlGenericError(xmlGenericErrorContext,
8846 "PP: Parsing Comment\n");
8847#endif
8848 xmlParseComment(ctxt);
8849 ctxt->instate = XML_PARSER_MISC;
8850 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00008851 (ctxt->input->cur[2] == 'D') &&
8852 (ctxt->input->cur[3] == 'O') &&
8853 (ctxt->input->cur[4] == 'C') &&
8854 (ctxt->input->cur[5] == 'T') &&
8855 (ctxt->input->cur[6] == 'Y') &&
8856 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008857 (ctxt->input->cur[8] == 'E')) {
8858 if ((!terminate) &&
8859 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8860 goto done;
8861#ifdef DEBUG_PUSH
8862 xmlGenericError(xmlGenericErrorContext,
8863 "PP: Parsing internal subset\n");
8864#endif
8865 ctxt->inSubset = 1;
8866 xmlParseDocTypeDecl(ctxt);
8867 if (RAW == '[') {
8868 ctxt->instate = XML_PARSER_DTD;
8869#ifdef DEBUG_PUSH
8870 xmlGenericError(xmlGenericErrorContext,
8871 "PP: entering DTD\n");
8872#endif
8873 } else {
8874 /*
8875 * Create and update the external subset.
8876 */
8877 ctxt->inSubset = 2;
8878 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8879 (ctxt->sax->externalSubset != NULL))
8880 ctxt->sax->externalSubset(ctxt->userData,
8881 ctxt->intSubName, ctxt->extSubSystem,
8882 ctxt->extSubURI);
8883 ctxt->inSubset = 0;
8884 ctxt->instate = XML_PARSER_PROLOG;
8885#ifdef DEBUG_PUSH
8886 xmlGenericError(xmlGenericErrorContext,
8887 "PP: entering PROLOG\n");
8888#endif
8889 }
8890 } else if ((cur == '<') && (next == '!') &&
8891 (avail < 9)) {
8892 goto done;
8893 } else {
8894 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00008895 ctxt->progressive = 1;
8896 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00008897#ifdef DEBUG_PUSH
8898 xmlGenericError(xmlGenericErrorContext,
8899 "PP: entering START_TAG\n");
8900#endif
8901 }
8902 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008903 case XML_PARSER_PROLOG:
8904 SKIP_BLANKS;
8905 if (ctxt->input->buf == NULL)
8906 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8907 else
8908 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8909 if (avail < 2)
8910 goto done;
8911 cur = ctxt->input->cur[0];
8912 next = ctxt->input->cur[1];
8913 if ((cur == '<') && (next == '?')) {
8914 if ((!terminate) &&
8915 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8916 goto done;
8917#ifdef DEBUG_PUSH
8918 xmlGenericError(xmlGenericErrorContext,
8919 "PP: Parsing PI\n");
8920#endif
8921 xmlParsePI(ctxt);
8922 } else if ((cur == '<') && (next == '!') &&
8923 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8924 if ((!terminate) &&
8925 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8926 goto done;
8927#ifdef DEBUG_PUSH
8928 xmlGenericError(xmlGenericErrorContext,
8929 "PP: Parsing Comment\n");
8930#endif
8931 xmlParseComment(ctxt);
8932 ctxt->instate = XML_PARSER_PROLOG;
8933 } else if ((cur == '<') && (next == '!') &&
8934 (avail < 4)) {
8935 goto done;
8936 } else {
8937 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00008938 ctxt->progressive = 1;
8939 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00008940#ifdef DEBUG_PUSH
8941 xmlGenericError(xmlGenericErrorContext,
8942 "PP: entering START_TAG\n");
8943#endif
8944 }
8945 break;
8946 case XML_PARSER_EPILOG:
8947 SKIP_BLANKS;
8948 if (ctxt->input->buf == NULL)
8949 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8950 else
8951 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8952 if (avail < 2)
8953 goto done;
8954 cur = ctxt->input->cur[0];
8955 next = ctxt->input->cur[1];
8956 if ((cur == '<') && (next == '?')) {
8957 if ((!terminate) &&
8958 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8959 goto done;
8960#ifdef DEBUG_PUSH
8961 xmlGenericError(xmlGenericErrorContext,
8962 "PP: Parsing PI\n");
8963#endif
8964 xmlParsePI(ctxt);
8965 ctxt->instate = XML_PARSER_EPILOG;
8966 } else if ((cur == '<') && (next == '!') &&
8967 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8968 if ((!terminate) &&
8969 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8970 goto done;
8971#ifdef DEBUG_PUSH
8972 xmlGenericError(xmlGenericErrorContext,
8973 "PP: Parsing Comment\n");
8974#endif
8975 xmlParseComment(ctxt);
8976 ctxt->instate = XML_PARSER_EPILOG;
8977 } else if ((cur == '<') && (next == '!') &&
8978 (avail < 4)) {
8979 goto done;
8980 } else {
8981 ctxt->errNo = XML_ERR_DOCUMENT_END;
8982 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8983 ctxt->sax->error(ctxt->userData,
8984 "Extra content at the end of the document\n");
8985 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008986 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008987 ctxt->instate = XML_PARSER_EOF;
8988#ifdef DEBUG_PUSH
8989 xmlGenericError(xmlGenericErrorContext,
8990 "PP: entering EOF\n");
8991#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008992 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008993 ctxt->sax->endDocument(ctxt->userData);
8994 goto done;
8995 }
8996 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008997 case XML_PARSER_DTD: {
8998 /*
8999 * Sorry but progressive parsing of the internal subset
9000 * is not expected to be supported. We first check that
9001 * the full content of the internal subset is available and
9002 * the parsing is launched only at that point.
9003 * Internal subset ends up with "']' S? '>'" in an unescaped
9004 * section and not in a ']]>' sequence which are conditional
9005 * sections (whoever argued to keep that crap in XML deserve
9006 * a place in hell !).
9007 */
9008 int base, i;
9009 xmlChar *buf;
9010 xmlChar quote = 0;
9011
9012 base = ctxt->input->cur - ctxt->input->base;
9013 if (base < 0) return(0);
9014 if (ctxt->checkIndex > base)
9015 base = ctxt->checkIndex;
9016 buf = ctxt->input->buf->buffer->content;
9017 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9018 base++) {
9019 if (quote != 0) {
9020 if (buf[base] == quote)
9021 quote = 0;
9022 continue;
9023 }
9024 if (buf[base] == '"') {
9025 quote = '"';
9026 continue;
9027 }
9028 if (buf[base] == '\'') {
9029 quote = '\'';
9030 continue;
9031 }
9032 if (buf[base] == ']') {
9033 if ((unsigned int) base +1 >=
9034 ctxt->input->buf->buffer->use)
9035 break;
9036 if (buf[base + 1] == ']') {
9037 /* conditional crap, skip both ']' ! */
9038 base++;
9039 continue;
9040 }
9041 for (i = 0;
9042 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9043 i++) {
9044 if (buf[base + i] == '>')
9045 goto found_end_int_subset;
9046 }
9047 break;
9048 }
9049 }
9050 /*
9051 * We didn't found the end of the Internal subset
9052 */
9053 if (quote == 0)
9054 ctxt->checkIndex = base;
9055#ifdef DEBUG_PUSH
9056 if (next == 0)
9057 xmlGenericError(xmlGenericErrorContext,
9058 "PP: lookup of int subset end filed\n");
9059#endif
9060 goto done;
9061
9062found_end_int_subset:
9063 xmlParseInternalSubset(ctxt);
9064 ctxt->inSubset = 2;
9065 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9066 (ctxt->sax->externalSubset != NULL))
9067 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9068 ctxt->extSubSystem, ctxt->extSubURI);
9069 ctxt->inSubset = 0;
9070 ctxt->instate = XML_PARSER_PROLOG;
9071 ctxt->checkIndex = 0;
9072#ifdef DEBUG_PUSH
9073 xmlGenericError(xmlGenericErrorContext,
9074 "PP: entering PROLOG\n");
9075#endif
9076 break;
9077 }
9078 case XML_PARSER_COMMENT:
9079 xmlGenericError(xmlGenericErrorContext,
9080 "PP: internal error, state == COMMENT\n");
9081 ctxt->instate = XML_PARSER_CONTENT;
9082#ifdef DEBUG_PUSH
9083 xmlGenericError(xmlGenericErrorContext,
9084 "PP: entering CONTENT\n");
9085#endif
9086 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009087 case XML_PARSER_IGNORE:
9088 xmlGenericError(xmlGenericErrorContext,
9089 "PP: internal error, state == IGNORE");
9090 ctxt->instate = XML_PARSER_DTD;
9091#ifdef DEBUG_PUSH
9092 xmlGenericError(xmlGenericErrorContext,
9093 "PP: entering DTD\n");
9094#endif
9095 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009096 case XML_PARSER_PI:
9097 xmlGenericError(xmlGenericErrorContext,
9098 "PP: internal error, state == PI\n");
9099 ctxt->instate = XML_PARSER_CONTENT;
9100#ifdef DEBUG_PUSH
9101 xmlGenericError(xmlGenericErrorContext,
9102 "PP: entering CONTENT\n");
9103#endif
9104 break;
9105 case XML_PARSER_ENTITY_DECL:
9106 xmlGenericError(xmlGenericErrorContext,
9107 "PP: internal error, state == ENTITY_DECL\n");
9108 ctxt->instate = XML_PARSER_DTD;
9109#ifdef DEBUG_PUSH
9110 xmlGenericError(xmlGenericErrorContext,
9111 "PP: entering DTD\n");
9112#endif
9113 break;
9114 case XML_PARSER_ENTITY_VALUE:
9115 xmlGenericError(xmlGenericErrorContext,
9116 "PP: internal error, state == ENTITY_VALUE\n");
9117 ctxt->instate = XML_PARSER_CONTENT;
9118#ifdef DEBUG_PUSH
9119 xmlGenericError(xmlGenericErrorContext,
9120 "PP: entering DTD\n");
9121#endif
9122 break;
9123 case XML_PARSER_ATTRIBUTE_VALUE:
9124 xmlGenericError(xmlGenericErrorContext,
9125 "PP: internal error, state == ATTRIBUTE_VALUE\n");
9126 ctxt->instate = XML_PARSER_START_TAG;
9127#ifdef DEBUG_PUSH
9128 xmlGenericError(xmlGenericErrorContext,
9129 "PP: entering START_TAG\n");
9130#endif
9131 break;
9132 case XML_PARSER_SYSTEM_LITERAL:
9133 xmlGenericError(xmlGenericErrorContext,
9134 "PP: internal error, state == SYSTEM_LITERAL\n");
9135 ctxt->instate = XML_PARSER_START_TAG;
9136#ifdef DEBUG_PUSH
9137 xmlGenericError(xmlGenericErrorContext,
9138 "PP: entering START_TAG\n");
9139#endif
9140 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00009141 case XML_PARSER_PUBLIC_LITERAL:
9142 xmlGenericError(xmlGenericErrorContext,
9143 "PP: internal error, state == PUBLIC_LITERAL\n");
9144 ctxt->instate = XML_PARSER_START_TAG;
9145#ifdef DEBUG_PUSH
9146 xmlGenericError(xmlGenericErrorContext,
9147 "PP: entering START_TAG\n");
9148#endif
9149 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009150 }
9151 }
9152done:
9153#ifdef DEBUG_PUSH
9154 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
9155#endif
9156 return(ret);
9157}
9158
9159/**
Owen Taylor3473f882001-02-23 17:55:21 +00009160 * xmlParseChunk:
9161 * @ctxt: an XML parser context
9162 * @chunk: an char array
9163 * @size: the size in byte of the chunk
9164 * @terminate: last chunk indicator
9165 *
9166 * Parse a Chunk of memory
9167 *
9168 * Returns zero if no error, the xmlParserErrors otherwise.
9169 */
9170int
9171xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9172 int terminate) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009173 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9174 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +00009175 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9176 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9177 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9178 int cur = ctxt->input->cur - ctxt->input->base;
9179
9180 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9181 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9182 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009183 ctxt->input->end =
9184 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009185#ifdef DEBUG_PUSH
9186 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9187#endif
9188
Daniel Veillarda880b122003-04-21 21:36:41 +00009189#if 0
Owen Taylor3473f882001-02-23 17:55:21 +00009190 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
9191 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda880b122003-04-21 21:36:41 +00009192#endif
Owen Taylor3473f882001-02-23 17:55:21 +00009193 } else if (ctxt->instate != XML_PARSER_EOF) {
9194 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
9195 xmlParserInputBufferPtr in = ctxt->input->buf;
9196 if ((in->encoder != NULL) && (in->buffer != NULL) &&
9197 (in->raw != NULL)) {
9198 int nbchars;
9199
9200 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
9201 if (nbchars < 0) {
9202 xmlGenericError(xmlGenericErrorContext,
9203 "xmlParseChunk: encoder error\n");
9204 return(XML_ERR_INVALID_ENCODING);
9205 }
9206 }
9207 }
9208 }
9209 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009210 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9211 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +00009212 if (terminate) {
9213 /*
9214 * Check for termination
9215 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009216 int avail = 0;
9217 if (ctxt->input->buf == NULL)
9218 avail = ctxt->input->length -
9219 (ctxt->input->cur - ctxt->input->base);
9220 else
9221 avail = ctxt->input->buf->buffer->use -
9222 (ctxt->input->cur - ctxt->input->base);
9223
Owen Taylor3473f882001-02-23 17:55:21 +00009224 if ((ctxt->instate != XML_PARSER_EOF) &&
9225 (ctxt->instate != XML_PARSER_EPILOG)) {
9226 ctxt->errNo = XML_ERR_DOCUMENT_END;
9227 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9228 ctxt->sax->error(ctxt->userData,
9229 "Extra content at the end of the document\n");
9230 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009231 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009232 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009233 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
9234 ctxt->errNo = XML_ERR_DOCUMENT_END;
9235 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9236 ctxt->sax->error(ctxt->userData,
9237 "Extra content at the end of the document\n");
9238 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009239 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009240
9241 }
Owen Taylor3473f882001-02-23 17:55:21 +00009242 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009243 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009244 ctxt->sax->endDocument(ctxt->userData);
9245 }
9246 ctxt->instate = XML_PARSER_EOF;
9247 }
9248 return((xmlParserErrors) ctxt->errNo);
9249}
9250
9251/************************************************************************
9252 * *
9253 * I/O front end functions to the parser *
9254 * *
9255 ************************************************************************/
9256
9257/**
9258 * xmlStopParser:
9259 * @ctxt: an XML parser context
9260 *
9261 * Blocks further parser processing
9262 */
9263void
9264xmlStopParser(xmlParserCtxtPtr ctxt) {
9265 ctxt->instate = XML_PARSER_EOF;
9266 if (ctxt->input != NULL)
9267 ctxt->input->cur = BAD_CAST"";
9268}
9269
9270/**
9271 * xmlCreatePushParserCtxt:
9272 * @sax: a SAX handler
9273 * @user_data: The user data returned on SAX callbacks
9274 * @chunk: a pointer to an array of chars
9275 * @size: number of chars in the array
9276 * @filename: an optional file name or URI
9277 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009278 * Create a parser context for using the XML parser in push mode.
9279 * If @buffer and @size are non-NULL, the data is used to detect
9280 * the encoding. The remaining characters will be parsed so they
9281 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009282 * To allow content encoding detection, @size should be >= 4
9283 * The value of @filename is used for fetching external entities
9284 * and error/warning reports.
9285 *
9286 * Returns the new parser context or NULL
9287 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009288
Owen Taylor3473f882001-02-23 17:55:21 +00009289xmlParserCtxtPtr
9290xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9291 const char *chunk, int size, const char *filename) {
9292 xmlParserCtxtPtr ctxt;
9293 xmlParserInputPtr inputStream;
9294 xmlParserInputBufferPtr buf;
9295 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9296
9297 /*
9298 * plug some encoding conversion routines
9299 */
9300 if ((chunk != NULL) && (size >= 4))
9301 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9302
9303 buf = xmlAllocParserInputBuffer(enc);
9304 if (buf == NULL) return(NULL);
9305
9306 ctxt = xmlNewParserCtxt();
9307 if (ctxt == NULL) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009308 xmlGenericError(xmlGenericErrorContext,
9309 "xml parser: out of memory\n");
9310 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009311 return(NULL);
9312 }
9313 if (sax != NULL) {
9314 if (ctxt->sax != &xmlDefaultSAXHandler)
9315 xmlFree(ctxt->sax);
9316 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9317 if (ctxt->sax == NULL) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009318 xmlGenericError(xmlGenericErrorContext,
9319 "xml parser: out of memory\n");
9320 xmlFreeParserInputBuffer(buf);
9321 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009322 return(NULL);
9323 }
9324 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9325 if (user_data != NULL)
9326 ctxt->userData = user_data;
9327 }
9328 if (filename == NULL) {
9329 ctxt->directory = NULL;
9330 } else {
9331 ctxt->directory = xmlParserGetDirectory(filename);
9332 }
9333
9334 inputStream = xmlNewInputStream(ctxt);
9335 if (inputStream == NULL) {
9336 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009337 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009338 return(NULL);
9339 }
9340
9341 if (filename == NULL)
9342 inputStream->filename = NULL;
9343 else
Daniel Veillardf4862f02002-09-10 11:13:43 +00009344 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +00009345 xmlCanonicPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +00009346 inputStream->buf = buf;
9347 inputStream->base = inputStream->buf->buffer->content;
9348 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009349 inputStream->end =
9350 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009351
9352 inputPush(ctxt, inputStream);
9353
9354 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9355 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009356 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9357 int cur = ctxt->input->cur - ctxt->input->base;
9358
Owen Taylor3473f882001-02-23 17:55:21 +00009359 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009360
9361 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9362 ctxt->input->cur = ctxt->input->base + cur;
9363 ctxt->input->end =
9364 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009365#ifdef DEBUG_PUSH
9366 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9367#endif
9368 }
9369
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009370 if (enc != XML_CHAR_ENCODING_NONE) {
9371 xmlSwitchEncoding(ctxt, enc);
9372 }
9373
Owen Taylor3473f882001-02-23 17:55:21 +00009374 return(ctxt);
9375}
9376
9377/**
9378 * xmlCreateIOParserCtxt:
9379 * @sax: a SAX handler
9380 * @user_data: The user data returned on SAX callbacks
9381 * @ioread: an I/O read function
9382 * @ioclose: an I/O close function
9383 * @ioctx: an I/O handler
9384 * @enc: the charset encoding if known
9385 *
9386 * Create a parser context for using the XML parser with an existing
9387 * I/O stream
9388 *
9389 * Returns the new parser context or NULL
9390 */
9391xmlParserCtxtPtr
9392xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9393 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9394 void *ioctx, xmlCharEncoding enc) {
9395 xmlParserCtxtPtr ctxt;
9396 xmlParserInputPtr inputStream;
9397 xmlParserInputBufferPtr buf;
9398
9399 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9400 if (buf == NULL) return(NULL);
9401
9402 ctxt = xmlNewParserCtxt();
9403 if (ctxt == NULL) {
9404 xmlFree(buf);
9405 return(NULL);
9406 }
9407 if (sax != NULL) {
9408 if (ctxt->sax != &xmlDefaultSAXHandler)
9409 xmlFree(ctxt->sax);
9410 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9411 if (ctxt->sax == NULL) {
9412 xmlFree(buf);
9413 xmlFree(ctxt);
9414 return(NULL);
9415 }
9416 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9417 if (user_data != NULL)
9418 ctxt->userData = user_data;
9419 }
9420
9421 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9422 if (inputStream == NULL) {
9423 xmlFreeParserCtxt(ctxt);
9424 return(NULL);
9425 }
9426 inputPush(ctxt, inputStream);
9427
9428 return(ctxt);
9429}
9430
9431/************************************************************************
9432 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009433 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009434 * *
9435 ************************************************************************/
9436
9437/**
9438 * xmlIOParseDTD:
9439 * @sax: the SAX handler block or NULL
9440 * @input: an Input Buffer
9441 * @enc: the charset encoding if known
9442 *
9443 * Load and parse a DTD
9444 *
9445 * Returns the resulting xmlDtdPtr or NULL in case of error.
9446 * @input will be freed at parsing end.
9447 */
9448
9449xmlDtdPtr
9450xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9451 xmlCharEncoding enc) {
9452 xmlDtdPtr ret = NULL;
9453 xmlParserCtxtPtr ctxt;
9454 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009455 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009456
9457 if (input == NULL)
9458 return(NULL);
9459
9460 ctxt = xmlNewParserCtxt();
9461 if (ctxt == NULL) {
9462 return(NULL);
9463 }
9464
9465 /*
9466 * Set-up the SAX context
9467 */
9468 if (sax != NULL) {
9469 if (ctxt->sax != NULL)
9470 xmlFree(ctxt->sax);
9471 ctxt->sax = sax;
9472 ctxt->userData = NULL;
9473 }
9474
9475 /*
9476 * generate a parser input from the I/O handler
9477 */
9478
9479 pinput = xmlNewIOInputStream(ctxt, input, enc);
9480 if (pinput == NULL) {
9481 if (sax != NULL) ctxt->sax = NULL;
9482 xmlFreeParserCtxt(ctxt);
9483 return(NULL);
9484 }
9485
9486 /*
9487 * plug some encoding conversion routines here.
9488 */
9489 xmlPushInput(ctxt, pinput);
9490
9491 pinput->filename = NULL;
9492 pinput->line = 1;
9493 pinput->col = 1;
9494 pinput->base = ctxt->input->cur;
9495 pinput->cur = ctxt->input->cur;
9496 pinput->free = NULL;
9497
9498 /*
9499 * let's parse that entity knowing it's an external subset.
9500 */
9501 ctxt->inSubset = 2;
9502 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9503 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9504 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009505
9506 if (enc == XML_CHAR_ENCODING_NONE) {
9507 /*
9508 * Get the 4 first bytes and decode the charset
9509 * if enc != XML_CHAR_ENCODING_NONE
9510 * plug some encoding conversion routines.
9511 */
9512 start[0] = RAW;
9513 start[1] = NXT(1);
9514 start[2] = NXT(2);
9515 start[3] = NXT(3);
9516 enc = xmlDetectCharEncoding(start, 4);
9517 if (enc != XML_CHAR_ENCODING_NONE) {
9518 xmlSwitchEncoding(ctxt, enc);
9519 }
9520 }
9521
Owen Taylor3473f882001-02-23 17:55:21 +00009522 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9523
9524 if (ctxt->myDoc != NULL) {
9525 if (ctxt->wellFormed) {
9526 ret = ctxt->myDoc->extSubset;
9527 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +00009528 if (ret != NULL) {
9529 xmlNodePtr tmp;
9530
9531 ret->doc = NULL;
9532 tmp = ret->children;
9533 while (tmp != NULL) {
9534 tmp->doc = NULL;
9535 tmp = tmp->next;
9536 }
9537 }
Owen Taylor3473f882001-02-23 17:55:21 +00009538 } else {
9539 ret = NULL;
9540 }
9541 xmlFreeDoc(ctxt->myDoc);
9542 ctxt->myDoc = NULL;
9543 }
9544 if (sax != NULL) ctxt->sax = NULL;
9545 xmlFreeParserCtxt(ctxt);
9546
9547 return(ret);
9548}
9549
9550/**
9551 * xmlSAXParseDTD:
9552 * @sax: the SAX handler block
9553 * @ExternalID: a NAME* containing the External ID of the DTD
9554 * @SystemID: a NAME* containing the URL to the DTD
9555 *
9556 * Load and parse an external subset.
9557 *
9558 * Returns the resulting xmlDtdPtr or NULL in case of error.
9559 */
9560
9561xmlDtdPtr
9562xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9563 const xmlChar *SystemID) {
9564 xmlDtdPtr ret = NULL;
9565 xmlParserCtxtPtr ctxt;
9566 xmlParserInputPtr input = NULL;
9567 xmlCharEncoding enc;
9568
9569 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9570
9571 ctxt = xmlNewParserCtxt();
9572 if (ctxt == NULL) {
9573 return(NULL);
9574 }
9575
9576 /*
9577 * Set-up the SAX context
9578 */
9579 if (sax != NULL) {
9580 if (ctxt->sax != NULL)
9581 xmlFree(ctxt->sax);
9582 ctxt->sax = sax;
9583 ctxt->userData = NULL;
9584 }
9585
9586 /*
9587 * Ask the Entity resolver to load the damn thing
9588 */
9589
9590 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillardc6abc3d2003-04-26 13:27:30 +00009591 input = ctxt->sax->resolveEntity(ctxt, ExternalID, SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +00009592 if (input == NULL) {
9593 if (sax != NULL) ctxt->sax = NULL;
9594 xmlFreeParserCtxt(ctxt);
9595 return(NULL);
9596 }
9597
9598 /*
9599 * plug some encoding conversion routines here.
9600 */
9601 xmlPushInput(ctxt, input);
9602 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9603 xmlSwitchEncoding(ctxt, enc);
9604
9605 if (input->filename == NULL)
Daniel Veillard85095e22003-04-23 13:56:44 +00009606 input->filename = (char *) xmlCanonicPath(SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +00009607 input->line = 1;
9608 input->col = 1;
9609 input->base = ctxt->input->cur;
9610 input->cur = ctxt->input->cur;
9611 input->free = NULL;
9612
9613 /*
9614 * let's parse that entity knowing it's an external subset.
9615 */
9616 ctxt->inSubset = 2;
9617 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9618 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9619 ExternalID, SystemID);
9620 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9621
9622 if (ctxt->myDoc != NULL) {
9623 if (ctxt->wellFormed) {
9624 ret = ctxt->myDoc->extSubset;
9625 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +00009626 if (ret != NULL) {
9627 xmlNodePtr tmp;
9628
9629 ret->doc = NULL;
9630 tmp = ret->children;
9631 while (tmp != NULL) {
9632 tmp->doc = NULL;
9633 tmp = tmp->next;
9634 }
9635 }
Owen Taylor3473f882001-02-23 17:55:21 +00009636 } else {
9637 ret = NULL;
9638 }
9639 xmlFreeDoc(ctxt->myDoc);
9640 ctxt->myDoc = NULL;
9641 }
9642 if (sax != NULL) ctxt->sax = NULL;
9643 xmlFreeParserCtxt(ctxt);
9644
9645 return(ret);
9646}
9647
9648/**
9649 * xmlParseDTD:
9650 * @ExternalID: a NAME* containing the External ID of the DTD
9651 * @SystemID: a NAME* containing the URL to the DTD
9652 *
9653 * Load and parse an external subset.
9654 *
9655 * Returns the resulting xmlDtdPtr or NULL in case of error.
9656 */
9657
9658xmlDtdPtr
9659xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9660 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9661}
9662
9663/************************************************************************
9664 * *
9665 * Front ends when parsing an Entity *
9666 * *
9667 ************************************************************************/
9668
9669/**
Owen Taylor3473f882001-02-23 17:55:21 +00009670 * xmlParseCtxtExternalEntity:
9671 * @ctx: the existing parsing context
9672 * @URL: the URL for the entity to load
9673 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009674 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009675 *
9676 * Parse an external general entity within an existing parsing context
9677 * An external general parsed entity is well-formed if it matches the
9678 * production labeled extParsedEnt.
9679 *
9680 * [78] extParsedEnt ::= TextDecl? content
9681 *
9682 * Returns 0 if the entity is well formed, -1 in case of args problem and
9683 * the parser error code otherwise
9684 */
9685
9686int
9687xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009688 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009689 xmlParserCtxtPtr ctxt;
9690 xmlDocPtr newDoc;
9691 xmlSAXHandlerPtr oldsax = NULL;
9692 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009693 xmlChar start[4];
9694 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009695
9696 if (ctx->depth > 40) {
9697 return(XML_ERR_ENTITY_LOOP);
9698 }
9699
Daniel Veillardcda96922001-08-21 10:56:31 +00009700 if (lst != NULL)
9701 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009702 if ((URL == NULL) && (ID == NULL))
9703 return(-1);
9704 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9705 return(-1);
9706
9707
9708 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9709 if (ctxt == NULL) return(-1);
9710 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +00009711 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +00009712 oldsax = ctxt->sax;
9713 ctxt->sax = ctx->sax;
9714 newDoc = xmlNewDoc(BAD_CAST "1.0");
9715 if (newDoc == NULL) {
9716 xmlFreeParserCtxt(ctxt);
9717 return(-1);
9718 }
9719 if (ctx->myDoc != NULL) {
9720 newDoc->intSubset = ctx->myDoc->intSubset;
9721 newDoc->extSubset = ctx->myDoc->extSubset;
9722 }
9723 if (ctx->myDoc->URL != NULL) {
9724 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9725 }
9726 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9727 if (newDoc->children == NULL) {
9728 ctxt->sax = oldsax;
9729 xmlFreeParserCtxt(ctxt);
9730 newDoc->intSubset = NULL;
9731 newDoc->extSubset = NULL;
9732 xmlFreeDoc(newDoc);
9733 return(-1);
9734 }
9735 nodePush(ctxt, newDoc->children);
9736 if (ctx->myDoc == NULL) {
9737 ctxt->myDoc = newDoc;
9738 } else {
9739 ctxt->myDoc = ctx->myDoc;
9740 newDoc->children->doc = ctx->myDoc;
9741 }
9742
Daniel Veillard87a764e2001-06-20 17:41:10 +00009743 /*
9744 * Get the 4 first bytes and decode the charset
9745 * if enc != XML_CHAR_ENCODING_NONE
9746 * plug some encoding conversion routines.
9747 */
9748 GROW
9749 start[0] = RAW;
9750 start[1] = NXT(1);
9751 start[2] = NXT(2);
9752 start[3] = NXT(3);
9753 enc = xmlDetectCharEncoding(start, 4);
9754 if (enc != XML_CHAR_ENCODING_NONE) {
9755 xmlSwitchEncoding(ctxt, enc);
9756 }
9757
Owen Taylor3473f882001-02-23 17:55:21 +00009758 /*
9759 * Parse a possible text declaration first
9760 */
Owen Taylor3473f882001-02-23 17:55:21 +00009761 if ((RAW == '<') && (NXT(1) == '?') &&
9762 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9763 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9764 xmlParseTextDecl(ctxt);
9765 }
9766
9767 /*
9768 * Doing validity checking on chunk doesn't make sense
9769 */
9770 ctxt->instate = XML_PARSER_CONTENT;
9771 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +00009772 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +00009773 ctxt->loadsubset = ctx->loadsubset;
9774 ctxt->depth = ctx->depth + 1;
9775 ctxt->replaceEntities = ctx->replaceEntities;
9776 if (ctxt->validate) {
9777 ctxt->vctxt.error = ctx->vctxt.error;
9778 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009779 } else {
9780 ctxt->vctxt.error = NULL;
9781 ctxt->vctxt.warning = NULL;
9782 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009783 ctxt->vctxt.nodeTab = NULL;
9784 ctxt->vctxt.nodeNr = 0;
9785 ctxt->vctxt.nodeMax = 0;
9786 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009787
9788 xmlParseContent(ctxt);
9789
Daniel Veillard5f8d1a32003-03-23 21:02:00 +00009790 ctx->validate = ctxt->validate;
9791 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +00009792 if ((RAW == '<') && (NXT(1) == '/')) {
9793 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9794 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9795 ctxt->sax->error(ctxt->userData,
9796 "chunk is not well balanced\n");
9797 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009798 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009799 } else if (RAW != 0) {
9800 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9801 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9802 ctxt->sax->error(ctxt->userData,
9803 "extra content at the end of well balanced chunk\n");
9804 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009805 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009806 }
9807 if (ctxt->node != newDoc->children) {
9808 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9809 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9810 ctxt->sax->error(ctxt->userData,
9811 "chunk is not well balanced\n");
9812 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009813 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009814 }
9815
9816 if (!ctxt->wellFormed) {
9817 if (ctxt->errNo == 0)
9818 ret = 1;
9819 else
9820 ret = ctxt->errNo;
9821 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009822 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009823 xmlNodePtr cur;
9824
9825 /*
9826 * Return the newly created nodeset after unlinking it from
9827 * they pseudo parent.
9828 */
9829 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009830 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009831 while (cur != NULL) {
9832 cur->parent = NULL;
9833 cur = cur->next;
9834 }
9835 newDoc->children->children = NULL;
9836 }
9837 ret = 0;
9838 }
9839 ctxt->sax = oldsax;
9840 xmlFreeParserCtxt(ctxt);
9841 newDoc->intSubset = NULL;
9842 newDoc->extSubset = NULL;
9843 xmlFreeDoc(newDoc);
9844
9845 return(ret);
9846}
9847
9848/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009849 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009850 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009851 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009852 * @sax: the SAX handler bloc (possibly NULL)
9853 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9854 * @depth: Used for loop detection, use 0
9855 * @URL: the URL for the entity to load
9856 * @ID: the System ID for the entity to load
9857 * @list: the return value for the set of parsed nodes
9858 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009859 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009860 *
9861 * Returns 0 if the entity is well formed, -1 in case of args problem and
9862 * the parser error code otherwise
9863 */
9864
Daniel Veillard257d9102001-05-08 10:41:44 +00009865static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009866xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9867 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009868 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009869 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009870 xmlParserCtxtPtr ctxt;
9871 xmlDocPtr newDoc;
9872 xmlSAXHandlerPtr oldsax = NULL;
9873 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009874 xmlChar start[4];
9875 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009876
9877 if (depth > 40) {
9878 return(XML_ERR_ENTITY_LOOP);
9879 }
9880
9881
9882
9883 if (list != NULL)
9884 *list = NULL;
9885 if ((URL == NULL) && (ID == NULL))
9886 return(-1);
9887 if (doc == NULL) /* @@ relax but check for dereferences */
9888 return(-1);
9889
9890
9891 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9892 if (ctxt == NULL) return(-1);
9893 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009894 if (oldctxt != NULL) {
9895 ctxt->_private = oldctxt->_private;
9896 ctxt->loadsubset = oldctxt->loadsubset;
9897 ctxt->validate = oldctxt->validate;
9898 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009899 ctxt->record_info = oldctxt->record_info;
9900 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
9901 ctxt->node_seq.length = oldctxt->node_seq.length;
9902 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009903 } else {
9904 /*
9905 * Doing validity checking on chunk without context
9906 * doesn't make sense
9907 */
9908 ctxt->_private = NULL;
9909 ctxt->validate = 0;
9910 ctxt->external = 2;
9911 ctxt->loadsubset = 0;
9912 }
Owen Taylor3473f882001-02-23 17:55:21 +00009913 if (sax != NULL) {
9914 oldsax = ctxt->sax;
9915 ctxt->sax = sax;
9916 if (user_data != NULL)
9917 ctxt->userData = user_data;
9918 }
9919 newDoc = xmlNewDoc(BAD_CAST "1.0");
9920 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009921 ctxt->node_seq.maximum = 0;
9922 ctxt->node_seq.length = 0;
9923 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009924 xmlFreeParserCtxt(ctxt);
9925 return(-1);
9926 }
9927 if (doc != NULL) {
9928 newDoc->intSubset = doc->intSubset;
9929 newDoc->extSubset = doc->extSubset;
9930 }
9931 if (doc->URL != NULL) {
9932 newDoc->URL = xmlStrdup(doc->URL);
9933 }
9934 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9935 if (newDoc->children == NULL) {
9936 if (sax != NULL)
9937 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +00009938 ctxt->node_seq.maximum = 0;
9939 ctxt->node_seq.length = 0;
9940 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009941 xmlFreeParserCtxt(ctxt);
9942 newDoc->intSubset = NULL;
9943 newDoc->extSubset = NULL;
9944 xmlFreeDoc(newDoc);
9945 return(-1);
9946 }
9947 nodePush(ctxt, newDoc->children);
9948 if (doc == NULL) {
9949 ctxt->myDoc = newDoc;
9950 } else {
9951 ctxt->myDoc = doc;
9952 newDoc->children->doc = doc;
9953 }
9954
Daniel Veillard87a764e2001-06-20 17:41:10 +00009955 /*
9956 * Get the 4 first bytes and decode the charset
9957 * if enc != XML_CHAR_ENCODING_NONE
9958 * plug some encoding conversion routines.
9959 */
9960 GROW;
9961 start[0] = RAW;
9962 start[1] = NXT(1);
9963 start[2] = NXT(2);
9964 start[3] = NXT(3);
9965 enc = xmlDetectCharEncoding(start, 4);
9966 if (enc != XML_CHAR_ENCODING_NONE) {
9967 xmlSwitchEncoding(ctxt, enc);
9968 }
9969
Owen Taylor3473f882001-02-23 17:55:21 +00009970 /*
9971 * Parse a possible text declaration first
9972 */
Owen Taylor3473f882001-02-23 17:55:21 +00009973 if ((RAW == '<') && (NXT(1) == '?') &&
9974 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9975 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9976 xmlParseTextDecl(ctxt);
9977 }
9978
Owen Taylor3473f882001-02-23 17:55:21 +00009979 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009980 ctxt->depth = depth;
9981
9982 xmlParseContent(ctxt);
9983
Daniel Veillard561b7f82002-03-20 21:55:57 +00009984 if ((RAW == '<') && (NXT(1) == '/')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009985 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9987 ctxt->sax->error(ctxt->userData,
9988 "chunk is not well balanced\n");
9989 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009990 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00009991 } else if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00009992 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9993 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9994 ctxt->sax->error(ctxt->userData,
9995 "extra content at the end of well balanced chunk\n");
9996 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00009997 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009998 }
9999 if (ctxt->node != newDoc->children) {
10000 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10001 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10002 ctxt->sax->error(ctxt->userData,
10003 "chunk is not well balanced\n");
10004 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010005 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010006 }
10007
10008 if (!ctxt->wellFormed) {
10009 if (ctxt->errNo == 0)
10010 ret = 1;
10011 else
10012 ret = ctxt->errNo;
10013 } else {
10014 if (list != NULL) {
10015 xmlNodePtr cur;
10016
10017 /*
10018 * Return the newly created nodeset after unlinking it from
10019 * they pseudo parent.
10020 */
10021 cur = newDoc->children->children;
10022 *list = cur;
10023 while (cur != NULL) {
10024 cur->parent = NULL;
10025 cur = cur->next;
10026 }
10027 newDoc->children->children = NULL;
10028 }
10029 ret = 0;
10030 }
10031 if (sax != NULL)
10032 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010033 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10034 oldctxt->node_seq.length = ctxt->node_seq.length;
10035 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010036 ctxt->node_seq.maximum = 0;
10037 ctxt->node_seq.length = 0;
10038 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010039 xmlFreeParserCtxt(ctxt);
10040 newDoc->intSubset = NULL;
10041 newDoc->extSubset = NULL;
10042 xmlFreeDoc(newDoc);
10043
10044 return(ret);
10045}
10046
10047/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010048 * xmlParseExternalEntity:
10049 * @doc: the document the chunk pertains to
10050 * @sax: the SAX handler bloc (possibly NULL)
10051 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10052 * @depth: Used for loop detection, use 0
10053 * @URL: the URL for the entity to load
10054 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010055 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010056 *
10057 * Parse an external general entity
10058 * An external general parsed entity is well-formed if it matches the
10059 * production labeled extParsedEnt.
10060 *
10061 * [78] extParsedEnt ::= TextDecl? content
10062 *
10063 * Returns 0 if the entity is well formed, -1 in case of args problem and
10064 * the parser error code otherwise
10065 */
10066
10067int
10068xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010069 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010070 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010071 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010072}
10073
10074/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010075 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010076 * @doc: the document the chunk pertains to
10077 * @sax: the SAX handler bloc (possibly NULL)
10078 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10079 * @depth: Used for loop detection, use 0
10080 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010081 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010082 *
10083 * Parse a well-balanced chunk of an XML document
10084 * called by the parser
10085 * The allowed sequence for the Well Balanced Chunk is the one defined by
10086 * the content production in the XML grammar:
10087 *
10088 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10089 *
10090 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10091 * the parser error code otherwise
10092 */
10093
10094int
10095xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010096 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010097 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10098 depth, string, lst, 0 );
10099}
10100
10101/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000010102 * xmlParseBalancedChunkMemoryInternal:
10103 * @oldctxt: the existing parsing context
10104 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10105 * @user_data: the user data field for the parser context
10106 * @lst: the return value for the set of parsed nodes
10107 *
10108 *
10109 * Parse a well-balanced chunk of an XML document
10110 * called by the parser
10111 * The allowed sequence for the Well Balanced Chunk is the one defined by
10112 * the content production in the XML grammar:
10113 *
10114 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10115 *
10116 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10117 * the parser error code otherwise
10118 *
10119 * In case recover is set to 1, the nodelist will not be empty even if
10120 * the parsed chunk is not well balanced.
10121 */
10122static int
10123xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
10124 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
10125 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010126 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010127 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010128 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010129 int size;
10130 int ret = 0;
10131
10132 if (oldctxt->depth > 40) {
10133 return(XML_ERR_ENTITY_LOOP);
10134 }
10135
10136
10137 if (lst != NULL)
10138 *lst = NULL;
10139 if (string == NULL)
10140 return(-1);
10141
10142 size = xmlStrlen(string);
10143
10144 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10145 if (ctxt == NULL) return(-1);
10146 if (user_data != NULL)
10147 ctxt->userData = user_data;
10148 else
10149 ctxt->userData = ctxt;
10150
10151 oldsax = ctxt->sax;
10152 ctxt->sax = oldctxt->sax;
Daniel Veillarde1ca5032002-12-09 14:13:43 +000010153 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010154 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010155 newDoc = xmlNewDoc(BAD_CAST "1.0");
10156 if (newDoc == NULL) {
10157 ctxt->sax = oldsax;
10158 xmlFreeParserCtxt(ctxt);
10159 return(-1);
10160 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010161 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010162 } else {
10163 ctxt->myDoc = oldctxt->myDoc;
10164 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010165 }
Daniel Veillard9bc53102002-11-25 13:20:04 +000010166 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +000010167 BAD_CAST "pseudoroot", NULL);
10168 if (ctxt->myDoc->children == NULL) {
10169 ctxt->sax = oldsax;
10170 xmlFreeParserCtxt(ctxt);
10171 if (newDoc != NULL)
10172 xmlFreeDoc(newDoc);
10173 return(-1);
10174 }
10175 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010176 ctxt->instate = XML_PARSER_CONTENT;
10177 ctxt->depth = oldctxt->depth + 1;
10178
Daniel Veillard328f48c2002-11-15 15:24:34 +000010179 ctxt->validate = 0;
10180 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000010181 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
10182 /*
10183 * ID/IDREF registration will be done in xmlValidateElement below
10184 */
10185 ctxt->loadsubset |= XML_SKIP_IDS;
10186 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010187
Daniel Veillard68e9e742002-11-16 15:35:11 +000010188 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010189 if ((RAW == '<') && (NXT(1) == '/')) {
10190 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10191 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10192 ctxt->sax->error(ctxt->userData,
10193 "chunk is not well balanced\n");
10194 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010195 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010196 } else if (RAW != 0) {
10197 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10198 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10199 ctxt->sax->error(ctxt->userData,
10200 "extra content at the end of well balanced chunk\n");
10201 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010202 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010203 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010204 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000010205 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10206 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10207 ctxt->sax->error(ctxt->userData,
10208 "chunk is not well balanced\n");
10209 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010210 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010211 }
10212
10213 if (!ctxt->wellFormed) {
10214 if (ctxt->errNo == 0)
10215 ret = 1;
10216 else
10217 ret = ctxt->errNo;
10218 } else {
10219 ret = 0;
10220 }
10221
10222 if ((lst != NULL) && (ret == 0)) {
10223 xmlNodePtr cur;
10224
10225 /*
10226 * Return the newly created nodeset after unlinking it from
10227 * they pseudo parent.
10228 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000010229 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010230 *lst = cur;
10231 while (cur != NULL) {
Daniel Veillard8d589042003-02-04 15:07:21 +000010232 if (oldctxt->validate && oldctxt->wellFormed &&
10233 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
10234 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
10235 oldctxt->myDoc, cur);
10236 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010237 cur->parent = NULL;
10238 cur = cur->next;
10239 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010240 ctxt->myDoc->children->children = NULL;
10241 }
10242 if (ctxt->myDoc != NULL) {
10243 xmlFreeNode(ctxt->myDoc->children);
10244 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010245 }
10246
10247 ctxt->sax = oldsax;
10248 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010249 if (newDoc != NULL)
10250 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010251
10252 return(ret);
10253}
10254
10255/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000010256 * xmlParseBalancedChunkMemoryRecover:
10257 * @doc: the document the chunk pertains to
10258 * @sax: the SAX handler bloc (possibly NULL)
10259 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10260 * @depth: Used for loop detection, use 0
10261 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10262 * @lst: the return value for the set of parsed nodes
10263 * @recover: return nodes even if the data is broken (use 0)
10264 *
10265 *
10266 * Parse a well-balanced chunk of an XML document
10267 * called by the parser
10268 * The allowed sequence for the Well Balanced Chunk is the one defined by
10269 * the content production in the XML grammar:
10270 *
10271 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10272 *
10273 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10274 * the parser error code otherwise
10275 *
10276 * In case recover is set to 1, the nodelist will not be empty even if
10277 * the parsed chunk is not well balanced.
10278 */
10279int
10280xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
10281 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
10282 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000010283 xmlParserCtxtPtr ctxt;
10284 xmlDocPtr newDoc;
10285 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000010286 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000010287 int size;
10288 int ret = 0;
10289
10290 if (depth > 40) {
10291 return(XML_ERR_ENTITY_LOOP);
10292 }
10293
10294
Daniel Veillardcda96922001-08-21 10:56:31 +000010295 if (lst != NULL)
10296 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010297 if (string == NULL)
10298 return(-1);
10299
10300 size = xmlStrlen(string);
10301
10302 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10303 if (ctxt == NULL) return(-1);
10304 ctxt->userData = ctxt;
10305 if (sax != NULL) {
10306 oldsax = ctxt->sax;
10307 ctxt->sax = sax;
10308 if (user_data != NULL)
10309 ctxt->userData = user_data;
10310 }
10311 newDoc = xmlNewDoc(BAD_CAST "1.0");
10312 if (newDoc == NULL) {
10313 xmlFreeParserCtxt(ctxt);
10314 return(-1);
10315 }
10316 if (doc != NULL) {
10317 newDoc->intSubset = doc->intSubset;
10318 newDoc->extSubset = doc->extSubset;
10319 }
10320 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10321 if (newDoc->children == NULL) {
10322 if (sax != NULL)
10323 ctxt->sax = oldsax;
10324 xmlFreeParserCtxt(ctxt);
10325 newDoc->intSubset = NULL;
10326 newDoc->extSubset = NULL;
10327 xmlFreeDoc(newDoc);
10328 return(-1);
10329 }
10330 nodePush(ctxt, newDoc->children);
10331 if (doc == NULL) {
10332 ctxt->myDoc = newDoc;
10333 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000010334 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000010335 newDoc->children->doc = doc;
10336 }
10337 ctxt->instate = XML_PARSER_CONTENT;
10338 ctxt->depth = depth;
10339
10340 /*
10341 * Doing validity checking on chunk doesn't make sense
10342 */
10343 ctxt->validate = 0;
10344 ctxt->loadsubset = 0;
10345
Daniel Veillardb39bc392002-10-26 19:29:51 +000010346 if ( doc != NULL ){
10347 content = doc->children;
10348 doc->children = NULL;
10349 xmlParseContent(ctxt);
10350 doc->children = content;
10351 }
10352 else {
10353 xmlParseContent(ctxt);
10354 }
Owen Taylor3473f882001-02-23 17:55:21 +000010355 if ((RAW == '<') && (NXT(1) == '/')) {
10356 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10357 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10358 ctxt->sax->error(ctxt->userData,
10359 "chunk is not well balanced\n");
10360 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010361 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010362 } else if (RAW != 0) {
10363 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10364 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10365 ctxt->sax->error(ctxt->userData,
10366 "extra content at the end of well balanced chunk\n");
10367 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010368 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010369 }
10370 if (ctxt->node != newDoc->children) {
10371 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10372 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10373 ctxt->sax->error(ctxt->userData,
10374 "chunk is not well balanced\n");
10375 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +000010376 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010377 }
10378
10379 if (!ctxt->wellFormed) {
10380 if (ctxt->errNo == 0)
10381 ret = 1;
10382 else
10383 ret = ctxt->errNo;
10384 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010385 ret = 0;
10386 }
10387
10388 if (lst != NULL && (ret == 0 || recover == 1)) {
10389 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010390
10391 /*
10392 * Return the newly created nodeset after unlinking it from
10393 * they pseudo parent.
10394 */
10395 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010396 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010397 while (cur != NULL) {
10398 cur->parent = NULL;
10399 cur = cur->next;
10400 }
10401 newDoc->children->children = NULL;
10402 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000010403
Owen Taylor3473f882001-02-23 17:55:21 +000010404 if (sax != NULL)
10405 ctxt->sax = oldsax;
10406 xmlFreeParserCtxt(ctxt);
10407 newDoc->intSubset = NULL;
10408 newDoc->extSubset = NULL;
10409 xmlFreeDoc(newDoc);
10410
10411 return(ret);
10412}
10413
10414/**
10415 * xmlSAXParseEntity:
10416 * @sax: the SAX handler block
10417 * @filename: the filename
10418 *
10419 * parse an XML external entity out of context and build a tree.
10420 * It use the given SAX function block to handle the parsing callback.
10421 * If sax is NULL, fallback to the default DOM tree building routines.
10422 *
10423 * [78] extParsedEnt ::= TextDecl? content
10424 *
10425 * This correspond to a "Well Balanced" chunk
10426 *
10427 * Returns the resulting document tree
10428 */
10429
10430xmlDocPtr
10431xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
10432 xmlDocPtr ret;
10433 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010434
10435 ctxt = xmlCreateFileParserCtxt(filename);
10436 if (ctxt == NULL) {
10437 return(NULL);
10438 }
10439 if (sax != NULL) {
10440 if (ctxt->sax != NULL)
10441 xmlFree(ctxt->sax);
10442 ctxt->sax = sax;
10443 ctxt->userData = NULL;
10444 }
10445
Owen Taylor3473f882001-02-23 17:55:21 +000010446 xmlParseExtParsedEnt(ctxt);
10447
10448 if (ctxt->wellFormed)
10449 ret = ctxt->myDoc;
10450 else {
10451 ret = NULL;
10452 xmlFreeDoc(ctxt->myDoc);
10453 ctxt->myDoc = NULL;
10454 }
10455 if (sax != NULL)
10456 ctxt->sax = NULL;
10457 xmlFreeParserCtxt(ctxt);
10458
10459 return(ret);
10460}
10461
10462/**
10463 * xmlParseEntity:
10464 * @filename: the filename
10465 *
10466 * parse an XML external entity out of context and build a tree.
10467 *
10468 * [78] extParsedEnt ::= TextDecl? content
10469 *
10470 * This correspond to a "Well Balanced" chunk
10471 *
10472 * Returns the resulting document tree
10473 */
10474
10475xmlDocPtr
10476xmlParseEntity(const char *filename) {
10477 return(xmlSAXParseEntity(NULL, filename));
10478}
10479
10480/**
10481 * xmlCreateEntityParserCtxt:
10482 * @URL: the entity URL
10483 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010484 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000010485 *
10486 * Create a parser context for an external entity
10487 * Automatic support for ZLIB/Compress compressed document is provided
10488 * by default if found at compile-time.
10489 *
10490 * Returns the new parser context or NULL
10491 */
10492xmlParserCtxtPtr
10493xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10494 const xmlChar *base) {
10495 xmlParserCtxtPtr ctxt;
10496 xmlParserInputPtr inputStream;
10497 char *directory = NULL;
10498 xmlChar *uri;
10499
10500 ctxt = xmlNewParserCtxt();
10501 if (ctxt == NULL) {
10502 return(NULL);
10503 }
10504
10505 uri = xmlBuildURI(URL, base);
10506
10507 if (uri == NULL) {
10508 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10509 if (inputStream == NULL) {
10510 xmlFreeParserCtxt(ctxt);
10511 return(NULL);
10512 }
10513
10514 inputPush(ctxt, inputStream);
10515
10516 if ((ctxt->directory == NULL) && (directory == NULL))
10517 directory = xmlParserGetDirectory((char *)URL);
10518 if ((ctxt->directory == NULL) && (directory != NULL))
10519 ctxt->directory = directory;
10520 } else {
10521 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10522 if (inputStream == NULL) {
10523 xmlFree(uri);
10524 xmlFreeParserCtxt(ctxt);
10525 return(NULL);
10526 }
10527
10528 inputPush(ctxt, inputStream);
10529
10530 if ((ctxt->directory == NULL) && (directory == NULL))
10531 directory = xmlParserGetDirectory((char *)uri);
10532 if ((ctxt->directory == NULL) && (directory != NULL))
10533 ctxt->directory = directory;
10534 xmlFree(uri);
10535 }
10536
10537 return(ctxt);
10538}
10539
10540/************************************************************************
10541 * *
10542 * Front ends when parsing from a file *
10543 * *
10544 ************************************************************************/
10545
10546/**
10547 * xmlCreateFileParserCtxt:
10548 * @filename: the filename
10549 *
10550 * Create a parser context for a file content.
10551 * Automatic support for ZLIB/Compress compressed document is provided
10552 * by default if found at compile-time.
10553 *
10554 * Returns the new parser context or NULL
10555 */
10556xmlParserCtxtPtr
10557xmlCreateFileParserCtxt(const char *filename)
10558{
10559 xmlParserCtxtPtr ctxt;
10560 xmlParserInputPtr inputStream;
Igor Zlatkovicce076162003-02-23 13:39:39 +000010561 char *canonicFilename;
Owen Taylor3473f882001-02-23 17:55:21 +000010562 char *directory = NULL;
10563
Owen Taylor3473f882001-02-23 17:55:21 +000010564 ctxt = xmlNewParserCtxt();
10565 if (ctxt == NULL) {
10566 if (xmlDefaultSAXHandler.error != NULL) {
10567 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10568 }
10569 return(NULL);
10570 }
10571
Daniel Veillardc64b8e92003-02-24 11:47:13 +000010572 canonicFilename = (char *) xmlCanonicPath((const xmlChar *) filename);
Igor Zlatkovicce076162003-02-23 13:39:39 +000010573 if (canonicFilename == NULL) {
10574 if (xmlDefaultSAXHandler.error != NULL) {
10575 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
10576 }
10577 return(NULL);
10578 }
10579
10580 inputStream = xmlLoadExternalEntity(canonicFilename, NULL, ctxt);
10581 xmlFree(canonicFilename);
Owen Taylor3473f882001-02-23 17:55:21 +000010582 if (inputStream == NULL) {
10583 xmlFreeParserCtxt(ctxt);
10584 return(NULL);
10585 }
10586
Owen Taylor3473f882001-02-23 17:55:21 +000010587 inputPush(ctxt, inputStream);
10588 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010589 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000010590 if ((ctxt->directory == NULL) && (directory != NULL))
10591 ctxt->directory = directory;
10592
10593 return(ctxt);
10594}
10595
10596/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010597 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000010598 * @sax: the SAX handler block
10599 * @filename: the filename
10600 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10601 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000010602 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000010603 *
10604 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10605 * compressed document is provided by default if found at compile-time.
10606 * It use the given SAX function block to handle the parsing callback.
10607 * If sax is NULL, fallback to the default DOM tree building routines.
10608 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000010609 * User data (void *) is stored within the parser context in the
10610 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000010611 *
Owen Taylor3473f882001-02-23 17:55:21 +000010612 * Returns the resulting document tree
10613 */
10614
10615xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000010616xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
10617 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000010618 xmlDocPtr ret;
10619 xmlParserCtxtPtr ctxt;
10620 char *directory = NULL;
10621
Daniel Veillard635ef722001-10-29 11:48:19 +000010622 xmlInitParser();
10623
Owen Taylor3473f882001-02-23 17:55:21 +000010624 ctxt = xmlCreateFileParserCtxt(filename);
10625 if (ctxt == NULL) {
10626 return(NULL);
10627 }
10628 if (sax != NULL) {
10629 if (ctxt->sax != NULL)
10630 xmlFree(ctxt->sax);
10631 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010632 }
Daniel Veillarda293c322001-10-02 13:54:14 +000010633 if (data!=NULL) {
10634 ctxt->_private=data;
10635 }
Owen Taylor3473f882001-02-23 17:55:21 +000010636
10637 if ((ctxt->directory == NULL) && (directory == NULL))
10638 directory = xmlParserGetDirectory(filename);
10639 if ((ctxt->directory == NULL) && (directory != NULL))
10640 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
10641
Daniel Veillarddad3f682002-11-17 16:47:27 +000010642 ctxt->recovery = recovery;
10643
Owen Taylor3473f882001-02-23 17:55:21 +000010644 xmlParseDocument(ctxt);
10645
10646 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10647 else {
10648 ret = NULL;
10649 xmlFreeDoc(ctxt->myDoc);
10650 ctxt->myDoc = NULL;
10651 }
10652 if (sax != NULL)
10653 ctxt->sax = NULL;
10654 xmlFreeParserCtxt(ctxt);
10655
10656 return(ret);
10657}
10658
10659/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010660 * xmlSAXParseFile:
10661 * @sax: the SAX handler block
10662 * @filename: the filename
10663 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10664 * documents
10665 *
10666 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10667 * compressed document is provided by default if found at compile-time.
10668 * It use the given SAX function block to handle the parsing callback.
10669 * If sax is NULL, fallback to the default DOM tree building routines.
10670 *
10671 * Returns the resulting document tree
10672 */
10673
10674xmlDocPtr
10675xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10676 int recovery) {
10677 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10678}
10679
10680/**
Owen Taylor3473f882001-02-23 17:55:21 +000010681 * xmlRecoverDoc:
10682 * @cur: a pointer to an array of xmlChar
10683 *
10684 * parse an XML in-memory document and build a tree.
10685 * In the case the document is not Well Formed, a tree is built anyway
10686 *
10687 * Returns the resulting document tree
10688 */
10689
10690xmlDocPtr
10691xmlRecoverDoc(xmlChar *cur) {
10692 return(xmlSAXParseDoc(NULL, cur, 1));
10693}
10694
10695/**
10696 * xmlParseFile:
10697 * @filename: the filename
10698 *
10699 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10700 * compressed document is provided by default if found at compile-time.
10701 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000010702 * Returns the resulting document tree if the file was wellformed,
10703 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000010704 */
10705
10706xmlDocPtr
10707xmlParseFile(const char *filename) {
10708 return(xmlSAXParseFile(NULL, filename, 0));
10709}
10710
10711/**
10712 * xmlRecoverFile:
10713 * @filename: the filename
10714 *
10715 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10716 * compressed document is provided by default if found at compile-time.
10717 * In the case the document is not Well Formed, a tree is built anyway
10718 *
10719 * Returns the resulting document tree
10720 */
10721
10722xmlDocPtr
10723xmlRecoverFile(const char *filename) {
10724 return(xmlSAXParseFile(NULL, filename, 1));
10725}
10726
10727
10728/**
10729 * xmlSetupParserForBuffer:
10730 * @ctxt: an XML parser context
10731 * @buffer: a xmlChar * buffer
10732 * @filename: a file name
10733 *
10734 * Setup the parser context to parse a new buffer; Clears any prior
10735 * contents from the parser context. The buffer parameter must not be
10736 * NULL, but the filename parameter can be
10737 */
10738void
10739xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10740 const char* filename)
10741{
10742 xmlParserInputPtr input;
10743
10744 input = xmlNewInputStream(ctxt);
10745 if (input == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +000010746 xmlGenericError(xmlGenericErrorContext,
10747 "malloc");
Owen Taylor3473f882001-02-23 17:55:21 +000010748 xmlFree(ctxt);
10749 return;
10750 }
10751
10752 xmlClearParserCtxt(ctxt);
10753 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000010754 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000010755 input->base = buffer;
10756 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010757 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010758 inputPush(ctxt, input);
10759}
10760
10761/**
10762 * xmlSAXUserParseFile:
10763 * @sax: a SAX handler
10764 * @user_data: The user data returned on SAX callbacks
10765 * @filename: a file name
10766 *
10767 * parse an XML file and call the given SAX handler routines.
10768 * Automatic support for ZLIB/Compress compressed document is provided
10769 *
10770 * Returns 0 in case of success or a error number otherwise
10771 */
10772int
10773xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10774 const char *filename) {
10775 int ret = 0;
10776 xmlParserCtxtPtr ctxt;
10777
10778 ctxt = xmlCreateFileParserCtxt(filename);
10779 if (ctxt == NULL) return -1;
10780 if (ctxt->sax != &xmlDefaultSAXHandler)
10781 xmlFree(ctxt->sax);
10782 ctxt->sax = sax;
10783 if (user_data != NULL)
10784 ctxt->userData = user_data;
10785
10786 xmlParseDocument(ctxt);
10787
10788 if (ctxt->wellFormed)
10789 ret = 0;
10790 else {
10791 if (ctxt->errNo != 0)
10792 ret = ctxt->errNo;
10793 else
10794 ret = -1;
10795 }
10796 if (sax != NULL)
10797 ctxt->sax = NULL;
10798 xmlFreeParserCtxt(ctxt);
10799
10800 return ret;
10801}
10802
10803/************************************************************************
10804 * *
10805 * Front ends when parsing from memory *
10806 * *
10807 ************************************************************************/
10808
10809/**
10810 * xmlCreateMemoryParserCtxt:
10811 * @buffer: a pointer to a char array
10812 * @size: the size of the array
10813 *
10814 * Create a parser context for an XML in-memory document.
10815 *
10816 * Returns the new parser context or NULL
10817 */
10818xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010819xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010820 xmlParserCtxtPtr ctxt;
10821 xmlParserInputPtr input;
10822 xmlParserInputBufferPtr buf;
10823
10824 if (buffer == NULL)
10825 return(NULL);
10826 if (size <= 0)
10827 return(NULL);
10828
10829 ctxt = xmlNewParserCtxt();
10830 if (ctxt == NULL)
10831 return(NULL);
10832
10833 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010834 if (buf == NULL) {
10835 xmlFreeParserCtxt(ctxt);
10836 return(NULL);
10837 }
Owen Taylor3473f882001-02-23 17:55:21 +000010838
10839 input = xmlNewInputStream(ctxt);
10840 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000010841 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010842 xmlFreeParserCtxt(ctxt);
10843 return(NULL);
10844 }
10845
10846 input->filename = NULL;
10847 input->buf = buf;
10848 input->base = input->buf->buffer->content;
10849 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010850 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010851
10852 inputPush(ctxt, input);
10853 return(ctxt);
10854}
10855
10856/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010857 * xmlSAXParseMemoryWithData:
10858 * @sax: the SAX handler block
10859 * @buffer: an pointer to a char array
10860 * @size: the size of the array
10861 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10862 * documents
10863 * @data: the userdata
10864 *
10865 * parse an XML in-memory block and use the given SAX function block
10866 * to handle the parsing callback. If sax is NULL, fallback to the default
10867 * DOM tree building routines.
10868 *
10869 * User data (void *) is stored within the parser context in the
10870 * context's _private member, so it is available nearly everywhere in libxml
10871 *
10872 * Returns the resulting document tree
10873 */
10874
10875xmlDocPtr
10876xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
10877 int size, int recovery, void *data) {
10878 xmlDocPtr ret;
10879 xmlParserCtxtPtr ctxt;
10880
10881 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10882 if (ctxt == NULL) return(NULL);
10883 if (sax != NULL) {
10884 if (ctxt->sax != NULL)
10885 xmlFree(ctxt->sax);
10886 ctxt->sax = sax;
10887 }
10888 if (data!=NULL) {
10889 ctxt->_private=data;
10890 }
10891
Daniel Veillardadba5f12003-04-04 16:09:01 +000010892 ctxt->recovery = recovery;
10893
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010894 xmlParseDocument(ctxt);
10895
10896 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10897 else {
10898 ret = NULL;
10899 xmlFreeDoc(ctxt->myDoc);
10900 ctxt->myDoc = NULL;
10901 }
10902 if (sax != NULL)
10903 ctxt->sax = NULL;
10904 xmlFreeParserCtxt(ctxt);
10905
10906 return(ret);
10907}
10908
10909/**
Owen Taylor3473f882001-02-23 17:55:21 +000010910 * xmlSAXParseMemory:
10911 * @sax: the SAX handler block
10912 * @buffer: an pointer to a char array
10913 * @size: the size of the array
10914 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10915 * documents
10916 *
10917 * parse an XML in-memory block and use the given SAX function block
10918 * to handle the parsing callback. If sax is NULL, fallback to the default
10919 * DOM tree building routines.
10920 *
10921 * Returns the resulting document tree
10922 */
10923xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010924xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10925 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000010926 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010927}
10928
10929/**
10930 * xmlParseMemory:
10931 * @buffer: an pointer to a char array
10932 * @size: the size of the array
10933 *
10934 * parse an XML in-memory block and build a tree.
10935 *
10936 * Returns the resulting document tree
10937 */
10938
Daniel Veillard50822cb2001-07-26 20:05:51 +000010939xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010940 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10941}
10942
10943/**
10944 * xmlRecoverMemory:
10945 * @buffer: an pointer to a char array
10946 * @size: the size of the array
10947 *
10948 * parse an XML in-memory block and build a tree.
10949 * In the case the document is not Well Formed, a tree is built anyway
10950 *
10951 * Returns the resulting document tree
10952 */
10953
Daniel Veillard50822cb2001-07-26 20:05:51 +000010954xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010955 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10956}
10957
10958/**
10959 * xmlSAXUserParseMemory:
10960 * @sax: a SAX handler
10961 * @user_data: The user data returned on SAX callbacks
10962 * @buffer: an in-memory XML document input
10963 * @size: the length of the XML document in bytes
10964 *
10965 * A better SAX parsing routine.
10966 * parse an XML in-memory buffer and call the given SAX handler routines.
10967 *
10968 * Returns 0 in case of success or a error number otherwise
10969 */
10970int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010971 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010972 int ret = 0;
10973 xmlParserCtxtPtr ctxt;
10974 xmlSAXHandlerPtr oldsax = NULL;
10975
Daniel Veillard9e923512002-08-14 08:48:52 +000010976 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000010977 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10978 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000010979 oldsax = ctxt->sax;
10980 ctxt->sax = sax;
Daniel Veillard30211a02001-04-26 09:33:18 +000010981 if (user_data != NULL)
10982 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010983
10984 xmlParseDocument(ctxt);
10985
10986 if (ctxt->wellFormed)
10987 ret = 0;
10988 else {
10989 if (ctxt->errNo != 0)
10990 ret = ctxt->errNo;
10991 else
10992 ret = -1;
10993 }
Daniel Veillard9e923512002-08-14 08:48:52 +000010994 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000010995 xmlFreeParserCtxt(ctxt);
10996
10997 return ret;
10998}
10999
11000/**
11001 * xmlCreateDocParserCtxt:
11002 * @cur: a pointer to an array of xmlChar
11003 *
11004 * Creates a parser context for an XML in-memory document.
11005 *
11006 * Returns the new parser context or NULL
11007 */
11008xmlParserCtxtPtr
11009xmlCreateDocParserCtxt(xmlChar *cur) {
11010 int len;
11011
11012 if (cur == NULL)
11013 return(NULL);
11014 len = xmlStrlen(cur);
11015 return(xmlCreateMemoryParserCtxt((char *)cur, len));
11016}
11017
11018/**
11019 * xmlSAXParseDoc:
11020 * @sax: the SAX handler block
11021 * @cur: a pointer to an array of xmlChar
11022 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11023 * documents
11024 *
11025 * parse an XML in-memory document and build a tree.
11026 * It use the given SAX function block to handle the parsing callback.
11027 * If sax is NULL, fallback to the default DOM tree building routines.
11028 *
11029 * Returns the resulting document tree
11030 */
11031
11032xmlDocPtr
11033xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
11034 xmlDocPtr ret;
11035 xmlParserCtxtPtr ctxt;
11036
11037 if (cur == NULL) return(NULL);
11038
11039
11040 ctxt = xmlCreateDocParserCtxt(cur);
11041 if (ctxt == NULL) return(NULL);
11042 if (sax != NULL) {
11043 ctxt->sax = sax;
11044 ctxt->userData = NULL;
11045 }
11046
11047 xmlParseDocument(ctxt);
11048 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11049 else {
11050 ret = NULL;
11051 xmlFreeDoc(ctxt->myDoc);
11052 ctxt->myDoc = NULL;
11053 }
11054 if (sax != NULL)
11055 ctxt->sax = NULL;
11056 xmlFreeParserCtxt(ctxt);
11057
11058 return(ret);
11059}
11060
11061/**
11062 * xmlParseDoc:
11063 * @cur: a pointer to an array of xmlChar
11064 *
11065 * parse an XML in-memory document and build a tree.
11066 *
11067 * Returns the resulting document tree
11068 */
11069
11070xmlDocPtr
11071xmlParseDoc(xmlChar *cur) {
11072 return(xmlSAXParseDoc(NULL, cur, 0));
11073}
11074
Daniel Veillard8107a222002-01-13 14:10:10 +000011075/************************************************************************
11076 * *
11077 * Specific function to keep track of entities references *
11078 * and used by the XSLT debugger *
11079 * *
11080 ************************************************************************/
11081
11082static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
11083
11084/**
11085 * xmlAddEntityReference:
11086 * @ent : A valid entity
11087 * @firstNode : A valid first node for children of entity
11088 * @lastNode : A valid last node of children entity
11089 *
11090 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
11091 */
11092static void
11093xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
11094 xmlNodePtr lastNode)
11095{
11096 if (xmlEntityRefFunc != NULL) {
11097 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
11098 }
11099}
11100
11101
11102/**
11103 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000011104 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000011105 *
11106 * Set the function to call call back when a xml reference has been made
11107 */
11108void
11109xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
11110{
11111 xmlEntityRefFunc = func;
11112}
Owen Taylor3473f882001-02-23 17:55:21 +000011113
11114/************************************************************************
11115 * *
11116 * Miscellaneous *
11117 * *
11118 ************************************************************************/
11119
11120#ifdef LIBXML_XPATH_ENABLED
11121#include <libxml/xpath.h>
11122#endif
11123
/* Default generic error handler, defined outside this file; xmlInitParser()
 * compares the current handler against it. */
extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);

/* Non-zero once xmlInitParser() has completed; reset by xmlCleanupParser(). */
static int xmlParserInitialized = 0;
11126
11127/**
11128 * xmlInitParser:
11129 *
11130 * Initialization function for the XML parser.
11131 * This is not reentrant. Call once before processing in case of
11132 * use in multithreaded programs.
11133 */
11134
11135void
11136xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000011137 if (xmlParserInitialized != 0)
11138 return;
Owen Taylor3473f882001-02-23 17:55:21 +000011139
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011140 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
11141 (xmlGenericError == NULL))
11142 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011143 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000011144 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000011145 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000011146 xmlInitCharEncodingHandlers();
11147 xmlInitializePredefinedEntities();
11148 xmlDefaultSAXHandlerInit();
11149 xmlRegisterDefaultInputCallbacks();
11150 xmlRegisterDefaultOutputCallbacks();
11151#ifdef LIBXML_HTML_ENABLED
11152 htmlInitAutoClose();
11153 htmlDefaultSAXHandlerInit();
11154#endif
11155#ifdef LIBXML_XPATH_ENABLED
11156 xmlXPathInit();
11157#endif
11158 xmlParserInitialized = 1;
11159}
11160
11161/**
11162 * xmlCleanupParser:
11163 *
11164 * Cleanup function for the XML parser. It tries to reclaim all
11165 * parsing related global memory allocated for the parser processing.
11166 * It doesn't deallocate any document related memory. Calling this
11167 * function should not prevent reusing the parser.
Daniel Veillard7424eb62003-01-24 14:14:52 +000011168 * One should call xmlCleanupParser() only when the process has
11169 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000011170 */
11171
11172void
11173xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000011174 xmlCleanupCharEncodingHandlers();
11175 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000011176#ifdef LIBXML_CATALOG_ENABLED
11177 xmlCatalogCleanup();
11178#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000011179 xmlCleanupThreads();
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011180 xmlCleanupGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000011181 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011182}