blob: c3ea2a3c990691f733ad24a0ca381d2a4514d0ca [file] [log] [blame]
Daniel Veillard260a68f1998-08-13 03:39:55 +00001/*
Daniel Veillardb1059e22000-09-16 14:02:43 +00002 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
Daniel Veillard260a68f1998-08-13 03:39:55 +00004 *
Daniel Veillarde0854c32000-08-27 21:12:29 +00005 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
27 *
Daniel Veillard260a68f1998-08-13 03:39:55 +000028 * See Copyright for the status of this software.
29 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000030 * Daniel.Veillard@w3.org
Daniel Veillardce6e98d2000-11-25 09:54:49 +000031 *
32 * 14 Nov 2000 ht - truncated definitions of xmlSubstituteEntitiesDefaultValue
33 * and xmlDoValidityCheckingDefaultValue for VMS
Daniel Veillard260a68f1998-08-13 03:39:55 +000034 */
35
36#ifdef WIN32
Daniel Veillard3c558c31999-12-22 11:30:41 +000037#include "win32config.h"
Daniel Veillard39c7d712000-09-10 16:14:55 +000038#define XML_DIR_SEP '\\'
Daniel Veillard260a68f1998-08-13 03:39:55 +000039#else
Daniel Veillard7f7d1111999-09-22 09:46:25 +000040#include "config.h"
Daniel Veillard39c7d712000-09-10 16:14:55 +000041#define XML_DIR_SEP '/'
Daniel Veillard260a68f1998-08-13 03:39:55 +000042#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +000043
Daniel Veillard260a68f1998-08-13 03:39:55 +000044#include <stdio.h>
Daniel Veillardb71379b2000-10-09 12:30:39 +000045#include <stdlib.h>
Daniel Veillard32bc74e2000-07-14 14:49:25 +000046#include <string.h>
Daniel Veillardb71379b2000-10-09 12:30:39 +000047#include <libxml/xmlmemory.h>
48#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
57
Daniel Veillard7f7d1111999-09-22 09:46:25 +000058#ifdef HAVE_CTYPE_H
59#include <ctype.h>
60#endif
61#ifdef HAVE_STDLIB_H
Seth Alvese7f12e61998-10-01 20:51:15 +000062#include <stdlib.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000063#endif
64#ifdef HAVE_SYS_STAT_H
Daniel Veillard260a68f1998-08-13 03:39:55 +000065#include <sys/stat.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000066#endif
Daniel Veillard260a68f1998-08-13 03:39:55 +000067#ifdef HAVE_FCNTL_H
68#include <fcntl.h>
69#endif
70#ifdef HAVE_UNISTD_H
71#include <unistd.h>
72#endif
73#ifdef HAVE_ZLIB_H
74#include <zlib.h>
75#endif
76
Daniel Veillard260a68f1998-08-13 03:39:55 +000077
/*
 * Sizes used for the parser's internal working buffers.
 */
#define XML_PARSER_BIG_BUFFER_SIZE 1000
#define XML_PARSER_BUFFER_SIZE 100

/*
 * Various global defaults for parsing.
 *
 * On VMS two of the variable names are shortened: the long names are
 * mapped by macro onto the truncated symbols, so the definitions below
 * produce xmlSubstituteEntitiesDefaultVal / xmlDoValidityCheckingDefaultVal
 * on that platform and the full names everywhere else.
 */
#ifdef VMS
#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
#endif

int xmlGetWarningsDefaultValue = 1;
int xmlParserDebugEntities = 0;
int xmlSubstituteEntitiesDefaultValue = 0;
int xmlDoValidityCheckingDefaultValue = 0;
int xmlPedanticParserDefaultValue = 0;
int xmlKeepBlanksDefaultValue = 1;
Daniel Veillard14fff061999-06-22 21:49:07 +000097
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by W3C specs.
 */
const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000106
Daniel Veillardb1059e22000-09-16 14:02:43 +0000107/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillardcf461992000-03-14 18:30:20 +0000108void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
109xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
110 const xmlChar **str);
Daniel Veillardbe803962000-06-28 23:40:59 +0000111
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000112
Daniel Veillard260a68f1998-08-13 03:39:55 +0000113/************************************************************************
114 * *
115 * Parser stacks related functions and macros *
116 * *
117 ************************************************************************/
Daniel Veillard011b63c1999-06-02 17:44:04 +0000118
Daniel Veillard10a2c651999-12-12 13:03:50 +0000119xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
120 const xmlChar ** str);
Daniel Veillard011b63c1999-06-02 17:44:04 +0000121
Daniel Veillard260a68f1998-08-13 03:39:55 +0000122/*
123 * Generic function for accessing stacks in the Parser Context
124 */
125
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000126#define PUSH_AND_POP(scope, type, name) \
127scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
Daniel Veillard260a68f1998-08-13 03:39:55 +0000128 if (ctxt->name##Nr >= ctxt->name##Max) { \
129 ctxt->name##Max *= 2; \
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000130 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
Daniel Veillard260a68f1998-08-13 03:39:55 +0000131 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
132 if (ctxt->name##Tab == NULL) { \
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000133 xmlGenericError(xmlGenericErrorContext, \
134 "realloc failed !\n"); \
Daniel Veillard0142b842000-01-14 14:45:24 +0000135 return(0); \
Daniel Veillard260a68f1998-08-13 03:39:55 +0000136 } \
137 } \
138 ctxt->name##Tab[ctxt->name##Nr] = value; \
139 ctxt->name = value; \
140 return(ctxt->name##Nr++); \
141} \
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000142scope type name##Pop(xmlParserCtxtPtr ctxt) { \
Daniel Veillardd692aa41999-02-28 21:54:31 +0000143 type ret; \
Daniel Veillard260a68f1998-08-13 03:39:55 +0000144 if (ctxt->name##Nr <= 0) return(0); \
145 ctxt->name##Nr--; \
Daniel Veillardccb09631998-10-27 06:21:04 +0000146 if (ctxt->name##Nr > 0) \
147 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
148 else \
149 ctxt->name = NULL; \
Daniel Veillardd692aa41999-02-28 21:54:31 +0000150 ret = ctxt->name##Tab[ctxt->name##Nr]; \
151 ctxt->name##Tab[ctxt->name##Nr] = 0; \
152 return(ret); \
Daniel Veillard260a68f1998-08-13 03:39:55 +0000153} \
154
Daniel Veillardb1059e22000-09-16 14:02:43 +0000155/*
156 * Those macros actually generate the functions
157 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000158PUSH_AND_POP(extern, xmlParserInputPtr, input)
159PUSH_AND_POP(extern, xmlNodePtr, node)
160PUSH_AND_POP(extern, xmlChar*, name)
Daniel Veillard260a68f1998-08-13 03:39:55 +0000161
Daniel Veillardcf461992000-03-14 18:30:20 +0000162int spacePush(xmlParserCtxtPtr ctxt, int val) {
163 if (ctxt->spaceNr >= ctxt->spaceMax) {
164 ctxt->spaceMax *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000165 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
Daniel Veillardcf461992000-03-14 18:30:20 +0000166 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
167 if (ctxt->spaceTab == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000168 xmlGenericError(xmlGenericErrorContext,
169 "realloc failed !\n");
Daniel Veillardcf461992000-03-14 18:30:20 +0000170 return(0);
171 }
172 }
173 ctxt->spaceTab[ctxt->spaceNr] = val;
174 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
175 return(ctxt->spaceNr++);
176}
177
178int spacePop(xmlParserCtxtPtr ctxt) {
179 int ret;
180 if (ctxt->spaceNr <= 0) return(0);
181 ctxt->spaceNr--;
182 if (ctxt->spaceNr > 0)
183 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
184 else
185 ctxt->space = NULL;
186 ret = ctxt->spaceTab[ctxt->spaceNr];
187 ctxt->spaceTab[ctxt->spaceNr] = -1;
188 return(ret);
189}
190
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substrings.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops unfinished entities on the fly.
 *   NEXTL(l) Skip l xmlChars in the input buffer
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
Daniel Veillard260a68f1998-08-13 03:39:55 +0000224
/*
 * Raw accessors on the current input; all of them expect a variable named
 * ctxt to be in scope.
 *   RAW      -1 while ctxt->token is set, else the xmlChar at the cursor
 *   CUR      ctxt->token while it is set, else the xmlChar at the cursor
 *   NXT(val) the xmlChar val positions after the cursor
 *   CUR_PTR  the cursor itself
 */
#define RAW      ((ctxt->token != 0) ? -1 : (*ctxt->input->cur))
#define CUR      ((ctxt->token != 0) ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR  ctxt->input->cur
Daniel Veillardcf461992000-03-14 18:30:20 +0000229
/*
 * SKIP(val): advance the cursor (and the nbChars counter) by val xmlChars,
 * hand over to xmlParserHandlePEReference when the cursor lands on '%',
 * and when the cursor lands on a 0 byte try to read more input, popping
 * the current input if nothing more could be read.
 * Note that val is evaluated twice.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val);                                             \
    ctxt->input->cur += (val);                                          \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
    /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */\
    if (*ctxt->input->cur == 0) {                                       \
        if (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)          \
            xmlPopInput(ctxt);                                          \
    }                                                                   \
  } while (0)
Daniel Veillardcf461992000-03-14 18:30:20 +0000238
/*
 * SHRINK: call xmlParserInputShrink on the current input; if the cursor
 * then sits on a 0 byte try to read more input, popping the current
 * input if nothing more could be read.
 */
#define SHRINK do {                                                     \
    xmlParserInputShrink(ctxt->input);                                  \
    if (*ctxt->input->cur == 0) {                                       \
        if (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)          \
            xmlPopInput(ctxt);                                          \
    }                                                                   \
  } while (0)
Daniel Veillardb05deb71999-08-10 19:04:08 +0000245
/*
 * GROW: ask xmlParserInputGrow for more data on the current input; if the
 * cursor still sits on a 0 byte ask once more and pop the current input
 * when that second attempt reads nothing.
 */
#define GROW do {                                                       \
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                       \
    if (*ctxt->input->cur == 0) {                                       \
        if (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)          \
            xmlPopInput(ctxt);                                          \
    }                                                                   \
  } while (0)
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000252
/* SKIP_BLANKS: shorthand for xmlSkipBlankChars() on the local ctxt. */
#define SKIP_BLANKS     xmlSkipBlankChars(ctxt)

/* NEXT: shorthand for xmlNextChar() on the local ctxt. */
#define NEXT            xmlNextChar(ctxt)
Daniel Veillardcf461992000-03-14 18:30:20 +0000256
/*
 * NEXTL(l): skip l xmlChars in the input buffer. The line/column counters
 * are updated from the xmlChar under the cursor before moving (a newline
 * starts a new line at column 1, anything else adds one column), any
 * pending token is dropped, and xmlParserHandlePEReference is called when
 * the cursor lands on '%'.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) != '\n') {                                  \
        ctxt->input->col++;                                             \
    } else {                                                            \
        ctxt->input->line++;                                            \
        ctxt->input->col = 1;                                           \
    }                                                                   \
    ctxt->token = 0;                                                    \
    ctxt->input->cur += (l);                                            \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
    /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */\
  } while (0)
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000265
/*
 * CUR_CHAR(l):     current character of the context via xmlCurrentChar(),
 *                  which receives the address of l.
 * CUR_SCHAR(s, l): same through xmlStringCurrentChar() for the string s.
 */
#define CUR_CHAR(l)     xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))
Daniel Veillardcf461992000-03-14 18:30:20 +0000268
/*
 * COPY_BUF(l,b,i,v): append the character v to buffer b at index i and
 * advance i. When l is 1 the value is stored directly as one xmlChar,
 * otherwise xmlCopyChar() writes it and its return value is added to i.
 *
 * Every argument is parenthesized so that expression arguments keep their
 * meaning (e.g. the xmlChar cast applies to the whole of v), and the body
 * is wrapped in do { } while (0) so the macro behaves as one statement.
 * i must be an lvalue; l, b and i are evaluated more than once.
 */
#define COPY_BUF(l,b,i,v) do {                                          \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyChar((l), &(b)[(i)], (v));                       \
  } while (0)
Daniel Veillardcf461992000-03-14 18:30:20 +0000272
273/**
Daniel Veillardcf461992000-03-14 18:30:20 +0000274 * xmlSkipBlankChars:
275 * @ctxt: the XML parser context
276 *
277 * skip all blanks character found at that point in the input streams.
278 * It pops up finished entities in the process if allowable at that point.
279 *
280 * Returns the number of space chars skipped
281 */
282
283int
284xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
285 int cur, res = 0;
286
Daniel Veillarde0854c32000-08-27 21:12:29 +0000287 /*
288 * It's Okay to use CUR/NEXT here since all the blanks are on
289 * the ASCII range.
290 */
Daniel Veillardcf461992000-03-14 18:30:20 +0000291 do {
292 cur = CUR;
Daniel Veillarde0854c32000-08-27 21:12:29 +0000293 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillardcf461992000-03-14 18:30:20 +0000294 NEXT;
295 cur = CUR;
296 res++;
297 }
298 while ((cur == 0) && (ctxt->inputNr > 1) &&
299 (ctxt->instate != XML_PARSER_COMMENT)) {
300 xmlPopInput(ctxt);
301 cur = CUR;
302 }
Daniel Veillarde0854c32000-08-27 21:12:29 +0000303 /*
304 * Need to handle support of entities branching here
305 */
Daniel Veillardcf461992000-03-14 18:30:20 +0000306 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
Daniel Veillardb1059e22000-09-16 14:02:43 +0000307 /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */
Daniel Veillarde0854c32000-08-27 21:12:29 +0000308 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
Daniel Veillardcf461992000-03-14 18:30:20 +0000309 return(res);
310}
Daniel Veillard260a68f1998-08-13 03:39:55 +0000311
Daniel Veillardb05deb71999-08-10 19:04:08 +0000312/************************************************************************
313 * *
Daniel Veillardb1059e22000-09-16 14:02:43 +0000314 * Commodity functions to handle entities *
Daniel Veillardb05deb71999-08-10 19:04:08 +0000315 * *
316 ************************************************************************/
Daniel Veillard260a68f1998-08-13 03:39:55 +0000317
Daniel Veillard11e00581998-10-24 18:27:49 +0000318/**
319 * xmlPopInput:
320 * @ctxt: an XML parser context
321 *
Daniel Veillard260a68f1998-08-13 03:39:55 +0000322 * xmlPopInput: the current input pointed by ctxt->input came to an end
323 * pop it and return the next char.
324 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000325 * Returns the current xmlChar in the parser context
Daniel Veillard260a68f1998-08-13 03:39:55 +0000326 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000327xmlChar
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000328xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000329 if (ctxt->inputNr == 1) return(0); /* End of main Input */
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000330 if (xmlParserDebugEntities)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000331 xmlGenericError(xmlGenericErrorContext,
332 "Popping input %d\n", ctxt->inputNr);
Daniel Veillardbc50b591999-03-01 12:28:53 +0000333 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillardb05deb71999-08-10 19:04:08 +0000334 if ((*ctxt->input->cur == 0) &&
335 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
336 return(xmlPopInput(ctxt));
Daniel Veillard260a68f1998-08-13 03:39:55 +0000337 return(CUR);
338}
339
Daniel Veillard11e00581998-10-24 18:27:49 +0000340/**
341 * xmlPushInput:
342 * @ctxt: an XML parser context
343 * @input: an XML parser input fragment (entity, XML fragment ...).
344 *
Daniel Veillard260a68f1998-08-13 03:39:55 +0000345 * xmlPushInput: switch to a new input stream which is stacked on top
346 * of the previous one(s).
347 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000348void
349xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000350 if (input == NULL) return;
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000351
352 if (xmlParserDebugEntities) {
353 if ((ctxt->input != NULL) && (ctxt->input->filename))
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000354 xmlGenericError(xmlGenericErrorContext,
355 "%s(%d): ", ctxt->input->filename,
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000356 ctxt->input->line);
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000357 xmlGenericError(xmlGenericErrorContext,
358 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000359 }
Daniel Veillard260a68f1998-08-13 03:39:55 +0000360 inputPush(ctxt, input);
Daniel Veillardcf461992000-03-14 18:30:20 +0000361 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +0000362}
363
Daniel Veillard11e00581998-10-24 18:27:49 +0000364/**
Daniel Veillardb05deb71999-08-10 19:04:08 +0000365 * xmlParseCharRef:
366 * @ctxt: an XML parser context
367 *
368 * parse Reference declarations
369 *
370 * [66] CharRef ::= '&#' [0-9]+ ';' |
371 * '&#x' [0-9a-fA-F]+ ';'
372 *
373 * [ WFC: Legal Character ]
374 * Characters referred to using character references must match the
375 * production for Char.
376 *
Daniel Veillard10a2c651999-12-12 13:03:50 +0000377 * Returns the value parsed (as an int), 0 in case of error
Daniel Veillardb05deb71999-08-10 19:04:08 +0000378 */
379int
380xmlParseCharRef(xmlParserCtxtPtr ctxt) {
381 int val = 0;
Daniel Veillarde0854c32000-08-27 21:12:29 +0000382 int count = 0;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000383
384 if (ctxt->token != 0) {
385 val = ctxt->token;
386 ctxt->token = 0;
387 return(val);
388 }
Daniel Veillarde0854c32000-08-27 21:12:29 +0000389 /*
390 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
391 */
Daniel Veillardcf461992000-03-14 18:30:20 +0000392 if ((RAW == '&') && (NXT(1) == '#') &&
Daniel Veillardb05deb71999-08-10 19:04:08 +0000393 (NXT(2) == 'x')) {
394 SKIP(3);
Daniel Veillarde0854c32000-08-27 21:12:29 +0000395 GROW;
396 while (RAW != ';') { /* loop blocked by count */
397 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
Daniel Veillardb05deb71999-08-10 19:04:08 +0000398 val = val * 16 + (CUR - '0');
Daniel Veillarde0854c32000-08-27 21:12:29 +0000399 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
Daniel Veillardb05deb71999-08-10 19:04:08 +0000400 val = val * 16 + (CUR - 'a') + 10;
Daniel Veillarde0854c32000-08-27 21:12:29 +0000401 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
Daniel Veillardb05deb71999-08-10 19:04:08 +0000402 val = val * 16 + (CUR - 'A') + 10;
403 else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000404 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000405 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
406 ctxt->sax->error(ctxt->userData,
407 "xmlParseCharRef: invalid hexadecimal value\n");
408 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +0000409 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000410 val = 0;
411 break;
412 }
413 NEXT;
Daniel Veillarde0854c32000-08-27 21:12:29 +0000414 count++;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000415 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000416 if (RAW == ';') {
417 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
418 ctxt->nbChars ++;
419 ctxt->input->cur++;
420 }
421 } else if ((RAW == '&') && (NXT(1) == '#')) {
Daniel Veillardb05deb71999-08-10 19:04:08 +0000422 SKIP(2);
Daniel Veillarde0854c32000-08-27 21:12:29 +0000423 GROW;
424 while (RAW != ';') { /* loop blocked by count */
425 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
Daniel Veillardb05deb71999-08-10 19:04:08 +0000426 val = val * 10 + (CUR - '0');
427 else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000428 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000429 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
430 ctxt->sax->error(ctxt->userData,
431 "xmlParseCharRef: invalid decimal value\n");
432 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +0000433 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000434 val = 0;
435 break;
436 }
437 NEXT;
Daniel Veillarde0854c32000-08-27 21:12:29 +0000438 count++;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000439 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000440 if (RAW == ';') {
441 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
442 ctxt->nbChars ++;
443 ctxt->input->cur++;
444 }
Daniel Veillardb05deb71999-08-10 19:04:08 +0000445 } else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000446 ctxt->errNo = XML_ERR_INVALID_CHARREF;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000447 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
448 ctxt->sax->error(ctxt->userData,
449 "xmlParseCharRef: invalid value\n");
450 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +0000451 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000452 }
453
454 /*
455 * [ WFC: Legal Character ]
456 * Characters referred to using character references must match the
457 * production for Char.
458 */
459 if (IS_CHAR(val)) {
460 return(val);
461 } else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000462 ctxt->errNo = XML_ERR_INVALID_CHAR;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000463 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000464 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
Daniel Veillardb05deb71999-08-10 19:04:08 +0000465 val);
466 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +0000467 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000468 }
469 return(0);
470}
471
472/**
Daniel Veillard10a2c651999-12-12 13:03:50 +0000473 * xmlParseStringCharRef:
474 * @ctxt: an XML parser context
475 * @str: a pointer to an index in the string
476 *
477 * parse Reference declarations, variant parsing from a string rather
478 * than an an input flow.
479 *
480 * [66] CharRef ::= '&#' [0-9]+ ';' |
481 * '&#x' [0-9a-fA-F]+ ';'
482 *
483 * [ WFC: Legal Character ]
484 * Characters referred to using character references must match the
485 * production for Char.
486 *
487 * Returns the value parsed (as an int), 0 in case of error, str will be
488 * updated to the current value of the index
489 */
490int
491xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
492 const xmlChar *ptr;
493 xmlChar cur;
494 int val = 0;
495
496 if ((str == NULL) || (*str == NULL)) return(0);
497 ptr = *str;
498 cur = *ptr;
Daniel Veillard0caf07a1999-12-21 16:25:49 +0000499 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
Daniel Veillard10a2c651999-12-12 13:03:50 +0000500 ptr += 3;
501 cur = *ptr;
Daniel Veillarde0854c32000-08-27 21:12:29 +0000502 while (cur != ';') { /* Non input consuming loop */
Daniel Veillard10a2c651999-12-12 13:03:50 +0000503 if ((cur >= '0') && (cur <= '9'))
504 val = val * 16 + (cur - '0');
505 else if ((cur >= 'a') && (cur <= 'f'))
506 val = val * 16 + (cur - 'a') + 10;
507 else if ((cur >= 'A') && (cur <= 'F'))
508 val = val * 16 + (cur - 'A') + 10;
509 else {
510 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
511 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
512 ctxt->sax->error(ctxt->userData,
Daniel Veillardbe803962000-06-28 23:40:59 +0000513 "xmlParseStringCharRef: invalid hexadecimal value\n");
Daniel Veillard10a2c651999-12-12 13:03:50 +0000514 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +0000515 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +0000516 val = 0;
517 break;
518 }
519 ptr++;
520 cur = *ptr;
521 }
522 if (cur == ';')
523 ptr++;
Daniel Veillard0142b842000-01-14 14:45:24 +0000524 } else if ((cur == '&') && (ptr[1] == '#')){
Daniel Veillard10a2c651999-12-12 13:03:50 +0000525 ptr += 2;
526 cur = *ptr;
Daniel Veillarde0854c32000-08-27 21:12:29 +0000527 while (cur != ';') { /* Non input consuming loops */
Daniel Veillard10a2c651999-12-12 13:03:50 +0000528 if ((cur >= '0') && (cur <= '9'))
529 val = val * 10 + (cur - '0');
530 else {
531 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
532 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
533 ctxt->sax->error(ctxt->userData,
Daniel Veillardbe803962000-06-28 23:40:59 +0000534 "xmlParseStringCharRef: invalid decimal value\n");
Daniel Veillard10a2c651999-12-12 13:03:50 +0000535 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +0000536 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +0000537 val = 0;
538 break;
539 }
540 ptr++;
541 cur = *ptr;
542 }
543 if (cur == ';')
544 ptr++;
545 } else {
546 ctxt->errNo = XML_ERR_INVALID_CHARREF;
547 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
548 ctxt->sax->error(ctxt->userData,
549 "xmlParseCharRef: invalid value\n");
550 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +0000551 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +0000552 return(0);
553 }
554 *str = ptr;
555
556 /*
557 * [ WFC: Legal Character ]
558 * Characters referred to using character references must match the
559 * production for Char.
560 */
561 if (IS_CHAR(val)) {
562 return(val);
563 } else {
564 ctxt->errNo = XML_ERR_INVALID_CHAR;
565 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
566 ctxt->sax->error(ctxt->userData,
567 "CharRef: invalid xmlChar value %d\n", val);
568 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +0000569 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +0000570 }
571 return(0);
572}
573
574/**
Daniel Veillardb05deb71999-08-10 19:04:08 +0000575 * xmlParserHandlePEReference:
576 * @ctxt: the parser context
577 *
578 * [69] PEReference ::= '%' Name ';'
579 *
580 * [ WFC: No Recursion ]
Daniel Veillardb1059e22000-09-16 14:02:43 +0000581 * A parsed entity must not contain a recursive
Daniel Veillardb05deb71999-08-10 19:04:08 +0000582 * reference to itself, either directly or indirectly.
583 *
584 * [ WFC: Entity Declared ]
585 * In a document without any DTD, a document with only an internal DTD
586 * subset which contains no parameter entity references, or a document
587 * with "standalone='yes'", ... ... The declaration of a parameter
588 * entity must precede any reference to it...
589 *
590 * [ VC: Entity Declared ]
591 * In a document with an external subset or external parameter entities
592 * with "standalone='no'", ... ... The declaration of a parameter entity
593 * must precede any reference to it...
594 *
595 * [ WFC: In DTD ]
596 * Parameter-entity references may only appear in the DTD.
597 * NOTE: misleading but this is handled.
598 *
599 * A PEReference may have been detected in the current input stream
600 * the handling is done accordingly to
601 * http://www.w3.org/TR/REC-xml#entproc
602 * i.e.
603 * - Included in literal in entity values
 *    - Included as Parameter Entity reference within DTDs
605 */
606void
607xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000608 xmlChar *name;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000609 xmlEntityPtr entity = NULL;
610 xmlParserInputPtr input;
611
Daniel Veillard35008381999-10-25 13:15:52 +0000612 if (ctxt->token != 0) {
Daniel Veillard35008381999-10-25 13:15:52 +0000613 return;
614 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000615 if (RAW != '%') return;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000616 switch(ctxt->instate) {
617 case XML_PARSER_CDATA_SECTION:
618 return;
619 case XML_PARSER_COMMENT:
620 return;
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000621 case XML_PARSER_START_TAG:
622 return;
623 case XML_PARSER_END_TAG:
624 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000625 case XML_PARSER_EOF:
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000626 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000627 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
628 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
629 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +0000630 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000631 return;
632 case XML_PARSER_PROLOG:
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000633 case XML_PARSER_START:
634 case XML_PARSER_MISC:
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000635 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000636 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
637 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
638 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +0000639 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000640 return;
641 case XML_PARSER_ENTITY_DECL:
642 case XML_PARSER_CONTENT:
643 case XML_PARSER_ATTRIBUTE_VALUE:
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000644 case XML_PARSER_PI:
Daniel Veillardcf461992000-03-14 18:30:20 +0000645 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillardb05deb71999-08-10 19:04:08 +0000646 /* we just ignore it there */
647 return;
648 case XML_PARSER_EPILOG:
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000649 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
651 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
652 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +0000653 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000654 return;
655 case XML_PARSER_ENTITY_VALUE:
656 /*
657 * NOTE: in the case of entity values, we don't do the
Daniel Veillard51e3b151999-11-12 17:02:31 +0000658 * substitution here since we need the literal
Daniel Veillardb05deb71999-08-10 19:04:08 +0000659 * entity value to be able to save the internal
660 * subset of the document.
Daniel Veillarde0854c32000-08-27 21:12:29 +0000661 * This will be handled by xmlStringDecodeEntities
Daniel Veillardb05deb71999-08-10 19:04:08 +0000662 */
663 return;
664 case XML_PARSER_DTD:
665 /*
666 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
667 * In the internal DTD subset, parameter-entity references
668 * can occur only where markup declarations can occur, not
669 * within markup declarations.
670 * In that case this is handled in xmlParseMarkupDecl
671 */
672 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
673 return;
Daniel Veillard41e06512000-11-13 11:47:47 +0000674 break;
675 case XML_PARSER_IGNORE:
676 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000677 }
678
679 NEXT;
680 name = xmlParseName(ctxt);
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000681 if (xmlParserDebugEntities)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000682 xmlGenericError(xmlGenericErrorContext,
683 "PE Reference: %s\n", name);
Daniel Veillardb05deb71999-08-10 19:04:08 +0000684 if (name == NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000685 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000686 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
687 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
688 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +0000689 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000690 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +0000691 if (RAW == ';') {
Daniel Veillardb05deb71999-08-10 19:04:08 +0000692 NEXT;
693 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
694 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
695 if (entity == NULL) {
696
697 /*
698 * [ WFC: Entity Declared ]
699 * In a document without any DTD, a document with only an
700 * internal DTD subset which contains no parameter entity
701 * references, or a document with "standalone='yes'", ...
702 * ... The declaration of a parameter entity must precede
703 * any reference to it...
704 */
705 if ((ctxt->standalone == 1) ||
706 ((ctxt->hasExternalSubset == 0) &&
707 (ctxt->hasPErefs == 0))) {
708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
709 ctxt->sax->error(ctxt->userData,
710 "PEReference: %%%s; not found\n", name);
711 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +0000712 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000713 } else {
714 /*
715 * [ VC: Entity Declared ]
716 * In a document with an external subset or external
717 * parameter entities with "standalone='no'", ...
718 * ... The declaration of a parameter entity must precede
719 * any reference to it...
720 */
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000721 if ((!ctxt->disableSAX) &&
722 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
Daniel Veillard87b95392000-08-12 21:12:04 +0000723 ctxt->vctxt.error(ctxt->vctxt.userData,
724 "PEReference: %%%s; not found\n", name);
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000725 } else if ((!ctxt->disableSAX) &&
726 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +0000727 ctxt->sax->warning(ctxt->userData,
728 "PEReference: %%%s; not found\n", name);
729 ctxt->valid = 0;
730 }
731 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +0000732 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
733 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillardb05deb71999-08-10 19:04:08 +0000734 /*
Daniel Veillardb1059e22000-09-16 14:02:43 +0000735 * handle the extra spaces added before and after
Daniel Veillardb05deb71999-08-10 19:04:08 +0000736 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardb1059e22000-09-16 14:02:43 +0000737 * this is done independantly.
Daniel Veillardb05deb71999-08-10 19:04:08 +0000738 */
739 input = xmlNewEntityInputStream(ctxt, entity);
740 xmlPushInput(ctxt, input);
Daniel Veillardcf461992000-03-14 18:30:20 +0000741 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
742 (RAW == '<') && (NXT(1) == '?') &&
743 (NXT(2) == 'x') && (NXT(3) == 'm') &&
744 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
745 xmlParseTextDecl(ctxt);
746 }
747 if (ctxt->token == 0)
748 ctxt->token = ' ';
Daniel Veillardb05deb71999-08-10 19:04:08 +0000749 } else {
750 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
751 ctxt->sax->error(ctxt->userData,
752 "xmlHandlePEReference: %s is not a parameter entity\n",
753 name);
754 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +0000755 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000756 }
757 }
758 } else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000759 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000760 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
761 ctxt->sax->error(ctxt->userData,
762 "xmlHandlePEReference: expecting ';'\n");
763 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +0000764 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000765 }
Daniel Veillard6454aec1999-09-02 22:04:43 +0000766 xmlFree(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +0000767 }
768}
769
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates @buffer accordingly. The result of
 * xmlRealloc() is first stored in a temporary so that, when the allocation
 * fails, the previous block is still reachable: it is released and the
 * enclosing function returns NULL instead of leaking it.
 *
 * Must be used in a function returning a pointer, with a local variable
 * named <buffer>_size holding the allocated size in xmlChar.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_tmp;						\
    buffer##_size *= 2;							\
    buffer##_tmp = (xmlChar *)						\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (buffer##_tmp == NULL) {						\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = buffer##_tmp;						\
}
782
Daniel Veillard011b63c1999-06-02 17:44:04 +0000783/**
Daniel Veillard10a2c651999-12-12 13:03:50 +0000784 * xmlStringDecodeEntities:
785 * @ctxt: the parser context
786 * @str: the input string
787 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
788 * @end: an end marker xmlChar, 0 if none
789 * @end2: an end marker xmlChar, 0 if none
790 * @end3: an end marker xmlChar, 0 if none
791 *
Daniel Veillarde0854c32000-08-27 21:12:29 +0000792 * Takes a entity string content and process to do the adequate subtitutions.
793 *
Daniel Veillard10a2c651999-12-12 13:03:50 +0000794 * [67] Reference ::= EntityRef | CharRef
795 *
796 * [69] PEReference ::= '%' Name ';'
797 *
798 * Returns A newly allocated string with the substitution done. The caller
799 * must deallocate it !
800 */
801xmlChar *
802xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
803 xmlChar end, xmlChar end2, xmlChar end3) {
804 xmlChar *buffer = NULL;
805 int buffer_size = 0;
Daniel Veillard10a2c651999-12-12 13:03:50 +0000806
807 xmlChar *current = NULL;
808 xmlEntityPtr ent;
Daniel Veillardcf461992000-03-14 18:30:20 +0000809 int c,l;
810 int nbchars = 0;
811
Daniel Veillard87b95392000-08-12 21:12:04 +0000812 if (str == NULL)
813 return(NULL);
814
Daniel Veillardcf461992000-03-14 18:30:20 +0000815 if (ctxt->depth > 40) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +0000816 ctxt->errNo = XML_ERR_ENTITY_LOOP;
Daniel Veillardcf461992000-03-14 18:30:20 +0000817 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
818 ctxt->sax->error(ctxt->userData,
819 "Detected entity reference loop\n");
820 ctxt->wellFormed = 0;
821 ctxt->disableSAX = 1;
Daniel Veillardcf461992000-03-14 18:30:20 +0000822 return(NULL);
823 }
Daniel Veillard10a2c651999-12-12 13:03:50 +0000824
825 /*
826 * allocate a translation buffer.
827 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000828 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard10a2c651999-12-12 13:03:50 +0000829 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
830 if (buffer == NULL) {
831 perror("xmlDecodeEntities: malloc failed");
832 return(NULL);
833 }
Daniel Veillard10a2c651999-12-12 13:03:50 +0000834
835 /*
836 * Ok loop until we reach one of the ending char or a size limit.
Daniel Veillarde0854c32000-08-27 21:12:29 +0000837 * we are operating on already parsed values.
Daniel Veillard10a2c651999-12-12 13:03:50 +0000838 */
Daniel Veillardcf461992000-03-14 18:30:20 +0000839 c = CUR_SCHAR(str, l);
Daniel Veillarde0854c32000-08-27 21:12:29 +0000840 while ((c != 0) && (c != end) && /* non input consuming loop */
841 (c != end2) && (c != end3)) {
Daniel Veillard10a2c651999-12-12 13:03:50 +0000842
Daniel Veillardcf461992000-03-14 18:30:20 +0000843 if (c == 0) break;
844 if ((c == '&') && (str[1] == '#')) {
Daniel Veillard10a2c651999-12-12 13:03:50 +0000845 int val = xmlParseStringCharRef(ctxt, &str);
Daniel Veillardcf461992000-03-14 18:30:20 +0000846 if (val != 0) {
847 COPY_BUF(0,buffer,nbchars,val);
848 }
849 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000850 if (xmlParserDebugEntities)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000851 xmlGenericError(xmlGenericErrorContext,
852 "String decoding Entity Reference: %.30s\n",
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000853 str);
Daniel Veillard10a2c651999-12-12 13:03:50 +0000854 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillarde0854c32000-08-27 21:12:29 +0000855 if ((ent != NULL) &&
856 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard52402ce2000-08-22 23:36:12 +0000857 if (ent->content != NULL) {
858 COPY_BUF(0,buffer,nbchars,ent->content[0]);
859 } else {
860 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
861 ctxt->sax->error(ctxt->userData,
862 "internal error entity has no content\n");
863 }
864 } else if ((ent != NULL) && (ent->content != NULL)) {
Daniel Veillardcf461992000-03-14 18:30:20 +0000865 xmlChar *rep;
Daniel Veillard10a2c651999-12-12 13:03:50 +0000866
Daniel Veillardcf461992000-03-14 18:30:20 +0000867 ctxt->depth++;
868 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
869 0, 0, 0);
870 ctxt->depth--;
871 if (rep != NULL) {
872 current = rep;
Daniel Veillarde0854c32000-08-27 21:12:29 +0000873 while (*current != 0) { /* non input consuming loop */
Daniel Veillardcf461992000-03-14 18:30:20 +0000874 buffer[nbchars++] = *current++;
875 if (nbchars >
876 buffer_size - XML_PARSER_BUFFER_SIZE) {
877 growBuffer(buffer);
878 }
Daniel Veillard10a2c651999-12-12 13:03:50 +0000879 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000880 xmlFree(rep);
Daniel Veillard10a2c651999-12-12 13:03:50 +0000881 }
882 } else if (ent != NULL) {
883 int i = xmlStrlen(ent->name);
884 const xmlChar *cur = ent->name;
885
Daniel Veillardcf461992000-03-14 18:30:20 +0000886 buffer[nbchars++] = '&';
887 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard10a2c651999-12-12 13:03:50 +0000888 growBuffer(buffer);
Daniel Veillard10a2c651999-12-12 13:03:50 +0000889 }
890 for (;i > 0;i--)
Daniel Veillardcf461992000-03-14 18:30:20 +0000891 buffer[nbchars++] = *cur++;
892 buffer[nbchars++] = ';';
Daniel Veillard10a2c651999-12-12 13:03:50 +0000893 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000894 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000895 if (xmlParserDebugEntities)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000896 xmlGenericError(xmlGenericErrorContext,
897 "String decoding PE Reference: %.30s\n", str);
Daniel Veillard10a2c651999-12-12 13:03:50 +0000898 ent = xmlParseStringPEReference(ctxt, &str);
899 if (ent != NULL) {
Daniel Veillardcf461992000-03-14 18:30:20 +0000900 xmlChar *rep;
Daniel Veillard10a2c651999-12-12 13:03:50 +0000901
Daniel Veillardcf461992000-03-14 18:30:20 +0000902 ctxt->depth++;
903 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
904 0, 0, 0);
905 ctxt->depth--;
906 if (rep != NULL) {
907 current = rep;
Daniel Veillarde0854c32000-08-27 21:12:29 +0000908 while (*current != 0) { /* non input consuming loop */
Daniel Veillardcf461992000-03-14 18:30:20 +0000909 buffer[nbchars++] = *current++;
910 if (nbchars >
911 buffer_size - XML_PARSER_BUFFER_SIZE) {
912 growBuffer(buffer);
913 }
Daniel Veillard10a2c651999-12-12 13:03:50 +0000914 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000915 xmlFree(rep);
Daniel Veillard10a2c651999-12-12 13:03:50 +0000916 }
917 }
918 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +0000919 COPY_BUF(l,buffer,nbchars,c);
920 str += l;
921 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard10a2c651999-12-12 13:03:50 +0000922 growBuffer(buffer);
Daniel Veillard10a2c651999-12-12 13:03:50 +0000923 }
Daniel Veillard10a2c651999-12-12 13:03:50 +0000924 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000925 c = CUR_SCHAR(str, l);
Daniel Veillard10a2c651999-12-12 13:03:50 +0000926 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000927 buffer[nbchars++] = 0;
Daniel Veillard10a2c651999-12-12 13:03:50 +0000928 return(buffer);
929}
930
Daniel Veillard260a68f1998-08-13 03:39:55 +0000931
932/************************************************************************
933 * *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000934 * Commodity functions to handle xmlChars *
Daniel Veillard260a68f1998-08-13 03:39:55 +0000935 * *
936 ************************************************************************/
937
Daniel Veillard11e00581998-10-24 18:27:49 +0000938/**
939 * xmlStrndup:
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000940 * @cur: the input xmlChar *
Daniel Veillard11e00581998-10-24 18:27:49 +0000941 * @len: the len of @cur
942 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000943 * a strndup for array of xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +0000944 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000945 * Returns a new xmlChar * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +0000946 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000947xmlChar *
948xmlStrndup(const xmlChar *cur, int len) {
Daniel Veillard10a2c651999-12-12 13:03:50 +0000949 xmlChar *ret;
950
951 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000952 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +0000953 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000954 xmlGenericError(xmlGenericErrorContext,
955 "malloc of %ld byte failed\n",
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000956 (len + 1) * (long)sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +0000957 return(NULL);
958 }
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000959 memcpy(ret, cur, len * sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +0000960 ret[len] = 0;
961 return(ret);
962}
963
Daniel Veillard11e00581998-10-24 18:27:49 +0000964/**
965 * xmlStrdup:
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000966 * @cur: the input xmlChar *
Daniel Veillard11e00581998-10-24 18:27:49 +0000967 *
Daniel Veillardcf461992000-03-14 18:30:20 +0000968 * a strdup for array of xmlChar's. Since they are supposed to be
969 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
970 * a termination mark of '0'.
Daniel Veillard1e346af1999-02-22 10:33:01 +0000971 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000972 * Returns a new xmlChar * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +0000973 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000974xmlChar *
975xmlStrdup(const xmlChar *cur) {
976 const xmlChar *p = cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +0000977
Daniel Veillard10a2c651999-12-12 13:03:50 +0000978 if (cur == NULL) return(NULL);
Daniel Veillarde0854c32000-08-27 21:12:29 +0000979 while (*p != 0) p++; /* non input consuming */
Daniel Veillard260a68f1998-08-13 03:39:55 +0000980 return(xmlStrndup(cur, p - cur));
981}
982
Daniel Veillard11e00581998-10-24 18:27:49 +0000983/**
984 * xmlCharStrndup:
985 * @cur: the input char *
986 * @len: the len of @cur
987 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000988 * a strndup for char's to xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +0000989 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000990 * Returns a new xmlChar * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +0000991 */
992
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000993xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000994xmlCharStrndup(const char *cur, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000995 int i;
Daniel Veillard10a2c651999-12-12 13:03:50 +0000996 xmlChar *ret;
997
998 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000999 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00001000 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00001001 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001002 (len + 1) * (long)sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00001003 return(NULL);
1004 }
1005 for (i = 0;i < len;i++)
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001006 ret[i] = (xmlChar) cur[i];
Daniel Veillard260a68f1998-08-13 03:39:55 +00001007 ret[len] = 0;
1008 return(ret);
1009}
1010
Daniel Veillard11e00581998-10-24 18:27:49 +00001011/**
1012 * xmlCharStrdup:
1013 * @cur: the input char *
1014 * @len: the len of @cur
1015 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001016 * a strdup for char's to xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001017 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001018 * Returns a new xmlChar * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001019 */
1020
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001021xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001022xmlCharStrdup(const char *cur) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001023 const char *p = cur;
1024
Daniel Veillard10a2c651999-12-12 13:03:50 +00001025 if (cur == NULL) return(NULL);
Daniel Veillarde0854c32000-08-27 21:12:29 +00001026 while (*p != '\0') p++; /* non input consuming */
Daniel Veillard260a68f1998-08-13 03:39:55 +00001027 return(xmlCharStrndup(cur, p - cur));
1028}
1029
Daniel Veillard11e00581998-10-24 18:27:49 +00001030/**
1031 * xmlStrcmp:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001032 * @str1: the first xmlChar *
1033 * @str2: the second xmlChar *
Daniel Veillard11e00581998-10-24 18:27:49 +00001034 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001035 * a strcmp for xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001036 *
1037 * Returns the integer result of the comparison
Daniel Veillard260a68f1998-08-13 03:39:55 +00001038 */
1039
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001040int
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001041xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001042 register int tmp;
1043
Daniel Veillardb1059e22000-09-16 14:02:43 +00001044 if (str1 == str2) return(0);
Daniel Veillard10a2c651999-12-12 13:03:50 +00001045 if (str1 == NULL) return(-1);
1046 if (str2 == NULL) return(1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001047 do {
Daniel Veillardb656ebe2000-09-22 13:51:48 +00001048 tmp = *str1++ - *str2;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001049 if (tmp != 0) return(tmp);
Daniel Veillardb656ebe2000-09-22 13:51:48 +00001050 } while (*str2++ != 0);
1051 return 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001052}
1053
Daniel Veillard11e00581998-10-24 18:27:49 +00001054/**
Daniel Veillard8b5dd832000-10-01 20:28:44 +00001055 * xmlStrEqual:
1056 * @str1: the first xmlChar *
1057 * @str2: the second xmlChar *
1058 *
1059 * Check if both string are equal of have same content
1060 * Should be a bit more readable and faster than xmlStrEqual()
1061 *
1062 * Returns 1 if they are equal, 0 if they are different
1063 */
1064
1065int
1066xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1067 if (str1 == str2) return(1);
1068 if (str1 == NULL) return(0);
1069 if (str2 == NULL) return(0);
1070 do {
1071 if (*str1++ != *str2) return(0);
1072 } while (*str2++);
1073 return(1);
1074}
1075
1076/**
Daniel Veillard11e00581998-10-24 18:27:49 +00001077 * xmlStrncmp:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001078 * @str1: the first xmlChar *
1079 * @str2: the second xmlChar *
Daniel Veillard11e00581998-10-24 18:27:49 +00001080 * @len: the max comparison length
1081 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001082 * a strncmp for xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001083 *
1084 * Returns the integer result of the comparison
Daniel Veillard260a68f1998-08-13 03:39:55 +00001085 */
1086
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001087int
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001088xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001089 register int tmp;
1090
1091 if (len <= 0) return(0);
Daniel Veillardb656ebe2000-09-22 13:51:48 +00001092 if (str1 == str2) return(0);
Daniel Veillard10a2c651999-12-12 13:03:50 +00001093 if (str1 == NULL) return(-1);
1094 if (str2 == NULL) return(1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001095 do {
Daniel Veillardb656ebe2000-09-22 13:51:48 +00001096 tmp = *str1++ - *str2;
1097 if (tmp != 0 || --len == 0) return(tmp);
1098 } while (*str2++ != 0);
1099 return 0;
1100}
1101
1102static xmlChar casemap[256] = {
1103 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1104 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1105 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1106 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1107 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1108 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1109 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1110 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1111 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1112 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1113 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1114 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1115 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1116 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1117 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1118 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1119 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1120 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1121 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1122 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1123 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1124 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1125 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1126 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1127 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1128 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1129 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1130 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1131 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1132 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1133 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1134 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1135};
1136
1137/**
1138 * xmlStrcasecmp:
1139 * @str1: the first xmlChar *
1140 * @str2: the second xmlChar *
1141 *
1142 * a strcasecmp for xmlChar's
1143 *
1144 * Returns the integer result of the comparison
1145 */
1146
1147int
1148xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1149 register int tmp;
1150
1151 if (str1 == str2) return(0);
1152 if (str1 == NULL) return(-1);
1153 if (str2 == NULL) return(1);
1154 do {
1155 tmp = casemap[*str1++] - casemap[*str2];
Daniel Veillard260a68f1998-08-13 03:39:55 +00001156 if (tmp != 0) return(tmp);
Daniel Veillardb656ebe2000-09-22 13:51:48 +00001157 } while (*str2++ != 0);
1158 return 0;
1159}
1160
1161/**
1162 * xmlStrncasecmp:
1163 * @str1: the first xmlChar *
1164 * @str2: the second xmlChar *
1165 * @len: the max comparison length
1166 *
1167 * a strncasecmp for xmlChar's
1168 *
1169 * Returns the integer result of the comparison
1170 */
1171
1172int
1173xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1174 register int tmp;
1175
1176 if (len <= 0) return(0);
1177 if (str1 == str2) return(0);
1178 if (str1 == NULL) return(-1);
1179 if (str2 == NULL) return(1);
1180 do {
1181 tmp = casemap[*str1++] - casemap[*str2];
1182 if (tmp != 0 || --len == 0) return(tmp);
1183 } while (*str2++ != 0);
1184 return 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001185}
1186
Daniel Veillard11e00581998-10-24 18:27:49 +00001187/**
1188 * xmlStrchr:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001189 * @str: the xmlChar * array
1190 * @val: the xmlChar to search
Daniel Veillard11e00581998-10-24 18:27:49 +00001191 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001192 * a strchr for xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001193 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001194 * Returns the xmlChar * for the first occurence or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001195 */
1196
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001197const xmlChar *
1198xmlStrchr(const xmlChar *str, xmlChar val) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00001199 if (str == NULL) return(NULL);
Daniel Veillarde0854c32000-08-27 21:12:29 +00001200 while (*str != 0) { /* non input consuming */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001201 if (*str == val) return((xmlChar *) str);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001202 str++;
1203 }
1204 return(NULL);
1205}
1206
Daniel Veillard11e00581998-10-24 18:27:49 +00001207/**
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001208 * xmlStrstr:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001209 * @str: the xmlChar * array (haystack)
1210 * @val: the xmlChar to search (needle)
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001211 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001212 * a strstr for xmlChar's
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001213 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001214 * Returns the xmlChar * for the first occurence or NULL.
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001215 */
1216
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001217const xmlChar *
1218xmlStrstr(const xmlChar *str, xmlChar *val) {
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001219 int n;
1220
1221 if (str == NULL) return(NULL);
1222 if (val == NULL) return(NULL);
1223 n = xmlStrlen(val);
1224
1225 if (n == 0) return(str);
Daniel Veillarde0854c32000-08-27 21:12:29 +00001226 while (*str != 0) { /* non input consuming */
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001227 if (*str == *val) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001228 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001229 }
1230 str++;
1231 }
1232 return(NULL);
1233}
1234
1235/**
Daniel Veillardb656ebe2000-09-22 13:51:48 +00001236 * xmlStrcasestr:
1237 * @str: the xmlChar * array (haystack)
1238 * @val: the xmlChar to search (needle)
1239 *
1240 * a case-ignoring strstr for xmlChar's
1241 *
1242 * Returns the xmlChar * for the first occurence or NULL.
1243 */
1244
1245const xmlChar *
1246xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1247 int n;
1248
1249 if (str == NULL) return(NULL);
1250 if (val == NULL) return(NULL);
1251 n = xmlStrlen(val);
1252
1253 if (n == 0) return(str);
1254 while (*str != 0) { /* non input consuming */
1255 if (casemap[*str] == casemap[*val])
1256 if (!xmlStrncasecmp(str, val, n)) return(str);
1257 str++;
1258 }
1259 return(NULL);
1260}
1261
1262/**
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001263 * xmlStrsub:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001264 * @str: the xmlChar * array (haystack)
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001265 * @start: the index of the first char (zero based)
1266 * @len: the length of the substring
1267 *
1268 * Extract a substring of a given string
1269 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001270 * Returns the xmlChar * for the first occurence or NULL.
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001271 */
1272
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001273xmlChar *
1274xmlStrsub(const xmlChar *str, int start, int len) {
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001275 int i;
1276
1277 if (str == NULL) return(NULL);
1278 if (start < 0) return(NULL);
1279 if (len < 0) return(NULL);
1280
1281 for (i = 0;i < start;i++) {
1282 if (*str == 0) return(NULL);
1283 str++;
1284 }
1285 if (*str == 0) return(NULL);
1286 return(xmlStrndup(str, len));
1287}
1288
1289/**
Daniel Veillard11e00581998-10-24 18:27:49 +00001290 * xmlStrlen:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001291 * @str: the xmlChar * array
Daniel Veillard11e00581998-10-24 18:27:49 +00001292 *
Daniel Veillard51e3b151999-11-12 17:02:31 +00001293 * length of a xmlChar's string
Daniel Veillard1e346af1999-02-22 10:33:01 +00001294 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001295 * Returns the number of xmlChar contained in the ARRAY.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001296 */
1297
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001298int
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001299xmlStrlen(const xmlChar *str) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001300 int len = 0;
1301
1302 if (str == NULL) return(0);
Daniel Veillarde0854c32000-08-27 21:12:29 +00001303 while (*str != 0) { /* non input consuming */
Daniel Veillard260a68f1998-08-13 03:39:55 +00001304 str++;
1305 len++;
1306 }
1307 return(len);
1308}
1309
Daniel Veillard11e00581998-10-24 18:27:49 +00001310/**
1311 * xmlStrncat:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001312 * @cur: the original xmlChar * array
1313 * @add: the xmlChar * array added
Daniel Veillard11e00581998-10-24 18:27:49 +00001314 * @len: the length of @add
1315 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001316 * a strncat for array of xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001317 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001318 * Returns a new xmlChar * containing the concatenated string.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001319 */
1320
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001321xmlChar *
1322xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001323 int size;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001324 xmlChar *ret;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001325
1326 if ((add == NULL) || (len == 0))
1327 return(cur);
1328 if (cur == NULL)
1329 return(xmlStrndup(add, len));
1330
1331 size = xmlStrlen(cur);
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001332 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00001333 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00001334 xmlGenericError(xmlGenericErrorContext,
1335 "xmlStrncat: realloc of %ld byte failed\n",
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001336 (size + len + 1) * (long)sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00001337 return(cur);
1338 }
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001339 memcpy(&ret[size], add, len * sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00001340 ret[size + len] = 0;
1341 return(ret);
1342}
1343
Daniel Veillard11e00581998-10-24 18:27:49 +00001344/**
1345 * xmlStrcat:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001346 * @cur: the original xmlChar * array
1347 * @add: the xmlChar * array added
Daniel Veillard11e00581998-10-24 18:27:49 +00001348 *
Daniel Veillardcf461992000-03-14 18:30:20 +00001349 * a strcat for array of xmlChar's. Since they are supposed to be
1350 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1351 * a termination mark of '0'.
Daniel Veillard1e346af1999-02-22 10:33:01 +00001352 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001353 * Returns a new xmlChar * containing the concatenated string.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001354 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001355xmlChar *
1356xmlStrcat(xmlChar *cur, const xmlChar *add) {
1357 const xmlChar *p = add;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001358
1359 if (add == NULL) return(cur);
1360 if (cur == NULL)
1361 return(xmlStrdup(add));
1362
Daniel Veillarde0854c32000-08-27 21:12:29 +00001363 while (*p != 0) p++; /* non input consuming */
Daniel Veillard260a68f1998-08-13 03:39:55 +00001364 return(xmlStrncat(cur, add, p - add));
1365}
1366
1367/************************************************************************
1368 * *
1369 * Commodity functions, cleanup needed ? *
1370 * *
1371 ************************************************************************/
1372
Daniel Veillard11e00581998-10-24 18:27:49 +00001373/**
1374 * areBlanks:
1375 * @ctxt: an XML parser context
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001376 * @str: a xmlChar *
Daniel Veillard11e00581998-10-24 18:27:49 +00001377 * @len: the size of @str
1378 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00001379 * Is this a sequence of blank chars that one can ignore ?
Daniel Veillard11e00581998-10-24 18:27:49 +00001380 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00001381 * Returns 1 if ignorable 0 otherwise.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001382 */
1383
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001384static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00001385 int i, ret;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001386 xmlNodePtr lastChild;
1387
Daniel Veillard83a30e72000-03-02 03:33:32 +00001388 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00001389 * Check for xml:space value.
1390 */
1391 if (*(ctxt->space) == 1)
1392 return(0);
1393
1394 /*
Daniel Veillard83a30e72000-03-02 03:33:32 +00001395 * Check that the string is made of blanks
1396 */
Daniel Veillard260a68f1998-08-13 03:39:55 +00001397 for (i = 0;i < len;i++)
1398 if (!(IS_BLANK(str[i]))) return(0);
1399
Daniel Veillard83a30e72000-03-02 03:33:32 +00001400 /*
1401 * Look if the element is mixed content in the Dtd if available
1402 */
Daniel Veillardb05deb71999-08-10 19:04:08 +00001403 if (ctxt->myDoc != NULL) {
1404 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1405 if (ret == 0) return(1);
1406 if (ret == 1) return(0);
1407 }
Daniel Veillard83a30e72000-03-02 03:33:32 +00001408
Daniel Veillardb05deb71999-08-10 19:04:08 +00001409 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00001410 * Otherwise, heuristic :-\
Daniel Veillardb05deb71999-08-10 19:04:08 +00001411 */
Daniel Veillard83a30e72000-03-02 03:33:32 +00001412 if (ctxt->keepBlanks)
1413 return(0);
Daniel Veillardcf461992000-03-14 18:30:20 +00001414 if (RAW != '<') return(0);
Daniel Veillard83a30e72000-03-02 03:33:32 +00001415 if (ctxt->node == NULL) return(0);
Daniel Veillardcf461992000-03-14 18:30:20 +00001416 if ((ctxt->node->children == NULL) &&
1417 (RAW == '<') && (NXT(1) == '/')) return(0);
Daniel Veillard83a30e72000-03-02 03:33:32 +00001418
Daniel Veillard260a68f1998-08-13 03:39:55 +00001419 lastChild = xmlGetLastChild(ctxt->node);
1420 if (lastChild == NULL) {
Daniel Veillard90fb02c2000-03-02 03:46:43 +00001421 if (ctxt->node->content != NULL) return(0);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001422 } else if (xmlNodeIsText(lastChild))
Daniel Veillard90fb02c2000-03-02 03:46:43 +00001423 return(0);
Daniel Veillardcf461992000-03-14 18:30:20 +00001424 else if ((ctxt->node->children != NULL) &&
1425 (xmlNodeIsText(ctxt->node->children)))
Daniel Veillard90fb02c2000-03-02 03:46:43 +00001426 return(0);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001427 return(1);
1428}
1429
Daniel Veillard260a68f1998-08-13 03:39:55 +00001430/*
 * Forward declarations for recursive behaviour.
1432 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00001433void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1434void xmlParseReference(xmlParserCtxtPtr ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001435
1436/************************************************************************
1437 * *
1438 * Extra stuff for namespace support *
1439 * Relates to http://www.w3.org/TR/WD-xml-names *
1440 * *
1441 ************************************************************************/
1442
Daniel Veillard11e00581998-10-24 18:27:49 +00001443/**
Daniel Veillarde0854c32000-08-27 21:12:29 +00001444 * xmlSplitQName:
1445 * @ctxt: an XML parser context
1446 * @name: an XML parser context
1447 * @prefix: a xmlChar **
1448 *
1449 * parse an UTF8 encoded XML qualified name string
1450 *
1451 * [NS 5] QName ::= (Prefix ':')? LocalPart
1452 *
1453 * [NS 6] Prefix ::= NCName
1454 *
1455 * [NS 7] LocalPart ::= NCName
1456 *
1457 * Returns the local part, and prefix is updated
1458 * to get the Prefix if any.
1459 */
1460
1461xmlChar *
1462xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1463 xmlChar buf[XML_MAX_NAMELEN + 5];
1464 xmlChar *buffer = NULL;
1465 int len = 0;
1466 int max = XML_MAX_NAMELEN;
1467 xmlChar *ret = NULL;
1468 const xmlChar *cur = name;
1469 int c;
1470
1471 *prefix = NULL;
1472
1473 /* xml: prefix is not really a namespace */
1474 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1475 (cur[2] == 'l') && (cur[3] == ':'))
1476 return(xmlStrdup(name));
1477
1478 /* nasty but valid */
1479 if (cur[0] == ':')
1480 return(xmlStrdup(name));
1481
1482 c = *cur++;
1483 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1484 buf[len++] = c;
1485 c = *cur++;
1486 }
1487 if (len >= max) {
1488 /*
1489 * Okay someone managed to make a huge name, so he's ready to pay
1490 * for the processing speed.
1491 */
1492 max = len * 2;
1493
1494 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1495 if (buffer == NULL) {
1496 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1497 ctxt->sax->error(ctxt->userData,
1498 "xmlSplitQName: out of memory\n");
1499 return(NULL);
1500 }
1501 memcpy(buffer, buf, len);
1502 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1503 if (len + 10 > max) {
1504 max *= 2;
1505 buffer = (xmlChar *) xmlRealloc(buffer,
1506 max * sizeof(xmlChar));
1507 if (buffer == NULL) {
1508 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1509 ctxt->sax->error(ctxt->userData,
1510 "xmlSplitQName: out of memory\n");
1511 return(NULL);
1512 }
1513 }
1514 buffer[len++] = c;
1515 c = *cur++;
1516 }
1517 buffer[len] = 0;
1518 }
1519
1520 if (buffer == NULL)
1521 ret = xmlStrndup(buf, len);
1522 else {
1523 ret = buffer;
1524 buffer = NULL;
1525 max = XML_MAX_NAMELEN;
1526 }
1527
1528
1529 if (c == ':') {
1530 c = *cur++;
1531 if (c == 0) return(ret);
1532 *prefix = ret;
1533 len = 0;
1534
1535 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1536 buf[len++] = c;
1537 c = *cur++;
1538 }
1539 if (len >= max) {
1540 /*
1541 * Okay someone managed to make a huge name, so he's ready to pay
1542 * for the processing speed.
1543 */
1544 max = len * 2;
1545
1546 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1547 if (buffer == NULL) {
1548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1549 ctxt->sax->error(ctxt->userData,
1550 "xmlSplitQName: out of memory\n");
1551 return(NULL);
1552 }
1553 memcpy(buffer, buf, len);
1554 while (c != 0) { /* tested bigname2.xml */
1555 if (len + 10 > max) {
1556 max *= 2;
1557 buffer = (xmlChar *) xmlRealloc(buffer,
1558 max * sizeof(xmlChar));
1559 if (buffer == NULL) {
1560 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1561 ctxt->sax->error(ctxt->userData,
1562 "xmlSplitQName: out of memory\n");
1563 return(NULL);
1564 }
1565 }
1566 buffer[len++] = c;
1567 c = *cur++;
1568 }
1569 buffer[len] = 0;
1570 }
1571
1572 if (buffer == NULL)
1573 ret = xmlStrndup(buf, len);
1574 else {
1575 ret = buffer;
1576 }
1577 }
1578
1579 return(ret);
1580}
1581
Daniel Veillard260a68f1998-08-13 03:39:55 +00001582/************************************************************************
1583 * *
1584 * The parser itself *
1585 * Relates to http://www.w3.org/TR/REC-xml *
1586 * *
1587 ************************************************************************/
1588
Daniel Veillard11e00581998-10-24 18:27:49 +00001589/**
1590 * xmlParseName:
1591 * @ctxt: an XML parser context
1592 *
1593 * parse an XML name.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001594 *
1595 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1596 * CombiningChar | Extender
1597 *
1598 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1599 *
1600 * [6] Names ::= Name (S Name)*
Daniel Veillard1e346af1999-02-22 10:33:01 +00001601 *
1602 * Returns the Name parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001603 */
1604
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001605xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001606xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillardcf461992000-03-14 18:30:20 +00001607 xmlChar buf[XML_MAX_NAMELEN + 5];
1608 int len = 0, l;
1609 int c;
Daniel Veillarde0854c32000-08-27 21:12:29 +00001610 int count = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001611
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001612 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00001613 c = CUR_CHAR(l);
Daniel Veillard5e873c42000-04-12 13:27:38 +00001614 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1615 (!IS_LETTER(c) && (c != '_') &&
1616 (c != ':'))) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001617 return(NULL);
1618 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001619
Daniel Veillarde0854c32000-08-27 21:12:29 +00001620 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
Daniel Veillard5e873c42000-04-12 13:27:38 +00001621 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1622 (c == '.') || (c == '-') ||
1623 (c == '_') || (c == ':') ||
1624 (IS_COMBINING(c)) ||
1625 (IS_EXTENDER(c)))) {
Daniel Veillarde0854c32000-08-27 21:12:29 +00001626 if (count++ > 100) {
1627 count = 0;
1628 GROW;
1629 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001630 COPY_BUF(l,buf,len,c);
1631 NEXTL(l);
1632 c = CUR_CHAR(l);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001633 if (len >= XML_MAX_NAMELEN) {
Daniel Veillarde0854c32000-08-27 21:12:29 +00001634 /*
1635 * Okay someone managed to make a huge name, so he's ready to pay
1636 * for the processing speed.
1637 */
1638 xmlChar *buffer;
1639 int max = len * 2;
1640
1641 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1642 if (buffer == NULL) {
1643 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1644 ctxt->sax->error(ctxt->userData,
1645 "xmlParseName: out of memory\n");
1646 return(NULL);
1647 }
1648 memcpy(buffer, buf, len);
1649 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
Daniel Veillardcf461992000-03-14 18:30:20 +00001650 (c == '.') || (c == '-') ||
1651 (c == '_') || (c == ':') ||
1652 (IS_COMBINING(c)) ||
1653 (IS_EXTENDER(c))) {
Daniel Veillarde0854c32000-08-27 21:12:29 +00001654 if (count++ > 100) {
1655 count = 0;
1656 GROW;
1657 }
1658 if (len + 10 > max) {
1659 max *= 2;
1660 buffer = (xmlChar *) xmlRealloc(buffer,
1661 max * sizeof(xmlChar));
1662 if (buffer == NULL) {
1663 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1664 ctxt->sax->error(ctxt->userData,
1665 "xmlParseName: out of memory\n");
1666 return(NULL);
1667 }
1668 }
1669 COPY_BUF(l,buffer,len,c);
Daniel Veillardcf461992000-03-14 18:30:20 +00001670 NEXTL(l);
1671 c = CUR_CHAR(l);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001672 }
Daniel Veillarde0854c32000-08-27 21:12:29 +00001673 buffer[len] = 0;
1674 return(buffer);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001675 }
1676 }
1677 return(xmlStrndup(buf, len));
Daniel Veillard260a68f1998-08-13 03:39:55 +00001678}
1679
Daniel Veillard11e00581998-10-24 18:27:49 +00001680/**
Daniel Veillard10a2c651999-12-12 13:03:50 +00001681 * xmlParseStringName:
1682 * @ctxt: an XML parser context
Daniel Veillardb1059e22000-09-16 14:02:43 +00001683 * @str: a pointer to the string pointer (IN/OUT)
Daniel Veillard10a2c651999-12-12 13:03:50 +00001684 *
1685 * parse an XML name.
1686 *
1687 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1688 * CombiningChar | Extender
1689 *
1690 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1691 *
1692 * [6] Names ::= Name (S Name)*
1693 *
1694 * Returns the Name parsed or NULL. The str pointer
1695 * is updated to the current location in the string.
1696 */
1697
1698xmlChar *
1699xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
Daniel Veillardcf461992000-03-14 18:30:20 +00001700 xmlChar buf[XML_MAX_NAMELEN + 5];
1701 const xmlChar *cur = *str;
1702 int len = 0, l;
1703 int c;
Daniel Veillard10a2c651999-12-12 13:03:50 +00001704
Daniel Veillardcf461992000-03-14 18:30:20 +00001705 c = CUR_SCHAR(cur, l);
1706 if (!IS_LETTER(c) && (c != '_') &&
1707 (c != ':')) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00001708 return(NULL);
1709 }
1710
Daniel Veillarde0854c32000-08-27 21:12:29 +00001711 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Daniel Veillardcf461992000-03-14 18:30:20 +00001712 (c == '.') || (c == '-') ||
1713 (c == '_') || (c == ':') ||
1714 (IS_COMBINING(c)) ||
1715 (IS_EXTENDER(c))) {
1716 COPY_BUF(l,buf,len,c);
1717 cur += l;
1718 c = CUR_SCHAR(cur, l);
Daniel Veillarde0854c32000-08-27 21:12:29 +00001719 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1720 /*
1721 * Okay someone managed to make a huge name, so he's ready to pay
1722 * for the processing speed.
1723 */
1724 xmlChar *buffer;
1725 int max = len * 2;
1726
1727 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1728 if (buffer == NULL) {
1729 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1730 ctxt->sax->error(ctxt->userData,
1731 "xmlParseStringName: out of memory\n");
1732 return(NULL);
1733 }
1734 memcpy(buffer, buf, len);
1735 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Daniel Veillardcf461992000-03-14 18:30:20 +00001736 (c == '.') || (c == '-') ||
1737 (c == '_') || (c == ':') ||
1738 (IS_COMBINING(c)) ||
1739 (IS_EXTENDER(c))) {
Daniel Veillarde0854c32000-08-27 21:12:29 +00001740 if (len + 10 > max) {
1741 max *= 2;
1742 buffer = (xmlChar *) xmlRealloc(buffer,
1743 max * sizeof(xmlChar));
1744 if (buffer == NULL) {
1745 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1746 ctxt->sax->error(ctxt->userData,
1747 "xmlParseStringName: out of memory\n");
1748 return(NULL);
1749 }
1750 }
1751 COPY_BUF(l,buffer,len,c);
Daniel Veillardcf461992000-03-14 18:30:20 +00001752 cur += l;
1753 c = CUR_SCHAR(cur, l);
1754 }
Daniel Veillarde0854c32000-08-27 21:12:29 +00001755 buffer[len] = 0;
1756 *str = cur;
1757 return(buffer);
Daniel Veillardcf461992000-03-14 18:30:20 +00001758 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00001759 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001760 *str = cur;
1761 return(xmlStrndup(buf, len));
Daniel Veillard10a2c651999-12-12 13:03:50 +00001762}
1763
1764/**
Daniel Veillard11e00581998-10-24 18:27:49 +00001765 * xmlParseNmtoken:
1766 * @ctxt: an XML parser context
1767 *
1768 * parse an XML Nmtoken.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001769 *
1770 * [7] Nmtoken ::= (NameChar)+
1771 *
1772 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
Daniel Veillard1e346af1999-02-22 10:33:01 +00001773 *
1774 * Returns the Nmtoken parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001775 */
1776
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001777xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001778xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
Daniel Veillarde0854c32000-08-27 21:12:29 +00001779 xmlChar buf[XML_MAX_NAMELEN + 5];
1780 int len = 0, l;
1781 int c;
1782 int count = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001783
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001784 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00001785 c = CUR_CHAR(l);
Daniel Veillarde0854c32000-08-27 21:12:29 +00001786
1787 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Daniel Veillardcf461992000-03-14 18:30:20 +00001788 (c == '.') || (c == '-') ||
1789 (c == '_') || (c == ':') ||
1790 (IS_COMBINING(c)) ||
1791 (IS_EXTENDER(c))) {
Daniel Veillarde0854c32000-08-27 21:12:29 +00001792 if (count++ > 100) {
1793 count = 0;
1794 GROW;
1795 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001796 COPY_BUF(l,buf,len,c);
1797 NEXTL(l);
1798 c = CUR_CHAR(l);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001799 if (len >= XML_MAX_NAMELEN) {
Daniel Veillarde0854c32000-08-27 21:12:29 +00001800 /*
1801 * Okay someone managed to make a huge token, so he's ready to pay
1802 * for the processing speed.
1803 */
1804 xmlChar *buffer;
1805 int max = len * 2;
1806
1807 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1808 if (buffer == NULL) {
1809 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1810 ctxt->sax->error(ctxt->userData,
1811 "xmlParseNmtoken: out of memory\n");
1812 return(NULL);
1813 }
1814 memcpy(buffer, buf, len);
1815 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Daniel Veillardcf461992000-03-14 18:30:20 +00001816 (c == '.') || (c == '-') ||
1817 (c == '_') || (c == ':') ||
1818 (IS_COMBINING(c)) ||
1819 (IS_EXTENDER(c))) {
Daniel Veillarde0854c32000-08-27 21:12:29 +00001820 if (count++ > 100) {
1821 count = 0;
1822 GROW;
1823 }
1824 if (len + 10 > max) {
1825 max *= 2;
1826 buffer = (xmlChar *) xmlRealloc(buffer,
1827 max * sizeof(xmlChar));
1828 if (buffer == NULL) {
1829 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1830 ctxt->sax->error(ctxt->userData,
1831 "xmlParseName: out of memory\n");
1832 return(NULL);
1833 }
1834 }
1835 COPY_BUF(l,buffer,len,c);
Daniel Veillardcf461992000-03-14 18:30:20 +00001836 NEXTL(l);
1837 c = CUR_CHAR(l);
1838 }
Daniel Veillarde0854c32000-08-27 21:12:29 +00001839 buffer[len] = 0;
1840 return(buffer);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001841 }
1842 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001843 if (len == 0)
1844 return(NULL);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001845 return(xmlStrndup(buf, len));
Daniel Veillard260a68f1998-08-13 03:39:55 +00001846}
1847
Daniel Veillard11e00581998-10-24 18:27:49 +00001848/**
1849 * xmlParseEntityValue:
1850 * @ctxt: an XML parser context
Daniel Veillard011b63c1999-06-02 17:44:04 +00001851 * @orig: if non-NULL store a copy of the original entity value
Daniel Veillard11e00581998-10-24 18:27:49 +00001852 *
Daniel Veillardb1059e22000-09-16 14:02:43 +00001853 * parse a value for ENTITY declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00001854 *
1855 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1856 * "'" ([^%&'] | PEReference | Reference)* "'"
Daniel Veillard1e346af1999-02-22 10:33:01 +00001857 *
Daniel Veillard011b63c1999-06-02 17:44:04 +00001858 * Returns the EntityValue parsed with reference substitued or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001859 */
1860
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001861xmlChar *
1862xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00001863 xmlChar *buf = NULL;
1864 int len = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001865 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillardcf461992000-03-14 18:30:20 +00001866 int c, l;
Daniel Veillard10a2c651999-12-12 13:03:50 +00001867 xmlChar stop;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001868 xmlChar *ret = NULL;
Daniel Veillardcf461992000-03-14 18:30:20 +00001869 const xmlChar *cur = NULL;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001870 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001871
Daniel Veillardcf461992000-03-14 18:30:20 +00001872 if (RAW == '"') stop = '"';
1873 else if (RAW == '\'') stop = '\'';
Daniel Veillard10a2c651999-12-12 13:03:50 +00001874 else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001875 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001876 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard011b63c1999-06-02 17:44:04 +00001877 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001878 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001879 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00001880 return(NULL);
1881 }
1882 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
1883 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00001884 xmlGenericError(xmlGenericErrorContext,
1885 "malloc of %d byte failed\n", size);
Daniel Veillard10a2c651999-12-12 13:03:50 +00001886 return(NULL);
1887 }
1888
1889 /*
1890 * The content of the entity definition is copied in a buffer.
1891 */
1892
1893 ctxt->instate = XML_PARSER_ENTITY_VALUE;
1894 input = ctxt->input;
1895 GROW;
1896 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00001897 c = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00001898 /*
1899 * NOTE: 4.4.5 Included in Literal
1900 * When a parameter entity reference appears in a literal entity
1901 * value, ... a single or double quote character in the replacement
1902 * text is always treated as a normal data character and will not
1903 * terminate the literal.
1904 * In practice it means we stop the loop only when back at parsing
1905 * the initial entity and the quote is found
1906 */
Daniel Veillarde0854c32000-08-27 21:12:29 +00001907 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
1908 (ctxt->input != input))) {
Daniel Veillardcf461992000-03-14 18:30:20 +00001909 if (len + 5 >= size) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00001910 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001911 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00001912 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00001913 xmlGenericError(xmlGenericErrorContext,
1914 "realloc of %d byte failed\n", size);
Daniel Veillard10a2c651999-12-12 13:03:50 +00001915 return(NULL);
1916 }
1917 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001918 COPY_BUF(l,buf,len,c);
1919 NEXTL(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00001920 /*
1921 * Pop-up of finished entities.
1922 */
Daniel Veillarde0854c32000-08-27 21:12:29 +00001923 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
Daniel Veillard10a2c651999-12-12 13:03:50 +00001924 xmlPopInput(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00001925
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00001926 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00001927 c = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00001928 if (c == 0) {
1929 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00001930 c = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00001931 }
1932 }
1933 buf[len] = 0;
1934
1935 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00001936 * Raise problem w.r.t. '&' and '%' being used in non-entities
1937 * reference constructs. Note Charref will be handled in
1938 * xmlStringDecodeEntities()
1939 */
1940 cur = buf;
Daniel Veillard4948eb42000-08-29 09:41:15 +00001941 while (*cur != 0) { /* non input consuming */
Daniel Veillardcf461992000-03-14 18:30:20 +00001942 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
1943 xmlChar *name;
1944 xmlChar tmp = *cur;
1945
1946 cur++;
1947 name = xmlParseStringName(ctxt, &cur);
1948 if ((name == NULL) || (*cur != ';')) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00001949 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
Daniel Veillardcf461992000-03-14 18:30:20 +00001950 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1951 ctxt->sax->error(ctxt->userData,
1952 "EntityValue: '%c' forbidden except for entities references\n",
1953 tmp);
1954 ctxt->wellFormed = 0;
1955 ctxt->disableSAX = 1;
Daniel Veillardcf461992000-03-14 18:30:20 +00001956 }
1957 if ((ctxt->inSubset == 1) && (tmp == '%')) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00001958 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardcf461992000-03-14 18:30:20 +00001959 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1960 ctxt->sax->error(ctxt->userData,
1961 "EntityValue: PEReferences forbidden in internal subset\n",
1962 tmp);
1963 ctxt->wellFormed = 0;
1964 ctxt->disableSAX = 1;
Daniel Veillardcf461992000-03-14 18:30:20 +00001965 }
1966 if (name != NULL)
1967 xmlFree(name);
1968 }
1969 cur++;
1970 }
1971
1972 /*
Daniel Veillard10a2c651999-12-12 13:03:50 +00001973 * Then PEReference entities are substituted.
1974 */
1975 if (c != stop) {
1976 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1977 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1978 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1979 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001980 ctxt->disableSAX = 1;
1981 xmlFree(buf);
Daniel Veillard10a2c651999-12-12 13:03:50 +00001982 } else {
1983 NEXT;
1984 /*
1985 * NOTE: 4.4.7 Bypassed
1986 * When a general entity reference appears in the EntityValue in
1987 * an entity declaration, it is bypassed and left as is.
Daniel Veillardcf461992000-03-14 18:30:20 +00001988 * so XML_SUBSTITUTE_REF is not set here.
Daniel Veillard10a2c651999-12-12 13:03:50 +00001989 */
1990 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
1991 0, 0, 0);
1992 if (orig != NULL)
1993 *orig = buf;
1994 else
1995 xmlFree(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001996 }
1997
1998 return(ret);
1999}
2000
Daniel Veillard11e00581998-10-24 18:27:49 +00002001/**
2002 * xmlParseAttValue:
2003 * @ctxt: an XML parser context
2004 *
2005 * parse a value for an attribute
Daniel Veillard011b63c1999-06-02 17:44:04 +00002006 * Note: the parser won't do substitution of entities here, this
Daniel Veillardb96e6431999-08-29 21:02:19 +00002007 * will be handled later in xmlStringGetNodeList
Daniel Veillard260a68f1998-08-13 03:39:55 +00002008 *
2009 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2010 * "'" ([^<&'] | Reference)* "'"
Daniel Veillard1e346af1999-02-22 10:33:01 +00002011 *
Daniel Veillard7f858501999-11-17 17:32:38 +00002012 * 3.3.3 Attribute-Value Normalization:
2013 * Before the value of an attribute is passed to the application or
2014 * checked for validity, the XML processor must normalize it as follows:
2015 * - a character reference is processed by appending the referenced
2016 * character to the attribute value
2017 * - an entity reference is processed by recursively processing the
2018 * replacement text of the entity
2019 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2020 * appending #x20 to the normalized value, except that only a single
2021 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2022 * parsed entity or the literal entity value of an internal parsed entity
2023 * - other characters are processed by appending them to the normalized value
Daniel Veillard07136651999-11-18 14:02:05 +00002024 * If the declared value is not CDATA, then the XML processor must further
2025 * process the normalized attribute value by discarding any leading and
2026 * trailing space (#x20) characters, and by replacing sequences of space
2027 * (#x20) characters by a single space (#x20) character.
2028 * All attributes for which no declaration has been read should be treated
2029 * by a non-validating parser as if declared CDATA.
Daniel Veillard7f858501999-11-17 17:32:38 +00002030 *
2031 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002032 */
2033
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002034xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002035xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard7f858501999-11-17 17:32:38 +00002036 xmlChar limit = 0;
Daniel Veillardbe803962000-06-28 23:40:59 +00002037 xmlChar *buf = NULL;
2038 int len = 0;
2039 int buf_size = 0;
2040 int c, l;
Daniel Veillard7f858501999-11-17 17:32:38 +00002041 xmlChar *current = NULL;
2042 xmlEntityPtr ent;
Daniel Veillard7f858501999-11-17 17:32:38 +00002043
Daniel Veillard260a68f1998-08-13 03:39:55 +00002044
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002045 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00002046 if (NXT(0) == '"') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002047 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillard7f858501999-11-17 17:32:38 +00002048 limit = '"';
Daniel Veillard260a68f1998-08-13 03:39:55 +00002049 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00002050 } else if (NXT(0) == '\'') {
Daniel Veillard7f858501999-11-17 17:32:38 +00002051 limit = '\'';
Daniel Veillardb05deb71999-08-10 19:04:08 +00002052 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002053 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002054 } else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002055 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002056 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002057 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002058 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002059 ctxt->disableSAX = 1;
Daniel Veillard7f858501999-11-17 17:32:38 +00002060 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002061 }
2062
Daniel Veillard7f858501999-11-17 17:32:38 +00002063 /*
2064 * allocate a translation buffer.
2065 */
Daniel Veillardbe803962000-06-28 23:40:59 +00002066 buf_size = XML_PARSER_BUFFER_SIZE;
2067 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2068 if (buf == NULL) {
Daniel Veillard7f858501999-11-17 17:32:38 +00002069 perror("xmlParseAttValue: malloc failed");
2070 return(NULL);
2071 }
Daniel Veillard7f858501999-11-17 17:32:38 +00002072
2073 /*
2074 * Ok loop until we reach one of the ending char or a size limit.
2075 */
Daniel Veillardbe803962000-06-28 23:40:59 +00002076 c = CUR_CHAR(l);
Daniel Veillard4948eb42000-08-29 09:41:15 +00002077 while (((NXT(0) != limit) && /* checked */
2078 (c != '<')) || (ctxt->token != 0)) {
Daniel Veillardbe803962000-06-28 23:40:59 +00002079 if (c == 0) break;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00002080 if (ctxt->token == '&') {
Daniel Veillardb1059e22000-09-16 14:02:43 +00002081 /*
2082 * The reparsing will be done in xmlStringGetNodeList()
2083 * called by the attribute() function in SAX.c
2084 */
Daniel Veillard32bc74e2000-07-14 14:49:25 +00002085 static xmlChar buffer[6] = "&#38;";
2086
2087 if (len > buf_size - 10) {
2088 growBuffer(buf);
2089 }
2090 current = &buffer[0];
Daniel Veillard4948eb42000-08-29 09:41:15 +00002091 while (*current != 0) { /* non input consuming */
Daniel Veillard32bc74e2000-07-14 14:49:25 +00002092 buf[len++] = *current++;
2093 }
2094 ctxt->token = 0;
2095 } else if ((c == '&') && (NXT(1) == '#')) {
Daniel Veillard7f858501999-11-17 17:32:38 +00002096 int val = xmlParseCharRef(ctxt);
Daniel Veillardb1059e22000-09-16 14:02:43 +00002097 if (val == '&') {
2098 /*
2099 * The reparsing will be done in xmlStringGetNodeList()
2100 * called by the attribute() function in SAX.c
2101 */
2102 static xmlChar buffer[6] = "&#38;";
2103
2104 if (len > buf_size - 10) {
2105 growBuffer(buf);
2106 }
2107 current = &buffer[0];
2108 while (*current != 0) { /* non input consuming */
2109 buf[len++] = *current++;
2110 }
2111 } else {
Daniel Veillard1e851392000-10-15 10:02:56 +00002112 len += xmlCopyChar(0, &buf[len], val);
Daniel Veillardb1059e22000-09-16 14:02:43 +00002113 }
Daniel Veillardbe803962000-06-28 23:40:59 +00002114 } else if (c == '&') {
Daniel Veillard7f858501999-11-17 17:32:38 +00002115 ent = xmlParseEntityRef(ctxt);
2116 if ((ent != NULL) &&
2117 (ctxt->replaceEntities != 0)) {
Daniel Veillardcf461992000-03-14 18:30:20 +00002118 xmlChar *rep;
Daniel Veillard7f858501999-11-17 17:32:38 +00002119
Daniel Veillardcf461992000-03-14 18:30:20 +00002120 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2121 rep = xmlStringDecodeEntities(ctxt, ent->content,
2122 XML_SUBSTITUTE_REF, 0, 0, 0);
2123 if (rep != NULL) {
2124 current = rep;
Daniel Veillard4948eb42000-08-29 09:41:15 +00002125 while (*current != 0) { /* non input consuming */
Daniel Veillardbe803962000-06-28 23:40:59 +00002126 buf[len++] = *current++;
2127 if (len > buf_size - 10) {
2128 growBuffer(buf);
Daniel Veillardcf461992000-03-14 18:30:20 +00002129 }
2130 }
2131 xmlFree(rep);
Daniel Veillard7f858501999-11-17 17:32:38 +00002132 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002133 } else {
2134 if (ent->content != NULL)
Daniel Veillardbe803962000-06-28 23:40:59 +00002135 buf[len++] = ent->content[0];
Daniel Veillard7f858501999-11-17 17:32:38 +00002136 }
2137 } else if (ent != NULL) {
2138 int i = xmlStrlen(ent->name);
2139 const xmlChar *cur = ent->name;
2140
Daniel Veillardcf461992000-03-14 18:30:20 +00002141 /*
2142 * This may look absurd but is needed to detect
2143 * entities problems
2144 */
Daniel Veillard87b95392000-08-12 21:12:04 +00002145 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2146 (ent->content != NULL)) {
Daniel Veillardcf461992000-03-14 18:30:20 +00002147 xmlChar *rep;
2148 rep = xmlStringDecodeEntities(ctxt, ent->content,
2149 XML_SUBSTITUTE_REF, 0, 0, 0);
2150 if (rep != NULL)
2151 xmlFree(rep);
2152 }
2153
2154 /*
2155 * Just output the reference
2156 */
Daniel Veillardbe803962000-06-28 23:40:59 +00002157 buf[len++] = '&';
2158 if (len > buf_size - i - 10) {
2159 growBuffer(buf);
Daniel Veillard7f858501999-11-17 17:32:38 +00002160 }
2161 for (;i > 0;i--)
Daniel Veillardbe803962000-06-28 23:40:59 +00002162 buf[len++] = *cur++;
2163 buf[len++] = ';';
Daniel Veillard7f858501999-11-17 17:32:38 +00002164 }
Daniel Veillard7f858501999-11-17 17:32:38 +00002165 } else {
Daniel Veillardbe803962000-06-28 23:40:59 +00002166 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2167 COPY_BUF(l,buf,len,0x20);
2168 if (len > buf_size - 10) {
2169 growBuffer(buf);
Daniel Veillard7f858501999-11-17 17:32:38 +00002170 }
Daniel Veillard7f858501999-11-17 17:32:38 +00002171 } else {
Daniel Veillardbe803962000-06-28 23:40:59 +00002172 COPY_BUF(l,buf,len,c);
2173 if (len > buf_size - 10) {
2174 growBuffer(buf);
Daniel Veillard7f858501999-11-17 17:32:38 +00002175 }
Daniel Veillard7f858501999-11-17 17:32:38 +00002176 }
Daniel Veillardbe803962000-06-28 23:40:59 +00002177 NEXTL(l);
Daniel Veillard7f858501999-11-17 17:32:38 +00002178 }
Daniel Veillardbe803962000-06-28 23:40:59 +00002179 GROW;
2180 c = CUR_CHAR(l);
Daniel Veillard7f858501999-11-17 17:32:38 +00002181 }
Daniel Veillardbe803962000-06-28 23:40:59 +00002182 buf[len++] = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002183 if (RAW == '<') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002184 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
Daniel Veillard7f858501999-11-17 17:32:38 +00002185 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2186 ctxt->sax->error(ctxt->userData,
2187 "Unescaped '<' not allowed in attributes values\n");
Daniel Veillard7f858501999-11-17 17:32:38 +00002188 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002189 ctxt->disableSAX = 1;
2190 } else if (RAW != limit) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002191 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
Daniel Veillard7f858501999-11-17 17:32:38 +00002192 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2193 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
Daniel Veillard7f858501999-11-17 17:32:38 +00002194 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002195 ctxt->disableSAX = 1;
Daniel Veillard7f858501999-11-17 17:32:38 +00002196 } else
2197 NEXT;
Daniel Veillardbe803962000-06-28 23:40:59 +00002198 return(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002199}
2200
Daniel Veillard11e00581998-10-24 18:27:49 +00002201/**
2202 * xmlParseSystemLiteral:
2203 * @ctxt: an XML parser context
2204 *
2205 * parse an XML Literal
Daniel Veillard260a68f1998-08-13 03:39:55 +00002206 *
2207 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
Daniel Veillard1e346af1999-02-22 10:33:01 +00002208 *
2209 * Returns the SystemLiteral parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00002210 */
2211
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002212xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002213xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002214 xmlChar *buf = NULL;
2215 int len = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002216 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillardcf461992000-03-14 18:30:20 +00002217 int cur, l;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002218 xmlChar stop;
Daniel Veillardcf461992000-03-14 18:30:20 +00002219 int state = ctxt->instate;
Daniel Veillard4948eb42000-08-29 09:41:15 +00002220 int count = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002221
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002222 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00002223 if (RAW == '"') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002224 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002225 stop = '"';
Daniel Veillardcf461992000-03-14 18:30:20 +00002226 } else if (RAW == '\'') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002227 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002228 stop = '\'';
Daniel Veillard260a68f1998-08-13 03:39:55 +00002229 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002230 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002231 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00002232 ctxt->sax->error(ctxt->userData,
2233 "SystemLiteral \" or ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002234 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002235 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002236 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002237 }
2238
Daniel Veillard10a2c651999-12-12 13:03:50 +00002239 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2240 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002241 xmlGenericError(xmlGenericErrorContext,
2242 "malloc of %d byte failed\n", size);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002243 return(NULL);
2244 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002245 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2246 cur = CUR_CHAR(l);
Daniel Veillard4948eb42000-08-29 09:41:15 +00002247 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Daniel Veillardcf461992000-03-14 18:30:20 +00002248 if (len + 5 >= size) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002249 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00002250 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00002251 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002252 xmlGenericError(xmlGenericErrorContext,
2253 "realloc of %d byte failed\n", size);
Daniel Veillard32bc74e2000-07-14 14:49:25 +00002254 ctxt->instate = (xmlParserInputState) state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002255 return(NULL);
2256 }
2257 }
Daniel Veillard4948eb42000-08-29 09:41:15 +00002258 count++;
2259 if (count > 50) {
2260 GROW;
2261 count = 0;
2262 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002263 COPY_BUF(l,buf,len,cur);
2264 NEXTL(l);
2265 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002266 if (cur == 0) {
2267 GROW;
2268 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00002269 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002270 }
2271 }
2272 buf[len] = 0;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00002273 ctxt->instate = (xmlParserInputState) state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002274 if (!IS_CHAR(cur)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002275 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002276 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2277 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
Daniel Veillard10a2c651999-12-12 13:03:50 +00002278 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002279 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002280 } else {
2281 NEXT;
2282 }
2283 return(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002284}
2285
Daniel Veillard11e00581998-10-24 18:27:49 +00002286/**
2287 * xmlParsePubidLiteral:
2288 * @ctxt: an XML parser context
Daniel Veillard260a68f1998-08-13 03:39:55 +00002289 *
Daniel Veillard11e00581998-10-24 18:27:49 +00002290 * parse an XML public literal
Daniel Veillard1e346af1999-02-22 10:33:01 +00002291 *
2292 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2293 *
2294 * Returns the PubidLiteral parsed or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002295 */
2296
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002297xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002298xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002299 xmlChar *buf = NULL;
2300 int len = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002301 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002302 xmlChar cur;
2303 xmlChar stop;
Daniel Veillard4948eb42000-08-29 09:41:15 +00002304 int count = 0;
Daniel Veillard6077d031999-10-09 09:11:45 +00002305
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002306 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00002307 if (RAW == '"') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002308 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002309 stop = '"';
Daniel Veillardcf461992000-03-14 18:30:20 +00002310 } else if (RAW == '\'') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002311 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002312 stop = '\'';
Daniel Veillard260a68f1998-08-13 03:39:55 +00002313 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002314 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002315 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00002316 ctxt->sax->error(ctxt->userData,
2317 "SystemLiteral \" or ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002318 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002319 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002320 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002321 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00002322 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2323 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002324 xmlGenericError(xmlGenericErrorContext,
2325 "malloc of %d byte failed\n", size);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002326 return(NULL);
2327 }
2328 cur = CUR;
Daniel Veillard4948eb42000-08-29 09:41:15 +00002329 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
Daniel Veillard10a2c651999-12-12 13:03:50 +00002330 if (len + 1 >= size) {
2331 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00002332 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00002333 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002334 xmlGenericError(xmlGenericErrorContext,
2335 "realloc of %d byte failed\n", size);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002336 return(NULL);
2337 }
2338 }
2339 buf[len++] = cur;
Daniel Veillard4948eb42000-08-29 09:41:15 +00002340 count++;
2341 if (count > 50) {
2342 GROW;
2343 count = 0;
2344 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00002345 NEXT;
2346 cur = CUR;
2347 if (cur == 0) {
2348 GROW;
2349 SHRINK;
2350 cur = CUR;
2351 }
2352 }
2353 buf[len] = 0;
2354 if (cur != stop) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002355 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002356 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2357 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
Daniel Veillard10a2c651999-12-12 13:03:50 +00002358 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002359 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002360 } else {
2361 NEXT;
2362 }
2363 return(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002364}
2365
Daniel Veillard11e00581998-10-24 18:27:49 +00002366/**
2367 * xmlParseCharData:
2368 * @ctxt: an XML parser context
2369 * @cdata: int indicating whether we are within a CDATA section
2370 *
2371 * parse a CharData section.
2372 * if we are within a CDATA section ']]>' marks an end of section.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002373 *
Daniel Veillardcf461992000-03-14 18:30:20 +00002374 * The right angle bracket (>) may be represented using the string "&gt;",
2375 * and must, for compatibility, be escaped using "&gt;" or a character
2376 * reference when it appears in the string "]]>" in content, when that
2377 * string is not marking the end of a CDATA section.
2378 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00002379 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2380 */
2381
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002382void
2383xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillardcf461992000-03-14 18:30:20 +00002384 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002385 int nbchar = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002386 int cur, l;
Daniel Veillard4948eb42000-08-29 09:41:15 +00002387 int count = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002388
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002389 SHRINK;
Daniel Veillard4948eb42000-08-29 09:41:15 +00002390 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00002391 cur = CUR_CHAR(l);
Daniel Veillard4948eb42000-08-29 09:41:15 +00002392 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
Daniel Veillard5e873c42000-04-12 13:27:38 +00002393 ((cur != '&') || (ctxt->token == '&')) &&
Daniel Veillardb1059e22000-09-16 14:02:43 +00002394 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002395 if ((cur == ']') && (NXT(1) == ']') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002396 (NXT(2) == '>')) {
2397 if (cdata) break;
2398 else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002399 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002400 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcf461992000-03-14 18:30:20 +00002401 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002402 "Sequence ']]>' not allowed in content\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00002403 /* Should this be relaxed ??? I see a "must here */
2404 ctxt->wellFormed = 0;
2405 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002406 }
2407 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002408 COPY_BUF(l,buf,nbchar,cur);
2409 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002410 /*
2411 * Ok the segment is to be consumed as chars.
2412 */
Daniel Veillardcf461992000-03-14 18:30:20 +00002413 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002414 if (areBlanks(ctxt, buf, nbchar)) {
2415 if (ctxt->sax->ignorableWhitespace != NULL)
2416 ctxt->sax->ignorableWhitespace(ctxt->userData,
2417 buf, nbchar);
2418 } else {
2419 if (ctxt->sax->characters != NULL)
2420 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2421 }
2422 }
2423 nbchar = 0;
2424 }
Daniel Veillard4948eb42000-08-29 09:41:15 +00002425 count++;
2426 if (count > 50) {
2427 GROW;
2428 count = 0;
2429 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002430 NEXTL(l);
2431 cur = CUR_CHAR(l);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002432 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002433 if (nbchar != 0) {
2434 /*
2435 * Ok the segment is to be consumed as chars.
2436 */
Daniel Veillardcf461992000-03-14 18:30:20 +00002437 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002438 if (areBlanks(ctxt, buf, nbchar)) {
2439 if (ctxt->sax->ignorableWhitespace != NULL)
2440 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2441 } else {
2442 if (ctxt->sax->characters != NULL)
2443 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2444 }
2445 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002446 }
2447}
2448
Daniel Veillard11e00581998-10-24 18:27:49 +00002449/**
2450 * xmlParseExternalID:
2451 * @ctxt: an XML parser context
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002452 * @publicID: a xmlChar** receiving PubidLiteral
Daniel Veillard1e346af1999-02-22 10:33:01 +00002453 * @strict: indicate whether we should restrict parsing to only
2454 * production [75], see NOTE below
Daniel Veillard11e00581998-10-24 18:27:49 +00002455 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00002456 * Parse an External ID or a Public ID
2457 *
2458 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2459 * 'PUBLIC' S PubidLiteral S SystemLiteral
Daniel Veillard260a68f1998-08-13 03:39:55 +00002460 *
2461 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2462 * | 'PUBLIC' S PubidLiteral S SystemLiteral
Daniel Veillard1e346af1999-02-22 10:33:01 +00002463 *
2464 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2465 *
2466 * Returns the function returns SystemLiteral and in the second
2467 * case publicID receives PubidLiteral, is strict is off
2468 * it is possible to return NULL and have publicID set.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002469 */
2470
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002471xmlChar *
2472xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2473 xmlChar *URI = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002474
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002475 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00002476 if ((RAW == 'S') && (NXT(1) == 'Y') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00002477 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2478 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2479 SKIP(6);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002480 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002481 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002482 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002483 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002484 "Space required after 'SYSTEM'\n");
2485 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002486 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002487 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002488 SKIP_BLANKS;
2489 URI = xmlParseSystemLiteral(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002490 if (URI == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002491 ctxt->errNo = XML_ERR_URI_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002492 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002493 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00002494 "xmlParseExternalID: SYSTEM, no URI\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002495 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002496 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002497 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002498 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00002499 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2500 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2501 SKIP(6);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002502 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002503 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002504 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002505 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002506 "Space required after 'PUBLIC'\n");
2507 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002508 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002509 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002510 SKIP_BLANKS;
2511 *publicID = xmlParsePubidLiteral(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002512 if (*publicID == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002513 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002514 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002515 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00002516 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002517 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002518 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002519 }
Daniel Veillard1e346af1999-02-22 10:33:01 +00002520 if (strict) {
2521 /*
2522 * We don't handle [83] so "S SystemLiteral" is required.
2523 */
2524 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002525 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00002526 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002527 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00002528 "Space required after the Public Identifier\n");
2529 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002530 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00002531 }
2532 } else {
2533 /*
2534 * We handle [83] so we return immediately, if
2535 * "S SystemLiteral" is not detected. From a purely parsing
2536 * point of view that's a nice mess.
2537 */
Daniel Veillard10a2c651999-12-12 13:03:50 +00002538 const xmlChar *ptr;
2539 GROW;
2540
2541 ptr = CUR_PTR;
Daniel Veillard1e346af1999-02-22 10:33:01 +00002542 if (!IS_BLANK(*ptr)) return(NULL);
2543
Daniel Veillard4948eb42000-08-29 09:41:15 +00002544 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Daniel Veillardcf461992000-03-14 18:30:20 +00002545 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002546 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002547 SKIP_BLANKS;
2548 URI = xmlParseSystemLiteral(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002549 if (URI == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002550 ctxt->errNo = XML_ERR_URI_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002551 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002552 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00002553 "xmlParseExternalID: PUBLIC, no URI\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002554 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002555 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002556 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002557 }
2558 return(URI);
2559}
2560
Daniel Veillard11e00581998-10-24 18:27:49 +00002561/**
2562 * xmlParseComment:
Daniel Veillard1e346af1999-02-22 10:33:01 +00002563 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00002564 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00002565 * Skip an XML (SGML) comment <!-- .... -->
Daniel Veillard260a68f1998-08-13 03:39:55 +00002566 * The spec says that "For compatibility, the string "--" (double-hyphen)
2567 * must not occur within comments. "
2568 *
2569 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2570 */
Daniel Veillard517752b1999-04-05 12:20:10 +00002571void
Daniel Veillardb96e6431999-08-29 21:02:19 +00002572xmlParseComment(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002573 xmlChar *buf = NULL;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002574 int len;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002575 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillardcf461992000-03-14 18:30:20 +00002576 int q, ql;
2577 int r, rl;
2578 int cur, l;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002579 xmlParserInputState state;
Daniel Veillardcf461992000-03-14 18:30:20 +00002580 xmlParserInputPtr input = ctxt->input;
Daniel Veillard4948eb42000-08-29 09:41:15 +00002581 int count = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002582
2583 /*
2584 * Check that there is a comment right here.
2585 */
Daniel Veillardcf461992000-03-14 18:30:20 +00002586 if ((RAW != '<') || (NXT(1) != '!') ||
Daniel Veillard517752b1999-04-05 12:20:10 +00002587 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002588
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002589 state = ctxt->instate;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002590 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002591 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002592 SKIP(4);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002593 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2594 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002595 xmlGenericError(xmlGenericErrorContext,
2596 "malloc of %d byte failed\n", size);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002597 ctxt->instate = state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002598 return;
2599 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002600 q = CUR_CHAR(ql);
2601 NEXTL(ql);
2602 r = CUR_CHAR(rl);
2603 NEXTL(rl);
2604 cur = CUR_CHAR(l);
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002605 len = 0;
Daniel Veillard4948eb42000-08-29 09:41:15 +00002606 while (IS_CHAR(cur) && /* checked */
Daniel Veillard10a2c651999-12-12 13:03:50 +00002607 ((cur != '>') ||
2608 (r != '-') || (q != '-'))) {
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002609 if ((r == '-') && (q == '-') && (len > 1)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002610 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002611 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002612 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00002613 "Comment must not contain '--' (double-hyphen)`\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002614 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002615 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002616 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002617 if (len + 5 >= size) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002618 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00002619 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00002620 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002621 xmlGenericError(xmlGenericErrorContext,
2622 "realloc of %d byte failed\n", size);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002623 ctxt->instate = state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002624 return;
2625 }
2626 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002627 COPY_BUF(ql,buf,len,q);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002628 q = r;
Daniel Veillardcf461992000-03-14 18:30:20 +00002629 ql = rl;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002630 r = cur;
Daniel Veillardcf461992000-03-14 18:30:20 +00002631 rl = l;
Daniel Veillard4948eb42000-08-29 09:41:15 +00002632
2633 count++;
2634 if (count > 50) {
2635 GROW;
2636 count = 0;
2637 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002638 NEXTL(l);
2639 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002640 if (cur == 0) {
2641 SHRINK;
2642 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00002643 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002644 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002645 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00002646 buf[len] = 0;
2647 if (!IS_CHAR(cur)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002648 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00002650 ctxt->sax->error(ctxt->userData,
Daniel Veillard10a2c651999-12-12 13:03:50 +00002651 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002652 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002653 ctxt->disableSAX = 1;
2654 xmlFree(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002655 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00002656 if (input != ctxt->input) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002657 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
Daniel Veillardcf461992000-03-14 18:30:20 +00002658 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2659 ctxt->sax->error(ctxt->userData,
2660"Comment doesn't start and stop in the same entity\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00002661 ctxt->wellFormed = 0;
2662 ctxt->disableSAX = 1;
2663 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002664 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00002665 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2666 (!ctxt->disableSAX))
Daniel Veillard10a2c651999-12-12 13:03:50 +00002667 ctxt->sax->comment(ctxt->userData, buf);
2668 xmlFree(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002669 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002670 ctxt->instate = state;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002671}
2672
Daniel Veillard11e00581998-10-24 18:27:49 +00002673/**
2674 * xmlParsePITarget:
2675 * @ctxt: an XML parser context
2676 *
2677 * parse the name of a PI
Daniel Veillard260a68f1998-08-13 03:39:55 +00002678 *
2679 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
Daniel Veillard1e346af1999-02-22 10:33:01 +00002680 *
2681 * Returns the PITarget name or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00002682 */
2683
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002684xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002685xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002686 xmlChar *name;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002687
2688 name = xmlParseName(ctxt);
Daniel Veillard3c558c31999-12-22 11:30:41 +00002689 if ((name != NULL) &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00002690 ((name[0] == 'x') || (name[0] == 'X')) &&
2691 ((name[1] == 'm') || (name[1] == 'M')) &&
2692 ((name[2] == 'l') || (name[2] == 'L'))) {
Daniel Veillard3c558c31999-12-22 11:30:41 +00002693 int i;
Daniel Veillardcf461992000-03-14 18:30:20 +00002694 if ((name[0] == 'x') && (name[1] == 'm') &&
2695 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002696 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
Daniel Veillardcf461992000-03-14 18:30:20 +00002697 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2698 ctxt->sax->error(ctxt->userData,
2699 "XML declaration allowed only at the start of the document\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00002700 ctxt->wellFormed = 0;
2701 ctxt->disableSAX = 1;
2702 return(name);
2703 } else if (name[3] == 0) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002704 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
Daniel Veillardcf461992000-03-14 18:30:20 +00002705 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2706 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00002707 ctxt->wellFormed = 0;
2708 ctxt->disableSAX = 1;
2709 return(name);
2710 }
Daniel Veillard3c558c31999-12-22 11:30:41 +00002711 for (i = 0;;i++) {
2712 if (xmlW3CPIs[i] == NULL) break;
Daniel Veillard8b5dd832000-10-01 20:28:44 +00002713 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
Daniel Veillard3c558c31999-12-22 11:30:41 +00002714 return(name);
2715 }
2716 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002717 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
Daniel Veillard3c558c31999-12-22 11:30:41 +00002718 ctxt->sax->warning(ctxt->userData,
Daniel Veillard7f7d1111999-09-22 09:46:25 +00002719 "xmlParsePItarget: invalid name prefix 'xml'\n");
Daniel Veillard7f7d1111999-09-22 09:46:25 +00002720 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002721 }
2722 return(name);
2723}
2724
Daniel Veillard11e00581998-10-24 18:27:49 +00002725/**
2726 * xmlParsePI:
2727 * @ctxt: an XML parser context
2728 *
2729 * parse an XML Processing Instruction.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002730 *
2731 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
Daniel Veillard1e346af1999-02-22 10:33:01 +00002732 *
2733 * The processing is transfered to SAX once parsed.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002734 */
2735
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002736void
2737xmlParsePI(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002738 xmlChar *buf = NULL;
2739 int len = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002740 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillardcf461992000-03-14 18:30:20 +00002741 int cur, l;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002742 xmlChar *target;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002743 xmlParserInputState state;
Daniel Veillard4948eb42000-08-29 09:41:15 +00002744 int count = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002745
Daniel Veillardcf461992000-03-14 18:30:20 +00002746 if ((RAW == '<') && (NXT(1) == '?')) {
2747 xmlParserInputPtr input = ctxt->input;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002748 state = ctxt->instate;
2749 ctxt->instate = XML_PARSER_PI;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002750 /*
2751 * this is a Processing Instruction.
2752 */
2753 SKIP(2);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002754 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002755
2756 /*
2757 * Parse the target name and check for special support like
2758 * namespace.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002759 */
2760 target = xmlParsePITarget(ctxt);
2761 if (target != NULL) {
Daniel Veillardcf461992000-03-14 18:30:20 +00002762 if ((RAW == '?') && (NXT(1) == '>')) {
2763 if (input != ctxt->input) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002764 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
Daniel Veillardcf461992000-03-14 18:30:20 +00002765 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2766 ctxt->sax->error(ctxt->userData,
2767 "PI declaration doesn't start and stop in the same entity\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00002768 ctxt->wellFormed = 0;
2769 ctxt->disableSAX = 1;
2770 }
2771 SKIP(2);
2772
2773 /*
2774 * SAX: PI detected.
2775 */
2776 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2777 (ctxt->sax->processingInstruction != NULL))
2778 ctxt->sax->processingInstruction(ctxt->userData,
2779 target, NULL);
2780 ctxt->instate = state;
2781 xmlFree(target);
2782 return;
2783 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00002784 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2785 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002786 xmlGenericError(xmlGenericErrorContext,
2787 "malloc of %d byte failed\n", size);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002788 ctxt->instate = state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002789 return;
2790 }
2791 cur = CUR;
2792 if (!IS_BLANK(cur)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002793 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillardb96e6431999-08-29 21:02:19 +00002794 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2795 ctxt->sax->error(ctxt->userData,
2796 "xmlParsePI: PI %s space expected\n", target);
2797 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002798 ctxt->disableSAX = 1;
Daniel Veillardb96e6431999-08-29 21:02:19 +00002799 }
2800 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00002801 cur = CUR_CHAR(l);
Daniel Veillard4948eb42000-08-29 09:41:15 +00002802 while (IS_CHAR(cur) && /* checked */
Daniel Veillard10a2c651999-12-12 13:03:50 +00002803 ((cur != '?') || (NXT(1) != '>'))) {
Daniel Veillardcf461992000-03-14 18:30:20 +00002804 if (len + 5 >= size) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002805 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00002806 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00002807 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002808 xmlGenericError(xmlGenericErrorContext,
2809 "realloc of %d byte failed\n", size);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002810 ctxt->instate = state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002811 return;
2812 }
2813 }
Daniel Veillard4948eb42000-08-29 09:41:15 +00002814 count++;
2815 if (count > 50) {
2816 GROW;
2817 count = 0;
2818 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002819 COPY_BUF(l,buf,len,cur);
2820 NEXTL(l);
2821 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002822 if (cur == 0) {
2823 SHRINK;
2824 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00002825 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002826 }
2827 }
2828 buf[len] = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002829 if (cur != '?') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002830 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
Daniel Veillard517752b1999-04-05 12:20:10 +00002831 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002832 ctxt->sax->error(ctxt->userData,
Daniel Veillard517752b1999-04-05 12:20:10 +00002833 "xmlParsePI: PI %s never end ...\n", target);
2834 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002835 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002836 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00002837 if (input != ctxt->input) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002838 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
Daniel Veillardcf461992000-03-14 18:30:20 +00002839 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2840 ctxt->sax->error(ctxt->userData,
2841 "PI declaration doesn't start and stop in the same entity\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00002842 ctxt->wellFormed = 0;
2843 ctxt->disableSAX = 1;
2844 }
Daniel Veillard517752b1999-04-05 12:20:10 +00002845 SKIP(2);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002846
Daniel Veillard517752b1999-04-05 12:20:10 +00002847 /*
2848 * SAX: PI detected.
2849 */
Daniel Veillardcf461992000-03-14 18:30:20 +00002850 if ((ctxt->sax) && (!ctxt->disableSAX) &&
Daniel Veillard517752b1999-04-05 12:20:10 +00002851 (ctxt->sax->processingInstruction != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00002852 ctxt->sax->processingInstruction(ctxt->userData,
Daniel Veillard10a2c651999-12-12 13:03:50 +00002853 target, buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002854 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00002855 xmlFree(buf);
Daniel Veillard6454aec1999-09-02 22:04:43 +00002856 xmlFree(target);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002857 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002858 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002859 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00002860 ctxt->sax->error(ctxt->userData,
2861 "xmlParsePI : no target name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002862 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002863 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002864 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002865 ctxt->instate = state;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002866 }
2867}
2868
Daniel Veillard11e00581998-10-24 18:27:49 +00002869/**
2870 * xmlParseNotationDecl:
2871 * @ctxt: an XML parser context
2872 *
2873 * parse a notation declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00002874 *
2875 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
2876 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00002877 * Hence there is actually 3 choices:
2878 * 'PUBLIC' S PubidLiteral
2879 * 'PUBLIC' S PubidLiteral S SystemLiteral
2880 * and 'SYSTEM' S SystemLiteral
Daniel Veillard11e00581998-10-24 18:27:49 +00002881 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00002882 * See the NOTE on xmlParseExternalID().
Daniel Veillard260a68f1998-08-13 03:39:55 +00002883 */
2884
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002885void
2886xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002887 xmlChar *name;
2888 xmlChar *Pubid;
2889 xmlChar *Systemid;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002890
Daniel Veillardcf461992000-03-14 18:30:20 +00002891 if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00002892 (NXT(2) == 'N') && (NXT(3) == 'O') &&
2893 (NXT(4) == 'T') && (NXT(5) == 'A') &&
2894 (NXT(6) == 'T') && (NXT(7) == 'I') &&
Daniel Veillard1e346af1999-02-22 10:33:01 +00002895 (NXT(8) == 'O') && (NXT(9) == 'N')) {
Daniel Veillardcf461992000-03-14 18:30:20 +00002896 xmlParserInputPtr input = ctxt->input;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002897 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002898 SKIP(10);
Daniel Veillard1e346af1999-02-22 10:33:01 +00002899 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002900 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002901 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00002902 ctxt->sax->error(ctxt->userData,
2903 "Space required after '<!NOTATION'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002904 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002905 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002906 return;
2907 }
2908 SKIP_BLANKS;
Daniel Veillard1e346af1999-02-22 10:33:01 +00002909
2910 name = xmlParseName(ctxt);
2911 if (name == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002912 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00002913 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00002914 ctxt->sax->error(ctxt->userData,
2915 "NOTATION: Name expected here\n");
Daniel Veillard1e346af1999-02-22 10:33:01 +00002916 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002917 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00002918 return;
2919 }
2920 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002921 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00002922 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002923 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00002924 "Space required after the NOTATION name'\n");
2925 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002926 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00002927 return;
2928 }
2929 SKIP_BLANKS;
2930
Daniel Veillard260a68f1998-08-13 03:39:55 +00002931 /*
Daniel Veillard1e346af1999-02-22 10:33:01 +00002932 * Parse the IDs.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002933 */
Daniel Veillardcf461992000-03-14 18:30:20 +00002934 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
Daniel Veillard1e346af1999-02-22 10:33:01 +00002935 SKIP_BLANKS;
2936
Daniel Veillardcf461992000-03-14 18:30:20 +00002937 if (RAW == '>') {
2938 if (input != ctxt->input) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002939 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
Daniel Veillardcf461992000-03-14 18:30:20 +00002940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2941 ctxt->sax->error(ctxt->userData,
2942"Notation declaration doesn't start and stop in the same entity\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00002943 ctxt->wellFormed = 0;
2944 ctxt->disableSAX = 1;
2945 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002946 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00002947 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
2948 (ctxt->sax->notationDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002949 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
Daniel Veillard1e346af1999-02-22 10:33:01 +00002950 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002951 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00002952 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002953 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00002954 "'>' required to close NOTATION declaration\n");
2955 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002956 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00002957 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00002958 xmlFree(name);
2959 if (Systemid != NULL) xmlFree(Systemid);
2960 if (Pubid != NULL) xmlFree(Pubid);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002961 }
2962}
2963
Daniel Veillard11e00581998-10-24 18:27:49 +00002964/**
2965 * xmlParseEntityDecl:
2966 * @ctxt: an XML parser context
2967 *
2968 * parse <!ENTITY declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00002969 *
2970 * [70] EntityDecl ::= GEDecl | PEDecl
2971 *
2972 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
2973 *
2974 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
2975 *
2976 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
2977 *
2978 * [74] PEDef ::= EntityValue | ExternalID
2979 *
2980 * [76] NDataDecl ::= S 'NDATA' S Name
Daniel Veillardb05deb71999-08-10 19:04:08 +00002981 *
2982 * [ VC: Notation Declared ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00002983 * The Name must match the declared name of a notation.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002984 */
2985
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002986void
2987xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002988 xmlChar *name = NULL;
2989 xmlChar *value = NULL;
2990 xmlChar *URI = NULL, *literal = NULL;
2991 xmlChar *ndata = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002992 int isParameter = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002993 xmlChar *orig = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002994
Daniel Veillardb05deb71999-08-10 19:04:08 +00002995 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00002996 if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00002997 (NXT(2) == 'E') && (NXT(3) == 'N') &&
2998 (NXT(4) == 'T') && (NXT(5) == 'I') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002999 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
Daniel Veillardcf461992000-03-14 18:30:20 +00003000 xmlParserInputPtr input = ctxt->input;
Daniel Veillardb05deb71999-08-10 19:04:08 +00003001 ctxt->instate = XML_PARSER_ENTITY_DECL;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003002 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003003 SKIP(8);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003004 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003005 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003007 ctxt->sax->error(ctxt->userData,
3008 "Space required after '<!ENTITY'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003009 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003010 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003011 }
3012 SKIP_BLANKS;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003013
Daniel Veillardcf461992000-03-14 18:30:20 +00003014 if (RAW == '%') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003015 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003016 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003017 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003018 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003019 ctxt->sax->error(ctxt->userData,
3020 "Space required after '%'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003021 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003022 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003023 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003024 SKIP_BLANKS;
3025 isParameter = 1;
3026 }
3027
3028 name = xmlParseName(ctxt);
3029 if (name == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003030 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003031 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003032 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003033 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003034 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003035 return;
3036 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003037 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003038 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003039 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003040 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003041 "Space required after the entity name\n");
3042 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003043 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003044 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003045 SKIP_BLANKS;
3046
3047 /*
Daniel Veillard1e346af1999-02-22 10:33:01 +00003048 * handle the various case of definitions...
Daniel Veillard260a68f1998-08-13 03:39:55 +00003049 */
3050 if (isParameter) {
Daniel Veillard39c7d712000-09-10 16:14:55 +00003051 if ((RAW == '"') || (RAW == '\'')) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00003052 value = xmlParseEntityValue(ctxt, &orig);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003053 if (value) {
Daniel Veillardcf461992000-03-14 18:30:20 +00003054 if ((ctxt->sax != NULL) &&
3055 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003056 ctxt->sax->entityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003057 XML_INTERNAL_PARAMETER_ENTITY,
3058 NULL, NULL, value);
3059 }
Daniel Veillard39c7d712000-09-10 16:14:55 +00003060 } else {
Daniel Veillard1e346af1999-02-22 10:33:01 +00003061 URI = xmlParseExternalID(ctxt, &literal, 1);
Daniel Veillardcf461992000-03-14 18:30:20 +00003062 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003063 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
Daniel Veillardcf461992000-03-14 18:30:20 +00003064 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3065 ctxt->sax->error(ctxt->userData,
3066 "Entity value required\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00003067 ctxt->wellFormed = 0;
3068 ctxt->disableSAX = 1;
3069 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003070 if (URI) {
Daniel Veillard496a1cf2000-05-03 14:20:55 +00003071 xmlURIPtr uri;
3072
3073 uri = xmlParseURI((const char *) URI);
3074 if (uri == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003075 ctxt->errNo = XML_ERR_INVALID_URI;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00003076 if ((ctxt->sax != NULL) &&
3077 (!ctxt->disableSAX) &&
3078 (ctxt->sax->error != NULL))
3079 ctxt->sax->error(ctxt->userData,
3080 "Invalid URI: %s\n", URI);
3081 ctxt->wellFormed = 0;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00003082 } else {
3083 if (uri->fragment != NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003084 ctxt->errNo = XML_ERR_URI_FRAGMENT;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00003085 if ((ctxt->sax != NULL) &&
3086 (!ctxt->disableSAX) &&
3087 (ctxt->sax->error != NULL))
3088 ctxt->sax->error(ctxt->userData,
3089 "Fragment not allowed: %s\n", URI);
3090 ctxt->wellFormed = 0;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00003091 } else {
3092 if ((ctxt->sax != NULL) &&
3093 (!ctxt->disableSAX) &&
3094 (ctxt->sax->entityDecl != NULL))
3095 ctxt->sax->entityDecl(ctxt->userData, name,
3096 XML_EXTERNAL_PARAMETER_ENTITY,
3097 literal, URI, NULL);
3098 }
3099 xmlFreeURI(uri);
3100 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003101 }
3102 }
3103 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00003104 if ((RAW == '"') || (RAW == '\'')) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00003105 value = xmlParseEntityValue(ctxt, &orig);
Daniel Veillardcf461992000-03-14 18:30:20 +00003106 if ((ctxt->sax != NULL) &&
3107 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003108 ctxt->sax->entityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003109 XML_INTERNAL_GENERAL_ENTITY,
3110 NULL, NULL, value);
3111 } else {
Daniel Veillard1e346af1999-02-22 10:33:01 +00003112 URI = xmlParseExternalID(ctxt, &literal, 1);
Daniel Veillardcf461992000-03-14 18:30:20 +00003113 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003114 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
Daniel Veillardcf461992000-03-14 18:30:20 +00003115 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3116 ctxt->sax->error(ctxt->userData,
3117 "Entity value required\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00003118 ctxt->wellFormed = 0;
3119 ctxt->disableSAX = 1;
3120 }
Daniel Veillard496a1cf2000-05-03 14:20:55 +00003121 if (URI) {
3122 xmlURIPtr uri;
3123
3124 uri = xmlParseURI((const char *)URI);
3125 if (uri == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003126 ctxt->errNo = XML_ERR_INVALID_URI;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00003127 if ((ctxt->sax != NULL) &&
3128 (!ctxt->disableSAX) &&
3129 (ctxt->sax->error != NULL))
3130 ctxt->sax->error(ctxt->userData,
3131 "Invalid URI: %s\n", URI);
3132 ctxt->wellFormed = 0;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00003133 } else {
3134 if (uri->fragment != NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003135 ctxt->errNo = XML_ERR_URI_FRAGMENT;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00003136 if ((ctxt->sax != NULL) &&
3137 (!ctxt->disableSAX) &&
3138 (ctxt->sax->error != NULL))
3139 ctxt->sax->error(ctxt->userData,
3140 "Fragment not allowed: %s\n", URI);
3141 ctxt->wellFormed = 0;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00003142 }
3143 xmlFreeURI(uri);
3144 }
3145 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003146 if ((RAW != '>') && (!IS_BLANK(CUR))) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003147 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003148 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003149 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003150 "Space required before 'NDATA'\n");
3151 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003152 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003153 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003154 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00003155 if ((RAW == 'N') && (NXT(1) == 'D') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00003156 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3157 (NXT(4) == 'A')) {
3158 SKIP(5);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003159 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003160 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003161 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003162 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003163 "Space required after 'NDATA'\n");
3164 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003165 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003166 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003167 SKIP_BLANKS;
3168 ndata = xmlParseName(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00003169 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardb96e6431999-08-29 21:02:19 +00003170 (ctxt->sax->unparsedEntityDecl != NULL))
3171 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003172 literal, URI, ndata);
3173 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00003174 if ((ctxt->sax != NULL) &&
3175 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003176 ctxt->sax->entityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003177 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3178 literal, URI, NULL);
3179 }
3180 }
3181 }
3182 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00003183 if (RAW != '>') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003184 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003185 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003186 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003187 "xmlParseEntityDecl: entity %s not terminated\n", name);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003188 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003189 ctxt->disableSAX = 1;
3190 } else {
3191 if (input != ctxt->input) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003192 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
Daniel Veillardcf461992000-03-14 18:30:20 +00003193 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3194 ctxt->sax->error(ctxt->userData,
3195"Entity declaration doesn't start and stop in the same entity\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00003196 ctxt->wellFormed = 0;
3197 ctxt->disableSAX = 1;
3198 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003199 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00003200 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00003201 if (orig != NULL) {
3202 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00003203 * Ugly mechanism to save the raw entity value.
Daniel Veillard011b63c1999-06-02 17:44:04 +00003204 */
3205 xmlEntityPtr cur = NULL;
3206
Daniel Veillardb05deb71999-08-10 19:04:08 +00003207 if (isParameter) {
3208 if ((ctxt->sax != NULL) &&
3209 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillardc08a2c61999-09-08 21:35:25 +00003210 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003211 } else {
3212 if ((ctxt->sax != NULL) &&
3213 (ctxt->sax->getEntity != NULL))
Daniel Veillardc08a2c61999-09-08 21:35:25 +00003214 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003215 }
3216 if (cur != NULL) {
3217 if (cur->orig != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00003218 xmlFree(orig);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003219 else
3220 cur->orig = orig;
3221 } else
Daniel Veillard6454aec1999-09-02 22:04:43 +00003222 xmlFree(orig);
Daniel Veillard011b63c1999-06-02 17:44:04 +00003223 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00003224 if (name != NULL) xmlFree(name);
3225 if (value != NULL) xmlFree(value);
3226 if (URI != NULL) xmlFree(URI);
3227 if (literal != NULL) xmlFree(literal);
3228 if (ndata != NULL) xmlFree(ndata);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003229 }
3230}
3231
Daniel Veillard11e00581998-10-24 18:27:49 +00003232/**
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003233 * xmlParseDefaultDecl:
3234 * @ctxt: an XML parser context
3235 * @value: Receive a possible fixed default value for the attribute
3236 *
3237 * Parse an attribute default declaration
3238 *
3239 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3240 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003241 * [ VC: Required Attribute ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00003242 * if the default declaration is the keyword #REQUIRED, then the
3243 * attribute must be specified for all elements of the type in the
3244 * attribute-list declaration.
Daniel Veillardb05deb71999-08-10 19:04:08 +00003245 *
3246 * [ VC: Attribute Default Legal ]
3247 * The declared default value must meet the lexical constraints of
3248 * the declared attribute type c.f. xmlValidateAttributeDecl()
3249 *
3250 * [ VC: Fixed Attribute Default ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00003251 * if an attribute has a default value declared with the #FIXED
3252 * keyword, instances of that attribute must match the default value.
Daniel Veillardb05deb71999-08-10 19:04:08 +00003253 *
3254 * [ WFC: No < in Attribute Values ]
3255 * handled in xmlParseAttValue()
3256 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003257 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3258 * or XML_ATTRIBUTE_FIXED.
3259 */
3260
3261int
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003262xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003263 int val;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003264 xmlChar *ret;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003265
3266 *value = NULL;
Daniel Veillardcf461992000-03-14 18:30:20 +00003267 if ((RAW == '#') && (NXT(1) == 'R') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003268 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3269 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3270 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3271 (NXT(8) == 'D')) {
3272 SKIP(9);
3273 return(XML_ATTRIBUTE_REQUIRED);
3274 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003275 if ((RAW == '#') && (NXT(1) == 'I') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003276 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3277 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3278 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3279 SKIP(8);
3280 return(XML_ATTRIBUTE_IMPLIED);
3281 }
3282 val = XML_ATTRIBUTE_NONE;
Daniel Veillardcf461992000-03-14 18:30:20 +00003283 if ((RAW == '#') && (NXT(1) == 'F') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003284 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3285 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3286 SKIP(6);
3287 val = XML_ATTRIBUTE_FIXED;
3288 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003289 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003290 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003291 ctxt->sax->error(ctxt->userData,
3292 "Space required after '#FIXED'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003293 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003294 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003295 }
3296 SKIP_BLANKS;
3297 }
3298 ret = xmlParseAttValue(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003299 ctxt->instate = XML_PARSER_DTD;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003300 if (ret == NULL) {
3301 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003302 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003303 "Attribute default value declaration error\n");
3304 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003305 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003306 } else
3307 *value = ret;
3308 return(val);
3309}
3310
3311/**
Daniel Veillard1e346af1999-02-22 10:33:01 +00003312 * xmlParseNotationType:
3313 * @ctxt: an XML parser context
3314 *
 3315 * parse a Notation attribute type.
3316 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003317 * Note: the leading 'NOTATION' S part has already been parsed...
3318 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003319 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3320 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003321 * [ VC: Notation Attributes ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00003322 * Values of this type must match one of the notation names included
Daniel Veillardb05deb71999-08-10 19:04:08 +00003323 * in the declaration; all notation names in the declaration must be declared.
Daniel Veillard1e346af1999-02-22 10:33:01 +00003324 *
3325 * Returns: the notation attribute tree built while parsing
3326 */
3327
3328xmlEnumerationPtr
3329xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003330 xmlChar *name;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003331 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3332
Daniel Veillardcf461992000-03-14 18:30:20 +00003333 if (RAW != '(') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003334 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003335 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003336 ctxt->sax->error(ctxt->userData,
3337 "'(' required to start 'NOTATION'\n");
Daniel Veillard1e346af1999-02-22 10:33:01 +00003338 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003339 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003340 return(NULL);
3341 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003342 SHRINK;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003343 do {
3344 NEXT;
3345 SKIP_BLANKS;
3346 name = xmlParseName(ctxt);
3347 if (name == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003348 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003350 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00003351 "Name expected in NOTATION declaration\n");
3352 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003353 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003354 return(ret);
3355 }
3356 cur = xmlCreateEnumeration(name);
Daniel Veillard6454aec1999-09-02 22:04:43 +00003357 xmlFree(name);
Daniel Veillard1e346af1999-02-22 10:33:01 +00003358 if (cur == NULL) return(ret);
3359 if (last == NULL) ret = last = cur;
3360 else {
3361 last->next = cur;
3362 last = cur;
3363 }
3364 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00003365 } while (RAW == '|');
3366 if (RAW != ')') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003367 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003368 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003369 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00003370 "')' required to finish NOTATION declaration\n");
3371 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003372 ctxt->disableSAX = 1;
3373 if ((last != NULL) && (last != ret))
3374 xmlFreeEnumeration(last);
Daniel Veillard1e346af1999-02-22 10:33:01 +00003375 return(ret);
3376 }
3377 NEXT;
3378 return(ret);
3379}
3380
3381/**
3382 * xmlParseEnumerationType:
3383 * @ctxt: an XML parser context
3384 *
3385 * parse an Enumeration attribute type.
3386 *
3387 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3388 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003389 * [ VC: Enumeration ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00003390 * Values of this type must match one of the Nmtoken tokens in
Daniel Veillardb05deb71999-08-10 19:04:08 +00003391 * the declaration
3392 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003393 * Returns: the enumeration attribute tree built while parsing
3394 */
3395
3396xmlEnumerationPtr
3397xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003398 xmlChar *name;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003399 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3400
Daniel Veillardcf461992000-03-14 18:30:20 +00003401 if (RAW != '(') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003402 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003403 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003404 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00003405 "'(' required to start ATTLIST enumeration\n");
3406 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003407 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003408 return(NULL);
3409 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003410 SHRINK;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003411 do {
3412 NEXT;
3413 SKIP_BLANKS;
3414 name = xmlParseNmtoken(ctxt);
3415 if (name == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003416 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003417 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003418 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00003419 "NmToken expected in ATTLIST enumeration\n");
3420 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003421 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003422 return(ret);
3423 }
3424 cur = xmlCreateEnumeration(name);
Daniel Veillard6454aec1999-09-02 22:04:43 +00003425 xmlFree(name);
Daniel Veillard1e346af1999-02-22 10:33:01 +00003426 if (cur == NULL) return(ret);
3427 if (last == NULL) ret = last = cur;
3428 else {
3429 last->next = cur;
3430 last = cur;
3431 }
3432 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00003433 } while (RAW == '|');
3434 if (RAW != ')') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003435 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003436 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003437 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00003438 "')' required to finish ATTLIST enumeration\n");
3439 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003440 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003441 return(ret);
3442 }
3443 NEXT;
3444 return(ret);
3445}
3446
3447/**
Daniel Veillard11e00581998-10-24 18:27:49 +00003448 * xmlParseEnumeratedType:
3449 * @ctxt: an XML parser context
Daniel Veillard1e346af1999-02-22 10:33:01 +00003450 * @tree: the enumeration tree built while parsing
Daniel Veillard11e00581998-10-24 18:27:49 +00003451 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003452 * parse an Enumerated attribute type.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003453 *
3454 * [57] EnumeratedType ::= NotationType | Enumeration
3455 *
3456 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3457 *
Daniel Veillard11e00581998-10-24 18:27:49 +00003458 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003459 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
Daniel Veillard260a68f1998-08-13 03:39:55 +00003460 */
3461
Daniel Veillard1e346af1999-02-22 10:33:01 +00003462int
3463xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillardcf461992000-03-14 18:30:20 +00003464 if ((RAW == 'N') && (NXT(1) == 'O') &&
Daniel Veillard1e346af1999-02-22 10:33:01 +00003465 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3466 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3467 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3468 SKIP(8);
3469 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003470 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003471 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003472 ctxt->sax->error(ctxt->userData,
3473 "Space required after 'NOTATION'\n");
Daniel Veillard1e346af1999-02-22 10:33:01 +00003474 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003475 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003476 return(0);
3477 }
3478 SKIP_BLANKS;
3479 *tree = xmlParseNotationType(ctxt);
3480 if (*tree == NULL) return(0);
3481 return(XML_ATTRIBUTE_NOTATION);
3482 }
3483 *tree = xmlParseEnumerationType(ctxt);
3484 if (*tree == NULL) return(0);
3485 return(XML_ATTRIBUTE_ENUMERATION);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003486}
3487
Daniel Veillard11e00581998-10-24 18:27:49 +00003488/**
3489 * xmlParseAttributeType:
3490 * @ctxt: an XML parser context
Daniel Veillard1e346af1999-02-22 10:33:01 +00003491 * @tree: the enumeration tree built while parsing
Daniel Veillard11e00581998-10-24 18:27:49 +00003492 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003493 * parse the Attribute list def for an element
Daniel Veillard260a68f1998-08-13 03:39:55 +00003494 *
3495 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3496 *
3497 * [55] StringType ::= 'CDATA'
3498 *
3499 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3500 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
Daniel Veillard11e00581998-10-24 18:27:49 +00003501 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003502 * Validity constraints for attribute values syntax are checked in
3503 * xmlValidateAttributeValue()
3504 *
3505 * [ VC: ID ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00003506 * Values of type ID must match the Name production. A name must not
Daniel Veillardb05deb71999-08-10 19:04:08 +00003507 * appear more than once in an XML document as a value of this type;
3508 * i.e., ID values must uniquely identify the elements which bear them.
3509 *
3510 * [ VC: One ID per Element Type ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00003511 * No element type may have more than one ID attribute specified.
Daniel Veillardb05deb71999-08-10 19:04:08 +00003512 *
3513 * [ VC: ID Attribute Default ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00003514 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
Daniel Veillardb05deb71999-08-10 19:04:08 +00003515 *
3516 * [ VC: IDREF ]
3517 * Values of type IDREF must match the Name production, and values
Daniel Veillarddbfd6411999-12-28 16:35:14 +00003518 * of type IDREFS must match Names; each IDREF Name must match the value
Daniel Veillardb96e6431999-08-29 21:02:19 +00003519 * of an ID attribute on some element in the XML document; i.e. IDREF
Daniel Veillardb05deb71999-08-10 19:04:08 +00003520 * values must match the value of some ID attribute.
3521 *
3522 * [ VC: Entity Name ]
3523 * Values of type ENTITY must match the Name production, values
Daniel Veillarddbfd6411999-12-28 16:35:14 +00003524 * of type ENTITIES must match Names; each Entity Name must match the
Daniel Veillardb96e6431999-08-29 21:02:19 +00003525 * name of an unparsed entity declared in the DTD.
Daniel Veillardb05deb71999-08-10 19:04:08 +00003526 *
3527 * [ VC: Name Token ]
3528 * Values of type NMTOKEN must match the Nmtoken production; values
3529 * of type NMTOKENS must match Nmtokens.
3530 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003531 * Returns the attribute type
Daniel Veillard260a68f1998-08-13 03:39:55 +00003532 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003533int
Daniel Veillard1e346af1999-02-22 10:33:01 +00003534xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003535 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00003536 if ((RAW == 'C') && (NXT(1) == 'D') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00003537 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3538 (NXT(4) == 'A')) {
3539 SKIP(5);
Daniel Veillard1e346af1999-02-22 10:33:01 +00003540 return(XML_ATTRIBUTE_CDATA);
Daniel Veillardcf461992000-03-14 18:30:20 +00003541 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00003542 (NXT(2) == 'R') && (NXT(3) == 'E') &&
Daniel Veillardb05deb71999-08-10 19:04:08 +00003543 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3544 SKIP(6);
3545 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillardcf461992000-03-14 18:30:20 +00003546 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
Daniel Veillardb05deb71999-08-10 19:04:08 +00003547 (NXT(2) == 'R') && (NXT(3) == 'E') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00003548 (NXT(4) == 'F')) {
3549 SKIP(5);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003550 return(XML_ATTRIBUTE_IDREF);
Daniel Veillardcf461992000-03-14 18:30:20 +00003551 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
Daniel Veillard1e346af1999-02-22 10:33:01 +00003552 SKIP(2);
3553 return(XML_ATTRIBUTE_ID);
Daniel Veillardcf461992000-03-14 18:30:20 +00003554 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00003555 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3556 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3557 SKIP(6);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003558 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillardcf461992000-03-14 18:30:20 +00003559 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00003560 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3561 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3562 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3563 SKIP(8);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003564 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillardcf461992000-03-14 18:30:20 +00003565 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00003566 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3567 (NXT(4) == 'K') && (NXT(5) == 'E') &&
Daniel Veillard1e346af1999-02-22 10:33:01 +00003568 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3569 SKIP(8);
3570 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillardcf461992000-03-14 18:30:20 +00003571 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00003572 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3573 (NXT(4) == 'K') && (NXT(5) == 'E') &&
Daniel Veillard1e346af1999-02-22 10:33:01 +00003574 (NXT(6) == 'N')) {
3575 SKIP(7);
3576 return(XML_ATTRIBUTE_NMTOKEN);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003577 }
Daniel Veillard1e346af1999-02-22 10:33:01 +00003578 return(xmlParseEnumeratedType(ctxt, tree));
Daniel Veillard260a68f1998-08-13 03:39:55 +00003579}
3580
Daniel Veillard11e00581998-10-24 18:27:49 +00003581/**
3582 * xmlParseAttributeListDecl:
3583 * @ctxt: an XML parser context
3584 *
3585 * : parse the Attribute list def for an element
Daniel Veillard260a68f1998-08-13 03:39:55 +00003586 *
3587 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3588 *
3589 * [53] AttDef ::= S Name S AttType S DefaultDecl
Daniel Veillard11e00581998-10-24 18:27:49 +00003590 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00003591 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003592void
3593xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003594 xmlChar *elemName;
3595 xmlChar *attrName;
Daniel Veillardb05deb71999-08-10 19:04:08 +00003596 xmlEnumerationPtr tree;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003597
Daniel Veillardcf461992000-03-14 18:30:20 +00003598 if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00003599 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3600 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3601 (NXT(6) == 'I') && (NXT(7) == 'S') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003602 (NXT(8) == 'T')) {
Daniel Veillardcf461992000-03-14 18:30:20 +00003603 xmlParserInputPtr input = ctxt->input;
3604
Daniel Veillard260a68f1998-08-13 03:39:55 +00003605 SKIP(9);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003606 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003607 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003608 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003609 ctxt->sax->error(ctxt->userData,
3610 "Space required after '<!ATTLIST'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003611 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003612 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003613 }
3614 SKIP_BLANKS;
3615 elemName = xmlParseName(ctxt);
3616 if (elemName == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003617 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003619 ctxt->sax->error(ctxt->userData,
3620 "ATTLIST: no name for Element\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003621 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003622 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003623 return;
3624 }
3625 SKIP_BLANKS;
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00003626 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00003627 while (RAW != '>') {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003628 const xmlChar *check = CUR_PTR;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003629 int type;
3630 int def;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003631 xmlChar *defaultValue = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003632
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00003633 GROW;
Daniel Veillardb05deb71999-08-10 19:04:08 +00003634 tree = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003635 attrName = xmlParseName(ctxt);
3636 if (attrName == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003637 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003638 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003639 ctxt->sax->error(ctxt->userData,
3640 "ATTLIST: no name for Attribute\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003641 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003642 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003643 break;
3644 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00003645 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003646 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003647 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003648 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003649 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003650 "Space required after the attribute name\n");
3651 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003652 ctxt->disableSAX = 1;
3653 if (attrName != NULL)
3654 xmlFree(attrName);
3655 if (defaultValue != NULL)
3656 xmlFree(defaultValue);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003657 break;
3658 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003659 SKIP_BLANKS;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003660
Daniel Veillard1e346af1999-02-22 10:33:01 +00003661 type = xmlParseAttributeType(ctxt, &tree);
Daniel Veillardcf461992000-03-14 18:30:20 +00003662 if (type <= 0) {
3663 if (attrName != NULL)
3664 xmlFree(attrName);
3665 if (defaultValue != NULL)
3666 xmlFree(defaultValue);
3667 break;
3668 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003669
Daniel Veillardb05deb71999-08-10 19:04:08 +00003670 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003671 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003672 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003673 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003674 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003675 "Space required after the attribute type\n");
3676 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003677 ctxt->disableSAX = 1;
3678 if (attrName != NULL)
3679 xmlFree(attrName);
3680 if (defaultValue != NULL)
3681 xmlFree(defaultValue);
3682 if (tree != NULL)
3683 xmlFreeEnumeration(tree);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003684 break;
3685 }
3686 SKIP_BLANKS;
3687
3688 def = xmlParseDefaultDecl(ctxt, &defaultValue);
Daniel Veillardcf461992000-03-14 18:30:20 +00003689 if (def <= 0) {
3690 if (attrName != NULL)
3691 xmlFree(attrName);
3692 if (defaultValue != NULL)
3693 xmlFree(defaultValue);
3694 if (tree != NULL)
3695 xmlFreeEnumeration(tree);
3696 break;
3697 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003698
Daniel Veillardb05deb71999-08-10 19:04:08 +00003699 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00003700 if (RAW != '>') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003701 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003702 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003703 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003704 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003705 "Space required after the attribute default value\n");
3706 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003707 ctxt->disableSAX = 1;
3708 if (attrName != NULL)
3709 xmlFree(attrName);
3710 if (defaultValue != NULL)
3711 xmlFree(defaultValue);
3712 if (tree != NULL)
3713 xmlFreeEnumeration(tree);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003714 break;
3715 }
3716 SKIP_BLANKS;
3717 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003718 if (check == CUR_PTR) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003719 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003720 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003721 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003722 "xmlParseAttributeListDecl: detected internal error\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00003723 if (attrName != NULL)
3724 xmlFree(attrName);
3725 if (defaultValue != NULL)
3726 xmlFree(defaultValue);
3727 if (tree != NULL)
3728 xmlFreeEnumeration(tree);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003729 break;
3730 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003731 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3732 (ctxt->sax->attributeDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003733 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
Daniel Veillard1e346af1999-02-22 10:33:01 +00003734 type, def, defaultValue, tree);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003735 if (attrName != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00003736 xmlFree(attrName);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003737 if (defaultValue != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00003738 xmlFree(defaultValue);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003739 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003740 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003741 if (RAW == '>') {
3742 if (input != ctxt->input) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003743 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
Daniel Veillardcf461992000-03-14 18:30:20 +00003744 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3745 ctxt->sax->error(ctxt->userData,
3746"Attribute list declaration doesn't start and stop in the same entity\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00003747 ctxt->wellFormed = 0;
3748 ctxt->disableSAX = 1;
3749 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003750 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00003751 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003752
Daniel Veillard6454aec1999-09-02 22:04:43 +00003753 xmlFree(elemName);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003754 }
3755}
3756
Daniel Veillard11e00581998-10-24 18:27:49 +00003757/**
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003758 * xmlParseElementMixedContentDecl:
Daniel Veillard11e00581998-10-24 18:27:49 +00003759 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00003760 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003761 * parse the declaration for a Mixed Element content
3762 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillard260a68f1998-08-13 03:39:55 +00003763 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003764 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3765 * '(' S? '#PCDATA' S? ')'
3766 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003767 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3768 *
3769 * [ VC: No Duplicate Types ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00003770 * The same name must not appear more than once in a single
3771 * mixed-content declaration.
Daniel Veillardb05deb71999-08-10 19:04:08 +00003772 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003773 * returns: the list of the xmlElementContentPtr describing the element choices
3774 */
3775xmlElementContentPtr
3776xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard1899e851999-02-01 12:18:54 +00003777 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003778 xmlChar *elem = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003779
Daniel Veillardb05deb71999-08-10 19:04:08 +00003780 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00003781 if ((RAW == '#') && (NXT(1) == 'P') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003782 (NXT(2) == 'C') && (NXT(3) == 'D') &&
3783 (NXT(4) == 'A') && (NXT(5) == 'T') &&
3784 (NXT(6) == 'A')) {
3785 SKIP(7);
3786 SKIP_BLANKS;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003787 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00003788 if (RAW == ')') {
3789 ctxt->entity = ctxt->input;
Daniel Veillard3b9def11999-01-31 22:15:06 +00003790 NEXT;
3791 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillardcf461992000-03-14 18:30:20 +00003792 if (RAW == '*') {
Daniel Veillardf600e251999-12-18 15:32:46 +00003793 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3794 NEXT;
3795 }
Daniel Veillard3b9def11999-01-31 22:15:06 +00003796 return(ret);
3797 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003798 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003799 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3800 if (ret == NULL) return(NULL);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003801 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003802 while (RAW == '|') {
Daniel Veillard1899e851999-02-01 12:18:54 +00003803 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003804 if (elem == NULL) {
3805 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3806 if (ret == NULL) return(NULL);
3807 ret->c1 = cur;
Daniel Veillard1899e851999-02-01 12:18:54 +00003808 cur = ret;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003809 } else {
Daniel Veillard1899e851999-02-01 12:18:54 +00003810 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3811 if (n == NULL) return(NULL);
3812 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
3813 cur->c2 = n;
3814 cur = n;
Daniel Veillard6454aec1999-09-02 22:04:43 +00003815 xmlFree(elem);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003816 }
3817 SKIP_BLANKS;
3818 elem = xmlParseName(ctxt);
3819 if (elem == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003820 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003821 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003822 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003823 "xmlParseElementMixedContentDecl : Name expected\n");
3824 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003825 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003826 xmlFreeElementContent(cur);
3827 return(NULL);
3828 }
3829 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00003830 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003831 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003832 if ((RAW == ')') && (NXT(1) == '*')) {
Daniel Veillard1e346af1999-02-22 10:33:01 +00003833 if (elem != NULL) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003834 cur->c2 = xmlNewElementContent(elem,
3835 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard6454aec1999-09-02 22:04:43 +00003836 xmlFree(elem);
Daniel Veillard1e346af1999-02-22 10:33:01 +00003837 }
Daniel Veillard1899e851999-02-01 12:18:54 +00003838 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardcf461992000-03-14 18:30:20 +00003839 ctxt->entity = ctxt->input;
Daniel Veillard1899e851999-02-01 12:18:54 +00003840 SKIP(2);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003841 } else {
Daniel Veillard6454aec1999-09-02 22:04:43 +00003842 if (elem != NULL) xmlFree(elem);
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003843 xmlFreeElementContent(ret);
3844 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003845 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003846 ctxt->sax->error(ctxt->userData,
Daniel Veillard3b9def11999-01-31 22:15:06 +00003847 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003848 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003849 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003850 return(NULL);
3851 }
3852
3853 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003854 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003855 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003856 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003857 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
3858 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003859 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003860 }
3861 return(ret);
3862}
3863
3864/**
3865 * xmlParseElementChildrenContentDecl:
3866 * @ctxt: an XML parser context
3867 *
3868 * parse the declaration for an element's children content model
3869 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3870 *
3871 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00003872 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
3873 *
3874 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
3875 *
3876 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
3877 *
3878 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
3879 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003880 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
3881 * TODO Parameter-entity replacement text must be properly nested
3882 * with parenthesized groups. That is to say, if either of the
3883 * opening or closing parentheses in a choice, seq, or Mixed
3884 * construct is contained in the replacement text for a parameter
3885 * entity, both must be contained in the same replacement text. For
3886 * interoperability, if a parameter-entity reference appears in a
3887 * choice, seq, or Mixed construct, its replacement text should not
3888 * be empty, and neither the first nor last non-blank character of
3889 * the replacement text should be a connector (| or ,).
3890 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003891 * returns: the tree of xmlElementContentPtr describing the element
3892 * hierarchy.
3893 */
3894xmlElementContentPtr
Daniel Veillardce6e98d2000-11-25 09:54:49 +00003895#ifdef VMS
3896xmlParseElementChildrenContentD
3897#else
3898xmlParseElementChildrenContentDecl
3899#endif
3900(xmlParserCtxtPtr ctxt) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003901 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003902 xmlChar *elem;
3903 xmlChar type = 0;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003904
3905 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00003906 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00003907 if (RAW == '(') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003908 /* Recurse on first child */
3909 NEXT;
3910 SKIP_BLANKS;
3911 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
3912 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00003913 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003914 } else {
3915 elem = xmlParseName(ctxt);
3916 if (elem == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003917 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003918 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003919 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003920 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
3921 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003922 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003923 return(NULL);
3924 }
3925 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003926 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00003927 if (RAW == '?') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00003928 cur->ocur = XML_ELEMENT_CONTENT_OPT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003929 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00003930 } else if (RAW == '*') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00003931 cur->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003932 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00003933 } else if (RAW == '+') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00003934 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003935 NEXT;
3936 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00003937 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003938 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00003939 xmlFree(elem);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003940 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003941 }
3942 SKIP_BLANKS;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003943 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00003944 while (RAW != ')') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003945 /*
3946 * Each loop we parse one separator and one element.
3947 */
Daniel Veillardcf461992000-03-14 18:30:20 +00003948 if (RAW == ',') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003949 if (type == 0) type = CUR;
3950
3951 /*
3952 * Detect "Name | Name , Name" error
3953 */
3954 else if (type != CUR) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003955 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003956 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003957 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003958 "xmlParseElementChildrenContentDecl : '%c' expected\n",
3959 type);
3960 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003961 ctxt->disableSAX = 1;
3962 if ((op != NULL) && (op != ret))
3963 xmlFreeElementContent(op);
Daniel Veillard87b95392000-08-12 21:12:04 +00003964 if ((last != NULL) && (last != ret) &&
3965 (last != ret->c1) && (last != ret->c2))
Daniel Veillardcf461992000-03-14 18:30:20 +00003966 xmlFreeElementContent(last);
3967 if (ret != NULL)
3968 xmlFreeElementContent(ret);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003969 return(NULL);
3970 }
Daniel Veillard1899e851999-02-01 12:18:54 +00003971 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003972
3973 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
3974 if (op == NULL) {
3975 xmlFreeElementContent(ret);
3976 return(NULL);
3977 }
3978 if (last == NULL) {
3979 op->c1 = ret;
3980 ret = cur = op;
3981 } else {
3982 cur->c2 = op;
3983 op->c1 = last;
3984 cur =op;
Daniel Veillard1899e851999-02-01 12:18:54 +00003985 last = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003986 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003987 } else if (RAW == '|') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003988 if (type == 0) type = CUR;
3989
3990 /*
3991 * Detect "Name , Name | Name" error
3992 */
3993 else if (type != CUR) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003994 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003995 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003996 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003997 "xmlParseElementChildrenContentDecl : '%c' expected\n",
3998 type);
3999 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004000 ctxt->disableSAX = 1;
Daniel Veillard87b95392000-08-12 21:12:04 +00004001 if ((op != NULL) && (op != ret) && (op != last))
Daniel Veillardcf461992000-03-14 18:30:20 +00004002 xmlFreeElementContent(op);
Daniel Veillard87b95392000-08-12 21:12:04 +00004003 if ((last != NULL) && (last != ret) &&
4004 (last != ret->c1) && (last != ret->c2))
Daniel Veillardcf461992000-03-14 18:30:20 +00004005 xmlFreeElementContent(last);
4006 if (ret != NULL)
4007 xmlFreeElementContent(ret);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004008 return(NULL);
4009 }
Daniel Veillard1899e851999-02-01 12:18:54 +00004010 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004011
4012 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4013 if (op == NULL) {
Daniel Veillardcf461992000-03-14 18:30:20 +00004014 if ((op != NULL) && (op != ret))
4015 xmlFreeElementContent(op);
Daniel Veillard87b95392000-08-12 21:12:04 +00004016 if ((last != NULL) && (last != ret) &&
4017 (last != ret->c1) && (last != ret->c2))
Daniel Veillardcf461992000-03-14 18:30:20 +00004018 xmlFreeElementContent(last);
4019 if (ret != NULL)
4020 xmlFreeElementContent(ret);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004021 return(NULL);
4022 }
4023 if (last == NULL) {
4024 op->c1 = ret;
4025 ret = cur = op;
4026 } else {
4027 cur->c2 = op;
4028 op->c1 = last;
4029 cur =op;
Daniel Veillard1899e851999-02-01 12:18:54 +00004030 last = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004031 }
4032 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004033 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004034 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004035 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004036 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4037 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004038 ctxt->disableSAX = 1;
Daniel Veillardcf461992000-03-14 18:30:20 +00004039 if ((op != NULL) && (op != ret))
4040 xmlFreeElementContent(op);
Daniel Veillard87b95392000-08-12 21:12:04 +00004041 if ((last != NULL) && (last != ret) &&
4042 (last != ret->c1) && (last != ret->c2))
Daniel Veillardcf461992000-03-14 18:30:20 +00004043 xmlFreeElementContent(last);
4044 if (ret != NULL)
4045 xmlFreeElementContent(ret);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004046 return(NULL);
4047 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00004048 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004049 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004050 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00004051 if (RAW == '(') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004052 /* Recurse on second child */
4053 NEXT;
4054 SKIP_BLANKS;
Daniel Veillard1899e851999-02-01 12:18:54 +00004055 last = xmlParseElementChildrenContentDecl(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004056 SKIP_BLANKS;
4057 } else {
4058 elem = xmlParseName(ctxt);
4059 if (elem == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004060 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004061 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004062 ctxt->sax->error(ctxt->userData,
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004063 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004064 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004065 ctxt->disableSAX = 1;
4066 if ((op != NULL) && (op != ret))
4067 xmlFreeElementContent(op);
Daniel Veillard87b95392000-08-12 21:12:04 +00004068 if ((last != NULL) && (last != ret) &&
4069 (last != ret->c1) && (last != ret->c2))
Daniel Veillardcf461992000-03-14 18:30:20 +00004070 xmlFreeElementContent(last);
4071 if (ret != NULL)
4072 xmlFreeElementContent(ret);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004073 return(NULL);
4074 }
Daniel Veillard1899e851999-02-01 12:18:54 +00004075 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard6454aec1999-09-02 22:04:43 +00004076 xmlFree(elem);
Daniel Veillardcf461992000-03-14 18:30:20 +00004077 if (RAW == '?') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004078 last->ocur = XML_ELEMENT_CONTENT_OPT;
4079 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00004080 } else if (RAW == '*') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004081 last->ocur = XML_ELEMENT_CONTENT_MULT;
4082 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00004083 } else if (RAW == '+') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004084 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4085 NEXT;
4086 } else {
4087 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4088 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004089 }
4090 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004091 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004092 }
Daniel Veillard1899e851999-02-01 12:18:54 +00004093 if ((cur != NULL) && (last != NULL)) {
4094 cur->c2 = last;
4095 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004096 ctxt->entity = ctxt->input;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004097 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00004098 if (RAW == '?') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004099 ret->ocur = XML_ELEMENT_CONTENT_OPT;
4100 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00004101 } else if (RAW == '*') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004102 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4103 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00004104 } else if (RAW == '+') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004105 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
4106 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004107 }
4108 return(ret);
4109}
4110
4111/**
4112 * xmlParseElementContentDecl:
4113 * @ctxt: an XML parser context
4114 * @name: the name of the element being defined.
4115 * @result: the Element Content pointer will be stored here if any
Daniel Veillard260a68f1998-08-13 03:39:55 +00004116 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004117 * parse the declaration for an Element content either Mixed or Children,
4118 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4119 *
4120 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
Daniel Veillard11e00581998-10-24 18:27:49 +00004121 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004122 * returns: the type of element content XML_ELEMENT_TYPE_xxx
Daniel Veillard260a68f1998-08-13 03:39:55 +00004123 */
4124
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004125int
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004126xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004127 xmlElementContentPtr *result) {
4128
4129 xmlElementContentPtr tree = NULL;
Daniel Veillardcf461992000-03-14 18:30:20 +00004130 xmlParserInputPtr input = ctxt->input;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004131 int res;
4132
4133 *result = NULL;
4134
Daniel Veillardcf461992000-03-14 18:30:20 +00004135 if (RAW != '(') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004136 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004137 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004138 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004139 "xmlParseElementContentDecl : '(' expected\n");
4140 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004141 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004142 return(-1);
4143 }
4144 NEXT;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004145 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004146 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00004147 if ((RAW == '#') && (NXT(1) == 'P') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004148 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4149 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4150 (NXT(6) == 'A')) {
4151 tree = xmlParseElementMixedContentDecl(ctxt);
4152 res = XML_ELEMENT_TYPE_MIXED;
4153 } else {
4154 tree = xmlParseElementChildrenContentDecl(ctxt);
4155 res = XML_ELEMENT_TYPE_ELEMENT;
4156 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004157 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004158 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
Daniel Veillardcf461992000-03-14 18:30:20 +00004159 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4160 ctxt->sax->error(ctxt->userData,
4161"Element content declaration doesn't start and stop in the same entity\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00004162 ctxt->wellFormed = 0;
4163 ctxt->disableSAX = 1;
4164 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004165 SKIP_BLANKS;
Daniel Veillard3b9def11999-01-31 22:15:06 +00004166 *result = tree;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004167 return(res);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004168}
4169
Daniel Veillard11e00581998-10-24 18:27:49 +00004170/**
4171 * xmlParseElementDecl:
4172 * @ctxt: an XML parser context
4173 *
4174 * parse an Element declaration.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004175 *
4176 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4177 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00004178 * [ VC: Unique Element Type Declaration ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00004179 * No element type may be declared more than once
Daniel Veillard1e346af1999-02-22 10:33:01 +00004180 *
4181 * Returns the type of the element, or -1 in case of error
Daniel Veillard260a68f1998-08-13 03:39:55 +00004182 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004183int
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004184xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004185 xmlChar *name;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004186 int ret = -1;
4187 xmlElementContentPtr content = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004188
Daniel Veillardb05deb71999-08-10 19:04:08 +00004189 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00004190 if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00004191 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4192 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4193 (NXT(6) == 'E') && (NXT(7) == 'N') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004194 (NXT(8) == 'T')) {
Daniel Veillardcf461992000-03-14 18:30:20 +00004195 xmlParserInputPtr input = ctxt->input;
4196
Daniel Veillard260a68f1998-08-13 03:39:55 +00004197 SKIP(9);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004198 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004199 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004200 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004201 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004202 "Space required after 'ELEMENT'\n");
4203 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004204 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004205 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004206 SKIP_BLANKS;
4207 name = xmlParseName(ctxt);
4208 if (name == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004209 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004210 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004211 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004212 "xmlParseElementDecl: no name for Element\n");
4213 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004214 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004215 return(-1);
4216 }
4217 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004218 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004219 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004220 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004221 "Space required after the element name\n");
4222 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004223 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004224 }
4225 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00004226 if ((RAW == 'E') && (NXT(1) == 'M') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00004227 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4228 (NXT(4) == 'Y')) {
4229 SKIP(5);
4230 /*
4231 * Element must always be empty.
4232 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004233 ret = XML_ELEMENT_TYPE_EMPTY;
Daniel Veillardcf461992000-03-14 18:30:20 +00004234 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00004235 (NXT(2) == 'Y')) {
4236 SKIP(3);
4237 /*
4238 * Element is a generic container.
4239 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004240 ret = XML_ELEMENT_TYPE_ANY;
Daniel Veillardcf461992000-03-14 18:30:20 +00004241 } else if (RAW == '(') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004242 ret = xmlParseElementContentDecl(ctxt, name, &content);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004243 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004244 /*
4245 * [ WFC: PEs in Internal Subset ] error handling.
4246 */
Daniel Veillardcf461992000-03-14 18:30:20 +00004247 if ((RAW == '%') && (ctxt->external == 0) &&
Daniel Veillardb05deb71999-08-10 19:04:08 +00004248 (ctxt->inputNr == 1)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004249 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004250 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4251 ctxt->sax->error(ctxt->userData,
4252 "PEReference: forbidden within markup decl in internal subset\n");
4253 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004254 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004255 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4256 ctxt->sax->error(ctxt->userData,
4257 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4258 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004259 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004260 ctxt->disableSAX = 1;
Daniel Veillard6454aec1999-09-02 22:04:43 +00004261 if (name != NULL) xmlFree(name);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004262 return(-1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004263 }
Daniel Veillard686d6b62000-01-03 11:08:02 +00004264
Daniel Veillard260a68f1998-08-13 03:39:55 +00004265 SKIP_BLANKS;
Daniel Veillard686d6b62000-01-03 11:08:02 +00004266 /*
4267 * Pop-up of finished entities.
4268 */
Daniel Veillardcf461992000-03-14 18:30:20 +00004269 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillard686d6b62000-01-03 11:08:02 +00004270 xmlPopInput(ctxt);
4271 SKIP_BLANKS;
4272
Daniel Veillardcf461992000-03-14 18:30:20 +00004273 if (RAW != '>') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004274 ctxt->errNo = XML_ERR_GT_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004275 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004276 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00004277 "xmlParseElementDecl: expected '>' at the end\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004278 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004279 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004280 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00004281 if (input != ctxt->input) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004282 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
Daniel Veillardcf461992000-03-14 18:30:20 +00004283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4284 ctxt->sax->error(ctxt->userData,
4285"Element declaration doesn't start and stop in the same entity\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00004286 ctxt->wellFormed = 0;
4287 ctxt->disableSAX = 1;
4288 }
4289
Daniel Veillard260a68f1998-08-13 03:39:55 +00004290 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00004291 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4292 (ctxt->sax->elementDecl != NULL))
Daniel Veillard011b63c1999-06-02 17:44:04 +00004293 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4294 content);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004295 }
Daniel Veillard14fff061999-06-22 21:49:07 +00004296 if (content != NULL) {
4297 xmlFreeElementContent(content);
4298 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004299 if (name != NULL) {
Daniel Veillard6454aec1999-09-02 22:04:43 +00004300 xmlFree(name);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004301 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004302 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004303 return(ret);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004304}
4305
Daniel Veillard11e00581998-10-24 18:27:49 +00004306/**
4307 * xmlParseMarkupDecl:
4308 * @ctxt: an XML parser context
4309 *
4310 * parse Markup declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00004311 *
4312 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4313 * NotationDecl | PI | Comment
4314 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00004315 * [ VC: Proper Declaration/PE Nesting ]
Daniel Veillardb1059e22000-09-16 14:02:43 +00004316 * Parameter-entity replacement text must be properly nested with
Daniel Veillardb05deb71999-08-10 19:04:08 +00004317 * markup declarations. That is to say, if either the first character
4318 * or the last character of a markup declaration (markupdecl above) is
4319 * contained in the replacement text for a parameter-entity reference,
4320 * both must be contained in the same replacement text.
4321 *
4322 * [ WFC: PEs in Internal Subset ]
4323 * In the internal DTD subset, parameter-entity references can occur
4324 * only where markup declarations can occur, not within markup declarations.
4325 * (This does not apply to references that occur in external parameter
4326 * entities or to the external subset.)
Daniel Veillard260a68f1998-08-13 03:39:55 +00004327 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004328void
4329xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004330 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004331 xmlParseElementDecl(ctxt);
4332 xmlParseAttributeListDecl(ctxt);
4333 xmlParseEntityDecl(ctxt);
4334 xmlParseNotationDecl(ctxt);
4335 xmlParsePI(ctxt);
Daniel Veillardb96e6431999-08-29 21:02:19 +00004336 xmlParseComment(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00004337 /*
4338 * This is only for internal subset. On external entities,
4339 * the replacement is done before parsing stage
4340 */
4341 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4342 xmlParsePEReference(ctxt);
4343 ctxt->instate = XML_PARSER_DTD;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004344}
4345
Daniel Veillard11e00581998-10-24 18:27:49 +00004346/**
Daniel Veillard011b63c1999-06-02 17:44:04 +00004347 * xmlParseTextDecl:
4348 * @ctxt: an XML parser context
4349 *
4350 * parse an XML declaration header for external entities
4351 *
4352 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4353 *
Daniel Veillardcf461992000-03-14 18:30:20 +00004354 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
Daniel Veillard011b63c1999-06-02 17:44:04 +00004355 */
4356
Daniel Veillardcf461992000-03-14 18:30:20 +00004357void
Daniel Veillard011b63c1999-06-02 17:44:04 +00004358xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004359 xmlChar *version;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004360
4361 /*
4362 * We know that '<?xml' is here.
4363 */
Daniel Veillard496a1cf2000-05-03 14:20:55 +00004364 if ((RAW == '<') && (NXT(1) == '?') &&
4365 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4366 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4367 SKIP(5);
4368 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004369 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00004370 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4371 ctxt->sax->error(ctxt->userData,
4372 "Text declaration '<?xml' required\n");
Daniel Veillard496a1cf2000-05-03 14:20:55 +00004373 ctxt->wellFormed = 0;
4374 ctxt->disableSAX = 1;
4375
4376 return;
4377 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00004378
4379 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004380 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004381 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004382 ctxt->sax->error(ctxt->userData,
4383 "Space needed after '<?xml'\n");
Daniel Veillard011b63c1999-06-02 17:44:04 +00004384 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004385 ctxt->disableSAX = 1;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004386 }
4387 SKIP_BLANKS;
4388
4389 /*
4390 * We may have the VersionInfo here.
4391 */
4392 version = xmlParseVersionInfo(ctxt);
Daniel Veillard011b63c1999-06-02 17:44:04 +00004393 if (version == NULL)
4394 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillardcf461992000-03-14 18:30:20 +00004395 ctxt->input->version = version;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004396
4397 /*
4398 * We must have the encoding declaration
4399 */
4400 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004401 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004402 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004403 ctxt->sax->error(ctxt->userData, "Space needed here\n");
Daniel Veillard011b63c1999-06-02 17:44:04 +00004404 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004405 ctxt->disableSAX = 1;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004406 }
Daniel Veillard496a1cf2000-05-03 14:20:55 +00004407 xmlParseEncodingDecl(ctxt);
4408 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4409 /*
4410 * The XML REC instructs us to stop parsing right here
4411 */
4412 return;
4413 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00004414
4415 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00004416 if ((RAW == '?') && (NXT(1) == '>')) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00004417 SKIP(2);
Daniel Veillardcf461992000-03-14 18:30:20 +00004418 } else if (RAW == '>') {
Daniel Veillard011b63c1999-06-02 17:44:04 +00004419 /* Deprecated old WD ... */
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004420 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004421 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004422 ctxt->sax->error(ctxt->userData,
4423 "XML declaration must end-up with '?>'\n");
Daniel Veillard011b63c1999-06-02 17:44:04 +00004424 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004425 ctxt->disableSAX = 1;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004426 NEXT;
4427 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004428 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004429 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004430 ctxt->sax->error(ctxt->userData,
4431 "parsing XML declaration: '?>' expected\n");
Daniel Veillard011b63c1999-06-02 17:44:04 +00004432 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004433 ctxt->disableSAX = 1;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004434 MOVETO_ENDTAG(CUR_PTR);
4435 NEXT;
4436 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00004437}
4438
4439/*
4440 * xmlParseConditionalSections
4441 * @ctxt: an XML parser context
4442 *
Daniel Veillard011b63c1999-06-02 17:44:04 +00004443 * [61] conditionalSect ::= includeSect | ignoreSect
4444 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4445 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4446 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4447 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4448 */
4449
4450void
4451xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillardcf461992000-03-14 18:30:20 +00004452 SKIP(3);
4453 SKIP_BLANKS;
4454 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4455 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4456 (NXT(6) == 'E')) {
4457 SKIP(7);
4458 SKIP_BLANKS;
4459 if (RAW != '[') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004460 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
Daniel Veillardcf461992000-03-14 18:30:20 +00004461 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4462 ctxt->sax->error(ctxt->userData,
4463 "XML conditional section '[' expected\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00004464 ctxt->wellFormed = 0;
4465 ctxt->disableSAX = 1;
4466 } else {
4467 NEXT;
4468 }
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00004469 if (xmlParserDebugEntities) {
4470 if ((ctxt->input != NULL) && (ctxt->input->filename))
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00004471 xmlGenericError(xmlGenericErrorContext,
4472 "%s(%d): ", ctxt->input->filename,
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00004473 ctxt->input->line);
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00004474 xmlGenericError(xmlGenericErrorContext,
4475 "Entering INCLUDE Conditional Section\n");
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00004476 }
4477
Daniel Veillardcf461992000-03-14 18:30:20 +00004478 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4479 (NXT(2) != '>'))) {
4480 const xmlChar *check = CUR_PTR;
4481 int cons = ctxt->input->consumed;
4482 int tok = ctxt->token;
Daniel Veillard71b656e2000-01-05 14:46:17 +00004483
Daniel Veillardcf461992000-03-14 18:30:20 +00004484 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4485 xmlParseConditionalSections(ctxt);
4486 } else if (IS_BLANK(CUR)) {
4487 NEXT;
4488 } else if (RAW == '%') {
4489 xmlParsePEReference(ctxt);
4490 } else
4491 xmlParseMarkupDecl(ctxt);
4492
4493 /*
4494 * Pop-up of finished entities.
4495 */
4496 while ((RAW == 0) && (ctxt->inputNr > 1))
4497 xmlPopInput(ctxt);
4498
4499 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4500 (tok == ctxt->token)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004501 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
Daniel Veillardcf461992000-03-14 18:30:20 +00004502 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4503 ctxt->sax->error(ctxt->userData,
4504 "Content error in the external subset\n");
4505 ctxt->wellFormed = 0;
4506 ctxt->disableSAX = 1;
Daniel Veillardcf461992000-03-14 18:30:20 +00004507 break;
4508 }
4509 }
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00004510 if (xmlParserDebugEntities) {
4511 if ((ctxt->input != NULL) && (ctxt->input->filename))
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00004512 xmlGenericError(xmlGenericErrorContext,
4513 "%s(%d): ", ctxt->input->filename,
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00004514 ctxt->input->line);
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00004515 xmlGenericError(xmlGenericErrorContext,
4516 "Leaving INCLUDE Conditional Section\n");
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00004517 }
4518
Daniel Veillardcf461992000-03-14 18:30:20 +00004519 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4520 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4521 int state;
Daniel Veillard41e06512000-11-13 11:47:47 +00004522 int instate;
4523 int depth = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004524
4525 SKIP(6);
4526 SKIP_BLANKS;
4527 if (RAW != '[') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004528 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
Daniel Veillardcf461992000-03-14 18:30:20 +00004529 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4530 ctxt->sax->error(ctxt->userData,
4531 "XML conditional section '[' expected\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00004532 ctxt->wellFormed = 0;
4533 ctxt->disableSAX = 1;
4534 } else {
4535 NEXT;
4536 }
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00004537 if (xmlParserDebugEntities) {
4538 if ((ctxt->input != NULL) && (ctxt->input->filename))
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00004539 xmlGenericError(xmlGenericErrorContext,
4540 "%s(%d): ", ctxt->input->filename,
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00004541 ctxt->input->line);
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00004542 xmlGenericError(xmlGenericErrorContext,
4543 "Entering IGNORE Conditional Section\n");
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00004544 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004545
4546 /*
4547 * Parse up to the end of the conditionnal section
4548 * But disable SAX event generating DTD building in the meantime
4549 */
4550 state = ctxt->disableSAX;
Daniel Veillard41e06512000-11-13 11:47:47 +00004551 instate = ctxt->instate;
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00004552 ctxt->disableSAX = 1;
Daniel Veillard41e06512000-11-13 11:47:47 +00004553 ctxt->instate = XML_PARSER_IGNORE;
Daniel Veillardcf461992000-03-14 18:30:20 +00004554
Daniel Veillard41e06512000-11-13 11:47:47 +00004555 while (depth >= 0) {
4556 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4557 depth++;
4558 SKIP(3);
4559 continue;
4560 }
4561 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4562 if (--depth >= 0) SKIP(3);
4563 continue;
4564 }
4565 NEXT;
4566 continue;
Daniel Veillardcf461992000-03-14 18:30:20 +00004567 }
Daniel Veillard41e06512000-11-13 11:47:47 +00004568
Daniel Veillardcf461992000-03-14 18:30:20 +00004569 ctxt->disableSAX = state;
Daniel Veillard41e06512000-11-13 11:47:47 +00004570 ctxt->instate = instate;
4571
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00004572 if (xmlParserDebugEntities) {
4573 if ((ctxt->input != NULL) && (ctxt->input->filename))
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00004574 xmlGenericError(xmlGenericErrorContext,
4575 "%s(%d): ", ctxt->input->filename,
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00004576 ctxt->input->line);
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00004577 xmlGenericError(xmlGenericErrorContext,
4578 "Leaving IGNORE Conditional Section\n");
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00004579 }
4580
Daniel Veillardcf461992000-03-14 18:30:20 +00004581 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004582 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
Daniel Veillardcf461992000-03-14 18:30:20 +00004583 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4584 ctxt->sax->error(ctxt->userData,
4585 "XML conditional section INCLUDE or IGNORE keyword expected\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00004586 ctxt->wellFormed = 0;
4587 ctxt->disableSAX = 1;
Daniel Veillard71b656e2000-01-05 14:46:17 +00004588 }
4589
Daniel Veillardcf461992000-03-14 18:30:20 +00004590 if (RAW == 0)
Daniel Veillard71b656e2000-01-05 14:46:17 +00004591 SHRINK;
4592
Daniel Veillardcf461992000-03-14 18:30:20 +00004593 if (RAW == 0) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004594 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004595 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4596 ctxt->sax->error(ctxt->userData,
4597 "XML conditional section not closed\n");
4598 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004599 ctxt->disableSAX = 1;
Daniel Veillard71b656e2000-01-05 14:46:17 +00004600 } else {
4601 SKIP(3);
Daniel Veillard011b63c1999-06-02 17:44:04 +00004602 }
4603}
4604
4605/**
Daniel Veillard00fdf371999-10-08 09:40:39 +00004606 * xmlParseExternalSubset:
Daniel Veillard011b63c1999-06-02 17:44:04 +00004607 * @ctxt: an XML parser context
Daniel Veillard00fdf371999-10-08 09:40:39 +00004608 * @ExternalID: the external identifier
4609 * @SystemID: the system identifier (or URL)
Daniel Veillard011b63c1999-06-02 17:44:04 +00004610 *
4611 * parse Markup declarations from an external subset
4612 *
4613 * [30] extSubset ::= textDecl? extSubsetDecl
4614 *
4615 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
Daniel Veillard011b63c1999-06-02 17:44:04 +00004616 */
4617void
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004618xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4619 const xmlChar *SystemID) {
Daniel Veillarda819dac1999-11-24 18:04:22 +00004620 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00004621 if ((RAW == '<') && (NXT(1) == '?') &&
Daniel Veillard011b63c1999-06-02 17:44:04 +00004622 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4623 (NXT(4) == 'l')) {
Daniel Veillardcf461992000-03-14 18:30:20 +00004624 xmlParseTextDecl(ctxt);
Daniel Veillard496a1cf2000-05-03 14:20:55 +00004625 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4626 /*
4627 * The XML REC instructs us to stop parsing right here
4628 */
4629 ctxt->instate = XML_PARSER_EOF;
4630 return;
4631 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00004632 }
4633 if (ctxt->myDoc == NULL) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00004634 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillard011b63c1999-06-02 17:44:04 +00004635 }
4636 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4637 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4638
Daniel Veillardb05deb71999-08-10 19:04:08 +00004639 ctxt->instate = XML_PARSER_DTD;
4640 ctxt->external = 1;
Daniel Veillardcf461992000-03-14 18:30:20 +00004641 while (((RAW == '<') && (NXT(1) == '?')) ||
4642 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard011b63c1999-06-02 17:44:04 +00004643 IS_BLANK(CUR)) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004644 const xmlChar *check = CUR_PTR;
Daniel Veillardb96e6431999-08-29 21:02:19 +00004645 int cons = ctxt->input->consumed;
Daniel Veillardcf461992000-03-14 18:30:20 +00004646 int tok = ctxt->token;
Daniel Veillardb96e6431999-08-29 21:02:19 +00004647
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00004648 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00004649 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00004650 xmlParseConditionalSections(ctxt);
4651 } else if (IS_BLANK(CUR)) {
4652 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00004653 } else if (RAW == '%') {
Daniel Veillard011b63c1999-06-02 17:44:04 +00004654 xmlParsePEReference(ctxt);
4655 } else
4656 xmlParseMarkupDecl(ctxt);
4657
4658 /*
4659 * Pop-up of finished entities.
4660 */
Daniel Veillardcf461992000-03-14 18:30:20 +00004661 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillard011b63c1999-06-02 17:44:04 +00004662 xmlPopInput(ctxt);
4663
Daniel Veillardcf461992000-03-14 18:30:20 +00004664 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4665 (tok == ctxt->token)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004666 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
Daniel Veillardb96e6431999-08-29 21:02:19 +00004667 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4668 ctxt->sax->error(ctxt->userData,
4669 "Content error in the external subset\n");
4670 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004671 ctxt->disableSAX = 1;
Daniel Veillardb96e6431999-08-29 21:02:19 +00004672 break;
4673 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00004674 }
4675
Daniel Veillardcf461992000-03-14 18:30:20 +00004676 if (RAW != 0) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004677 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004678 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4679 ctxt->sax->error(ctxt->userData,
4680 "Extra content at the end of the document\n");
4681 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004682 ctxt->disableSAX = 1;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004683 }
4684
4685}
4686
4687/**
Daniel Veillard011b63c1999-06-02 17:44:04 +00004688 * xmlParseReference:
4689 * @ctxt: an XML parser context
4690 *
4691 * parse and handle entity references in content, depending on the SAX
4692 * interface, this may end-up in a call to character() if this is a
4693 * CharRef, a predefined entity, if there is no reference() callback.
4694 * or if the parser was asked to switch to that mode.
4695 *
4696 * [67] Reference ::= EntityRef | CharRef
4697 */
4698void
4699xmlParseReference(xmlParserCtxtPtr ctxt) {
4700 xmlEntityPtr ent;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004701 xmlChar *val;
Daniel Veillardcf461992000-03-14 18:30:20 +00004702 if (RAW != '&') return;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004703
4704 if (NXT(1) == '#') {
Daniel Veillardcf461992000-03-14 18:30:20 +00004705 int i = 0;
4706 xmlChar out[10];
4707 int hex = NXT(2);
Daniel Veillard011b63c1999-06-02 17:44:04 +00004708 int val = xmlParseCharRef(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00004709
Daniel Veillardbe803962000-06-28 23:40:59 +00004710 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
Daniel Veillardcf461992000-03-14 18:30:20 +00004711 /*
4712 * So we are using non-UTF-8 buffers
4713 * Check that the char fit on 8bits, if not
4714 * generate a CharRef.
4715 */
4716 if (val <= 0xFF) {
4717 out[0] = val;
4718 out[1] = 0;
4719 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4720 (!ctxt->disableSAX))
4721 ctxt->sax->characters(ctxt->userData, out, 1);
4722 } else {
4723 if ((hex == 'x') || (hex == 'X'))
4724 sprintf((char *)out, "#x%X", val);
4725 else
4726 sprintf((char *)out, "#%d", val);
4727 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4728 (!ctxt->disableSAX))
4729 ctxt->sax->reference(ctxt->userData, out);
4730 }
4731 } else {
4732 /*
4733 * Just encode the value in UTF-8
4734 */
4735 COPY_BUF(0 ,out, i, val);
4736 out[i] = 0;
4737 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4738 (!ctxt->disableSAX))
4739 ctxt->sax->characters(ctxt->userData, out, i);
4740 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00004741 } else {
4742 ent = xmlParseEntityRef(ctxt);
4743 if (ent == NULL) return;
4744 if ((ent->name != NULL) &&
Daniel Veillardcf461992000-03-14 18:30:20 +00004745 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4746 xmlNodePtr list = NULL;
4747 int ret;
4748
4749
4750 /*
4751 * The first reference to the entity trigger a parsing phase
4752 * where the ent->children is filled with the result from
4753 * the parsing.
4754 */
4755 if (ent->children == NULL) {
4756 xmlChar *value;
4757 value = ent->content;
4758
4759 /*
4760 * Check that this entity is well formed
4761 */
4762 if ((value != NULL) &&
4763 (value[1] == 0) && (value[0] == '<') &&
Daniel Veillard8b5dd832000-10-01 20:28:44 +00004764 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
Daniel Veillardcf461992000-03-14 18:30:20 +00004765 /*
Daniel Veillarde0854c32000-08-27 21:12:29 +00004766 * DONE: get definite answer on this !!!
Daniel Veillardcf461992000-03-14 18:30:20 +00004767 * Lots of entity decls are used to declare a single
4768 * char
4769 * <!ENTITY lt "<">
4770 * Which seems to be valid since
4771 * 2.4: The ampersand character (&) and the left angle
4772 * bracket (<) may appear in their literal form only
4773 * when used ... They are also legal within the literal
4774 * entity value of an internal entity declaration;i
4775 * see "4.3.2 Well-Formed Parsed Entities".
4776 * IMHO 2.4 and 4.3.2 are directly in contradiction.
4777 * Looking at the OASIS test suite and James Clark
4778 * tests, this is broken. However the XML REC uses
4779 * it. Is the XML REC not well-formed ????
4780 * This is a hack to avoid this problem
Daniel Veillarde0854c32000-08-27 21:12:29 +00004781 *
4782 * ANSWER: since lt gt amp .. are already defined,
4783 * this is a redefinition and hence the fact that the
4784 * contentis not well balanced is not a Wf error, this
4785 * is lousy but acceptable.
Daniel Veillardcf461992000-03-14 18:30:20 +00004786 */
4787 list = xmlNewDocText(ctxt->myDoc, value);
4788 if (list != NULL) {
4789 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4790 (ent->children == NULL)) {
4791 ent->children = list;
4792 ent->last = list;
4793 list->parent = (xmlNodePtr) ent;
4794 } else {
4795 xmlFreeNodeList(list);
4796 }
4797 } else if (list != NULL) {
4798 xmlFreeNodeList(list);
4799 }
4800 } else {
4801 /*
4802 * 4.3.2: An internal general parsed entity is well-formed
4803 * if its replacement text matches the production labeled
4804 * content.
4805 */
4806 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
4807 ctxt->depth++;
4808 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
4809 ctxt->sax, NULL, ctxt->depth,
4810 value, &list);
4811 ctxt->depth--;
4812 } else if (ent->etype ==
4813 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
4814 ctxt->depth++;
4815 ret = xmlParseExternalEntity(ctxt->myDoc,
4816 ctxt->sax, NULL, ctxt->depth,
Daniel Veillard39c7d712000-09-10 16:14:55 +00004817 ent->URI, ent->ExternalID, &list);
Daniel Veillardcf461992000-03-14 18:30:20 +00004818 ctxt->depth--;
4819 } else {
4820 ret = -1;
4821 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4822 ctxt->sax->error(ctxt->userData,
4823 "Internal: invalid entity type\n");
4824 }
4825 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004826 ctxt->errNo = XML_ERR_ENTITY_LOOP;
Daniel Veillardcf461992000-03-14 18:30:20 +00004827 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4828 ctxt->sax->error(ctxt->userData,
4829 "Detected entity reference loop\n");
4830 ctxt->wellFormed = 0;
4831 ctxt->disableSAX = 1;
Daniel Veillardcf461992000-03-14 18:30:20 +00004832 } else if ((ret == 0) && (list != NULL)) {
4833 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4834 (ent->children == NULL)) {
4835 ent->children = list;
4836 while (list != NULL) {
4837 list->parent = (xmlNodePtr) ent;
4838 if (list->next == NULL)
4839 ent->last = list;
4840 list = list->next;
4841 }
4842 } else {
4843 xmlFreeNodeList(list);
4844 }
4845 } else if (ret > 0) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004846 ctxt->errNo = ret;
Daniel Veillardcf461992000-03-14 18:30:20 +00004847 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4848 ctxt->sax->error(ctxt->userData,
4849 "Entity value required\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00004850 ctxt->wellFormed = 0;
4851 ctxt->disableSAX = 1;
4852 } else if (list != NULL) {
4853 xmlFreeNodeList(list);
4854 }
4855 }
4856 }
Daniel Veillardb96e6431999-08-29 21:02:19 +00004857 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
Daniel Veillardcf461992000-03-14 18:30:20 +00004858 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00004859 /*
4860 * Create a node.
4861 */
4862 ctxt->sax->reference(ctxt->userData, ent->name);
4863 return;
4864 } else if (ctxt->replaceEntities) {
Daniel Veillarde0854c32000-08-27 21:12:29 +00004865 if ((ctxt->node != NULL) && (ent->children != NULL)) {
4866 /*
4867 * Seems we are generating the DOM content, do
4868 * a simple tree copy
4869 */
4870 xmlNodePtr new;
4871 new = xmlCopyNodeList(ent->children);
4872
4873 xmlAddChildList(ctxt->node, new);
4874 /*
4875 * This is to avoid a nasty side effect, see
4876 * characters() in SAX.c
4877 */
4878 ctxt->nodemem = 0;
4879 ctxt->nodelen = 0;
4880 return;
4881 } else {
4882 /*
4883 * Probably running in SAX mode
4884 */
4885 xmlParserInputPtr input;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004886
Daniel Veillarde0854c32000-08-27 21:12:29 +00004887 input = xmlNewEntityInputStream(ctxt, ent);
4888 xmlPushInput(ctxt, input);
4889 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
4890 (RAW == '<') && (NXT(1) == '?') &&
4891 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4892 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4893 xmlParseTextDecl(ctxt);
4894 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4895 /*
4896 * The XML REC instructs us to stop parsing right here
4897 */
4898 ctxt->instate = XML_PARSER_EOF;
4899 return;
4900 }
4901 if (input->standalone == 1) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004902 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
Daniel Veillarde0854c32000-08-27 21:12:29 +00004903 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4904 ctxt->sax->error(ctxt->userData,
4905 "external parsed entities cannot be standalone\n");
Daniel Veillarde0854c32000-08-27 21:12:29 +00004906 ctxt->wellFormed = 0;
4907 ctxt->disableSAX = 1;
4908 }
Daniel Veillard496a1cf2000-05-03 14:20:55 +00004909 }
Daniel Veillarde0854c32000-08-27 21:12:29 +00004910 return;
Daniel Veillardcf461992000-03-14 18:30:20 +00004911 }
Daniel Veillardb96e6431999-08-29 21:02:19 +00004912 }
Daniel Veillarde0854c32000-08-27 21:12:29 +00004913 } else {
4914 val = ent->content;
4915 if (val == NULL) return;
4916 /*
4917 * inline the entity.
4918 */
4919 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4920 (!ctxt->disableSAX))
4921 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
Daniel Veillard011b63c1999-06-02 17:44:04 +00004922 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00004923 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004924}
4925
Daniel Veillard11e00581998-10-24 18:27:49 +00004926/**
4927 * xmlParseEntityRef:
4928 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00004929 *
4930 * parse ENTITY references declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00004931 *
4932 * [68] EntityRef ::= '&' Name ';'
Daniel Veillard1e346af1999-02-22 10:33:01 +00004933 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00004934 * [ WFC: Entity Declared ]
4935 * In a document without any DTD, a document with only an internal DTD
4936 * subset which contains no parameter entity references, or a document
4937 * with "standalone='yes'", the Name given in the entity reference
4938 * must match that in an entity declaration, except that well-formed
4939 * documents need not declare any of the following entities: amp, lt,
4940 * gt, apos, quot. The declaration of a parameter entity must precede
4941 * any reference to it. Similarly, the declaration of a general entity
4942 * must precede any reference to it which appears in a default value in an
4943 * attribute-list declaration. Note that if entities are declared in the
4944 * external subset or in external parameter entities, a non-validating
4945 * processor is not obligated to read and process their declarations;
4946 * for such documents, the rule that an entity must be declared is a
4947 * well-formedness constraint only if standalone='yes'.
4948 *
4949 * [ WFC: Parsed Entity ]
4950 * An entity reference must not contain the name of an unparsed entity
4951 *
Daniel Veillard011b63c1999-06-02 17:44:04 +00004952 * Returns the xmlEntityPtr if found, or NULL otherwise.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004953 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00004954xmlEntityPtr
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004955xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004956 xmlChar *name;
Daniel Veillard517752b1999-04-05 12:20:10 +00004957 xmlEntityPtr ent = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004958
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004959 GROW;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004960
Daniel Veillardcf461992000-03-14 18:30:20 +00004961 if (RAW == '&') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004962 NEXT;
4963 name = xmlParseName(ctxt);
4964 if (name == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004965 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004966 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00004967 ctxt->sax->error(ctxt->userData,
4968 "xmlParseEntityRef: no name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004969 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004970 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004971 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00004972 if (RAW == ';') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004973 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004974 /*
Daniel Veillard011b63c1999-06-02 17:44:04 +00004975 * Ask first SAX for entity resolution, otherwise try the
4976 * predefined set.
4977 */
4978 if (ctxt->sax != NULL) {
4979 if (ctxt->sax->getEntity != NULL)
4980 ent = ctxt->sax->getEntity(ctxt->userData, name);
4981 if (ent == NULL)
4982 ent = xmlGetPredefinedEntity(name);
4983 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00004984 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00004985 * [ WFC: Entity Declared ]
4986 * In a document without any DTD, a document with only an
4987 * internal DTD subset which contains no parameter entity
4988 * references, or a document with "standalone='yes'", the
4989 * Name given in the entity reference must match that in an
4990 * entity declaration, except that well-formed documents
4991 * need not declare any of the following entities: amp, lt,
4992 * gt, apos, quot.
4993 * The declaration of a parameter entity must precede any
4994 * reference to it.
4995 * Similarly, the declaration of a general entity must
4996 * precede any reference to it which appears in a default
4997 * value in an attribute-list declaration. Note that if
4998 * entities are declared in the external subset or in
4999 * external parameter entities, a non-validating processor
5000 * is not obligated to read and process their declarations;
5001 * for such documents, the rule that an entity must be
5002 * declared is a well-formedness constraint only if
5003 * standalone='yes'.
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005004 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00005005 if (ent == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00005006 if ((ctxt->standalone == 1) ||
5007 ((ctxt->hasExternalSubset == 0) &&
5008 (ctxt->hasPErefs == 0))) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005009 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005010 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard011b63c1999-06-02 17:44:04 +00005011 ctxt->sax->error(ctxt->userData,
5012 "Entity '%s' not defined\n", name);
5013 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005014 ctxt->disableSAX = 1;
Daniel Veillard011b63c1999-06-02 17:44:04 +00005015 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005016 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005017 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5018 ctxt->sax->warning(ctxt->userData,
5019 "Entity '%s' not defined\n", name);
Daniel Veillard011b63c1999-06-02 17:44:04 +00005020 }
5021 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005022
5023 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00005024 * [ WFC: Parsed Entity ]
5025 * An entity reference must not contain the name of an
5026 * unparsed entity
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005027 */
Daniel Veillardcf461992000-03-14 18:30:20 +00005028 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005029 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005030 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5031 ctxt->sax->error(ctxt->userData,
5032 "Entity reference to unparsed entity %s\n", name);
5033 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005034 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005035 }
5036
5037 /*
5038 * [ WFC: No External Entity References ]
5039 * Attribute values cannot contain direct or indirect
5040 * entity references to external entities.
5041 */
5042 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardcf461992000-03-14 18:30:20 +00005043 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005044 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005045 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5046 ctxt->sax->error(ctxt->userData,
5047 "Attribute references external entity '%s'\n", name);
5048 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005049 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005050 }
5051 /*
5052 * [ WFC: No < in Attribute Values ]
5053 * The replacement text of any entity referred to directly or
5054 * indirectly in an attribute value (other than "&lt;") must
5055 * not contain a <.
5056 */
5057 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardb96e6431999-08-29 21:02:19 +00005058 (ent != NULL) &&
Daniel Veillard8b5dd832000-10-01 20:28:44 +00005059 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
Daniel Veillardb05deb71999-08-10 19:04:08 +00005060 (ent->content != NULL) &&
5061 (xmlStrchr(ent->content, '<'))) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005062 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005063 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5064 ctxt->sax->error(ctxt->userData,
5065 "'<' in entity '%s' is not allowed in attributes values\n", name);
5066 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005067 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005068 }
5069
5070 /*
5071 * Internal check, no parameter entities here ...
5072 */
5073 else {
Daniel Veillardcf461992000-03-14 18:30:20 +00005074 switch (ent->etype) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005075 case XML_INTERNAL_PARAMETER_ENTITY:
5076 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005077 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005078 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005079 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005080 "Attempt to reference the parameter entity '%s'\n", name);
5081 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005082 ctxt->disableSAX = 1;
5083 break;
5084 default:
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005085 break;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005086 }
5087 }
5088
5089 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00005090 * [ WFC: No Recursion ]
Daniel Veillardb1059e22000-09-16 14:02:43 +00005091 * A parsed entity must not contain a recursive reference
Daniel Veillardb96e6431999-08-29 21:02:19 +00005092 * to itself, either directly or indirectly.
Daniel Veillardb1059e22000-09-16 14:02:43 +00005093 * Done somewhere else
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005094 */
Daniel Veillard260a68f1998-08-13 03:39:55 +00005095
Daniel Veillard011b63c1999-06-02 17:44:04 +00005096 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005097 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005098 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005099 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005100 "xmlParseEntityRef: expecting ';'\n");
5101 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005102 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005103 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00005104 xmlFree(name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005105 }
5106 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00005107 return(ent);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005108}
Daniel Veillardb1059e22000-09-16 14:02:43 +00005109
Daniel Veillard10a2c651999-12-12 13:03:50 +00005110/**
5111 * xmlParseStringEntityRef:
5112 * @ctxt: an XML parser context
5113 * @str: a pointer to an index in the string
5114 *
5115 * parse ENTITY references declarations, but this version parses it from
5116 * a string value.
5117 *
5118 * [68] EntityRef ::= '&' Name ';'
5119 *
5120 * [ WFC: Entity Declared ]
5121 * In a document without any DTD, a document with only an internal DTD
5122 * subset which contains no parameter entity references, or a document
5123 * with "standalone='yes'", the Name given in the entity reference
5124 * must match that in an entity declaration, except that well-formed
5125 * documents need not declare any of the following entities: amp, lt,
5126 * gt, apos, quot. The declaration of a parameter entity must precede
5127 * any reference to it. Similarly, the declaration of a general entity
5128 * must precede any reference to it which appears in a default value in an
5129 * attribute-list declaration. Note that if entities are declared in the
5130 * external subset or in external parameter entities, a non-validating
5131 * processor is not obligated to read and process their declarations;
5132 * for such documents, the rule that an entity must be declared is a
5133 * well-formedness constraint only if standalone='yes'.
5134 *
5135 * [ WFC: Parsed Entity ]
5136 * An entity reference must not contain the name of an unparsed entity
5137 *
5138 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5139 * is updated to the current location in the string.
5140 */
5141xmlEntityPtr
5142xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5143 xmlChar *name;
5144 const xmlChar *ptr;
5145 xmlChar cur;
5146 xmlEntityPtr ent = NULL;
5147
Daniel Veillardcf461992000-03-14 18:30:20 +00005148 if ((str == NULL) || (*str == NULL))
5149 return(NULL);
Daniel Veillard10a2c651999-12-12 13:03:50 +00005150 ptr = *str;
5151 cur = *ptr;
5152 if (cur == '&') {
5153 ptr++;
5154 cur = *ptr;
5155 name = xmlParseStringName(ctxt, &ptr);
5156 if (name == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005157 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005158 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5159 ctxt->sax->error(ctxt->userData,
5160 "xmlParseEntityRef: no name\n");
Daniel Veillard10a2c651999-12-12 13:03:50 +00005161 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005162 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005163 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00005164 if (*ptr == ';') {
5165 ptr++;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005166 /*
5167 * Ask first SAX for entity resolution, otherwise try the
5168 * predefined set.
5169 */
5170 if (ctxt->sax != NULL) {
5171 if (ctxt->sax->getEntity != NULL)
5172 ent = ctxt->sax->getEntity(ctxt->userData, name);
5173 if (ent == NULL)
5174 ent = xmlGetPredefinedEntity(name);
5175 }
5176 /*
5177 * [ WFC: Entity Declared ]
5178 * In a document without any DTD, a document with only an
5179 * internal DTD subset which contains no parameter entity
5180 * references, or a document with "standalone='yes'", the
5181 * Name given in the entity reference must match that in an
5182 * entity declaration, except that well-formed documents
5183 * need not declare any of the following entities: amp, lt,
5184 * gt, apos, quot.
5185 * The declaration of a parameter entity must precede any
5186 * reference to it.
5187 * Similarly, the declaration of a general entity must
5188 * precede any reference to it which appears in a default
5189 * value in an attribute-list declaration. Note that if
5190 * entities are declared in the external subset or in
5191 * external parameter entities, a non-validating processor
5192 * is not obligated to read and process their declarations;
5193 * for such documents, the rule that an entity must be
5194 * declared is a well-formedness constraint only if
5195 * standalone='yes'.
5196 */
5197 if (ent == NULL) {
5198 if ((ctxt->standalone == 1) ||
5199 ((ctxt->hasExternalSubset == 0) &&
5200 (ctxt->hasPErefs == 0))) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005201 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005202 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5203 ctxt->sax->error(ctxt->userData,
5204 "Entity '%s' not defined\n", name);
Daniel Veillard10a2c651999-12-12 13:03:50 +00005205 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005206 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005207 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005208 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005209 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5210 ctxt->sax->warning(ctxt->userData,
5211 "Entity '%s' not defined\n", name);
Daniel Veillard10a2c651999-12-12 13:03:50 +00005212 }
5213 }
5214
5215 /*
5216 * [ WFC: Parsed Entity ]
5217 * An entity reference must not contain the name of an
5218 * unparsed entity
5219 */
Daniel Veillardcf461992000-03-14 18:30:20 +00005220 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005221 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005222 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5223 ctxt->sax->error(ctxt->userData,
5224 "Entity reference to unparsed entity %s\n", name);
Daniel Veillard10a2c651999-12-12 13:03:50 +00005225 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005226 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005227 }
5228
5229 /*
5230 * [ WFC: No External Entity References ]
5231 * Attribute values cannot contain direct or indirect
5232 * entity references to external entities.
5233 */
5234 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardcf461992000-03-14 18:30:20 +00005235 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005236 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005237 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5238 ctxt->sax->error(ctxt->userData,
5239 "Attribute references external entity '%s'\n", name);
Daniel Veillard10a2c651999-12-12 13:03:50 +00005240 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005241 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005242 }
5243 /*
5244 * [ WFC: No < in Attribute Values ]
5245 * The replacement text of any entity referred to directly or
5246 * indirectly in an attribute value (other than "&lt;") must
5247 * not contain a <.
5248 */
5249 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5250 (ent != NULL) &&
Daniel Veillard8b5dd832000-10-01 20:28:44 +00005251 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
Daniel Veillard10a2c651999-12-12 13:03:50 +00005252 (ent->content != NULL) &&
5253 (xmlStrchr(ent->content, '<'))) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005254 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005255 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5256 ctxt->sax->error(ctxt->userData,
5257 "'<' in entity '%s' is not allowed in attributes values\n", name);
Daniel Veillard10a2c651999-12-12 13:03:50 +00005258 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005259 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005260 }
5261
5262 /*
5263 * Internal check, no parameter entities here ...
5264 */
5265 else {
Daniel Veillardcf461992000-03-14 18:30:20 +00005266 switch (ent->etype) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00005267 case XML_INTERNAL_PARAMETER_ENTITY:
5268 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005269 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005270 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5271 ctxt->sax->error(ctxt->userData,
5272 "Attempt to reference the parameter entity '%s'\n", name);
Daniel Veillard10a2c651999-12-12 13:03:50 +00005273 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005274 ctxt->disableSAX = 1;
5275 break;
5276 default:
Daniel Veillard10a2c651999-12-12 13:03:50 +00005277 break;
5278 }
5279 }
5280
5281 /*
5282 * [ WFC: No Recursion ]
Daniel Veillardb1059e22000-09-16 14:02:43 +00005283 * A parsed entity must not contain a recursive reference
Daniel Veillard10a2c651999-12-12 13:03:50 +00005284 * to itself, either directly or indirectly.
Daniel Veillardb1059e22000-09-16 14:02:43 +00005285 * Done somewhwere else
Daniel Veillard10a2c651999-12-12 13:03:50 +00005286 */
5287
5288 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005289 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005290 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5291 ctxt->sax->error(ctxt->userData,
5292 "xmlParseEntityRef: expecting ';'\n");
Daniel Veillard10a2c651999-12-12 13:03:50 +00005293 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005294 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005295 }
5296 xmlFree(name);
5297 }
5298 }
Daniel Veillardcf461992000-03-14 18:30:20 +00005299 *str = ptr;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005300 return(ent);
5301}
Daniel Veillard260a68f1998-08-13 03:39:55 +00005302
Daniel Veillard11e00581998-10-24 18:27:49 +00005303/**
5304 * xmlParsePEReference:
5305 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00005306 *
5307 * parse PEReference declarations
Daniel Veillard011b63c1999-06-02 17:44:04 +00005308 * The entity content is handled directly by pushing it's content as
5309 * a new input stream.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005310 *
5311 * [69] PEReference ::= '%' Name ';'
Daniel Veillard1e346af1999-02-22 10:33:01 +00005312 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005313 * [ WFC: No Recursion ]
Daniel Veillardb1059e22000-09-16 14:02:43 +00005314 * A parsed entity must not contain a recursive
Daniel Veillardb05deb71999-08-10 19:04:08 +00005315 * reference to itself, either directly or indirectly.
5316 *
5317 * [ WFC: Entity Declared ]
5318 * In a document without any DTD, a document with only an internal DTD
5319 * subset which contains no parameter entity references, or a document
5320 * with "standalone='yes'", ... ... The declaration of a parameter
5321 * entity must precede any reference to it...
5322 *
5323 * [ VC: Entity Declared ]
5324 * In a document with an external subset or external parameter entities
5325 * with "standalone='no'", ... ... The declaration of a parameter entity
5326 * must precede any reference to it...
5327 *
5328 * [ WFC: In DTD ]
5329 * Parameter-entity references may only appear in the DTD.
5330 * NOTE: misleading but this is handled.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005331 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00005332void
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005333xmlParsePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005334 xmlChar *name;
Daniel Veillard517752b1999-04-05 12:20:10 +00005335 xmlEntityPtr entity = NULL;
Daniel Veillardccb09631998-10-27 06:21:04 +00005336 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005337
Daniel Veillardcf461992000-03-14 18:30:20 +00005338 if (RAW == '%') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00005339 NEXT;
5340 name = xmlParseName(ctxt);
5341 if (name == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005342 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005343 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005344 ctxt->sax->error(ctxt->userData,
5345 "xmlParsePEReference: no name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005346 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005347 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005348 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00005349 if (RAW == ';') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00005350 NEXT;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005351 if ((ctxt->sax != NULL) &&
5352 (ctxt->sax->getParameterEntity != NULL))
5353 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5354 name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005355 if (entity == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00005356 /*
5357 * [ WFC: Entity Declared ]
5358 * In a document without any DTD, a document with only an
5359 * internal DTD subset which contains no parameter entity
5360 * references, or a document with "standalone='yes'", ...
5361 * ... The declaration of a parameter entity must precede
5362 * any reference to it...
5363 */
5364 if ((ctxt->standalone == 1) ||
5365 ((ctxt->hasExternalSubset == 0) &&
5366 (ctxt->hasPErefs == 0))) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005367 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00005368 if ((!ctxt->disableSAX) &&
5369 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00005370 ctxt->sax->error(ctxt->userData,
5371 "PEReference: %%%s; not found\n", name);
5372 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005373 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005374 } else {
5375 /*
5376 * [ VC: Entity Declared ]
5377 * In a document with an external subset or external
5378 * parameter entities with "standalone='no'", ...
5379 * ... The declaration of a parameter entity must precede
5380 * any reference to it...
5381 */
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00005382 if ((!ctxt->disableSAX) &&
5383 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00005384 ctxt->sax->warning(ctxt->userData,
5385 "PEReference: %%%s; not found\n", name);
5386 ctxt->valid = 0;
5387 }
Daniel Veillardccb09631998-10-27 06:21:04 +00005388 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00005389 /*
5390 * Internal checking in case the entity quest barfed
5391 */
Daniel Veillardcf461992000-03-14 18:30:20 +00005392 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5393 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00005394 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5395 ctxt->sax->warning(ctxt->userData,
5396 "Internal: %%%s; is not a parameter entity\n", name);
5397 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00005398 /*
5399 * TODO !!!
5400 * handle the extra spaces added before and after
5401 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5402 */
Daniel Veillardb05deb71999-08-10 19:04:08 +00005403 input = xmlNewEntityInputStream(ctxt, entity);
5404 xmlPushInput(ctxt, input);
Daniel Veillardcf461992000-03-14 18:30:20 +00005405 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5406 (RAW == '<') && (NXT(1) == '?') &&
5407 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5408 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5409 xmlParseTextDecl(ctxt);
Daniel Veillard496a1cf2000-05-03 14:20:55 +00005410 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5411 /*
5412 * The XML REC instructs us to stop parsing
5413 * right here
5414 */
5415 ctxt->instate = XML_PARSER_EOF;
5416 xmlFree(name);
5417 return;
5418 }
Daniel Veillardcf461992000-03-14 18:30:20 +00005419 }
5420 if (ctxt->token == 0)
5421 ctxt->token = ' ';
Daniel Veillardb05deb71999-08-10 19:04:08 +00005422 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005423 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00005424 ctxt->hasPErefs = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005425 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005426 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005427 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005428 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005429 "xmlParsePEReference: expecting ';'\n");
5430 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005431 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005432 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00005433 xmlFree(name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005434 }
5435 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005436}
5437
Daniel Veillard11e00581998-10-24 18:27:49 +00005438/**
Daniel Veillard10a2c651999-12-12 13:03:50 +00005439 * xmlParseStringPEReference:
5440 * @ctxt: an XML parser context
5441 * @str: a pointer to an index in the string
5442 *
5443 * parse PEReference declarations
5444 *
5445 * [69] PEReference ::= '%' Name ';'
5446 *
5447 * [ WFC: No Recursion ]
Daniel Veillardb1059e22000-09-16 14:02:43 +00005448 * A parsed entity must not contain a recursive
Daniel Veillard10a2c651999-12-12 13:03:50 +00005449 * reference to itself, either directly or indirectly.
5450 *
5451 * [ WFC: Entity Declared ]
5452 * In a document without any DTD, a document with only an internal DTD
5453 * subset which contains no parameter entity references, or a document
5454 * with "standalone='yes'", ... ... The declaration of a parameter
5455 * entity must precede any reference to it...
5456 *
5457 * [ VC: Entity Declared ]
5458 * In a document with an external subset or external parameter entities
5459 * with "standalone='no'", ... ... The declaration of a parameter entity
5460 * must precede any reference to it...
5461 *
5462 * [ WFC: In DTD ]
5463 * Parameter-entity references may only appear in the DTD.
5464 * NOTE: misleading but this is handled.
5465 *
5466 * Returns the string of the entity content.
5467 * str is updated to the current value of the index
5468 */
5469xmlEntityPtr
5470xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5471 const xmlChar *ptr;
5472 xmlChar cur;
5473 xmlChar *name;
5474 xmlEntityPtr entity = NULL;
5475
5476 if ((str == NULL) || (*str == NULL)) return(NULL);
5477 ptr = *str;
5478 cur = *ptr;
5479 if (cur == '%') {
5480 ptr++;
5481 cur = *ptr;
5482 name = xmlParseStringName(ctxt, &ptr);
5483 if (name == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005484 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005485 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5486 ctxt->sax->error(ctxt->userData,
5487 "xmlParseStringPEReference: no name\n");
Daniel Veillard10a2c651999-12-12 13:03:50 +00005488 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005489 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005490 } else {
5491 cur = *ptr;
5492 if (cur == ';') {
5493 ptr++;
5494 cur = *ptr;
5495 if ((ctxt->sax != NULL) &&
5496 (ctxt->sax->getParameterEntity != NULL))
5497 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5498 name);
5499 if (entity == NULL) {
5500 /*
5501 * [ WFC: Entity Declared ]
5502 * In a document without any DTD, a document with only an
5503 * internal DTD subset which contains no parameter entity
5504 * references, or a document with "standalone='yes'", ...
5505 * ... The declaration of a parameter entity must precede
5506 * any reference to it...
5507 */
5508 if ((ctxt->standalone == 1) ||
5509 ((ctxt->hasExternalSubset == 0) &&
5510 (ctxt->hasPErefs == 0))) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005511 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005512 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5513 ctxt->sax->error(ctxt->userData,
5514 "PEReference: %%%s; not found\n", name);
Daniel Veillard10a2c651999-12-12 13:03:50 +00005515 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005516 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005517 } else {
5518 /*
5519 * [ VC: Entity Declared ]
5520 * In a document with an external subset or external
5521 * parameter entities with "standalone='no'", ...
5522 * ... The declaration of a parameter entity must
5523 * precede any reference to it...
5524 */
5525 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5526 ctxt->sax->warning(ctxt->userData,
5527 "PEReference: %%%s; not found\n", name);
5528 ctxt->valid = 0;
5529 }
5530 } else {
5531 /*
5532 * Internal checking in case the entity quest barfed
5533 */
Daniel Veillardcf461992000-03-14 18:30:20 +00005534 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5535 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00005536 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5537 ctxt->sax->warning(ctxt->userData,
5538 "Internal: %%%s; is not a parameter entity\n", name);
5539 }
5540 }
5541 ctxt->hasPErefs = 1;
5542 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005543 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005544 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5545 ctxt->sax->error(ctxt->userData,
5546 "xmlParseStringPEReference: expecting ';'\n");
Daniel Veillard10a2c651999-12-12 13:03:50 +00005547 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005548 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005549 }
5550 xmlFree(name);
5551 }
5552 }
5553 *str = ptr;
5554 return(entity);
5555}
5556
5557/**
Daniel Veillardcf461992000-03-14 18:30:20 +00005558 * xmlParseDocTypeDecl:
Daniel Veillard11e00581998-10-24 18:27:49 +00005559 * @ctxt: an XML parser context
5560 *
5561 * parse a DOCTYPE declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00005562 *
5563 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5564 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
Daniel Veillardb05deb71999-08-10 19:04:08 +00005565 *
5566 * [ VC: Root Element Type ]
5567 * The Name in the document type declaration must match the element
5568 * type of the root element.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005569 */
5570
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005571void
5572xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillardcf461992000-03-14 18:30:20 +00005573 xmlChar *name = NULL;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005574 xmlChar *ExternalID = NULL;
5575 xmlChar *URI = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005576
5577 /*
5578 * We know that '<!DOCTYPE' has been detected.
5579 */
5580 SKIP(9);
5581
5582 SKIP_BLANKS;
5583
5584 /*
5585 * Parse the DOCTYPE name.
5586 */
5587 name = xmlParseName(ctxt);
5588 if (name == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005589 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005590 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005591 ctxt->sax->error(ctxt->userData,
5592 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005593 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005594 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005595 }
Daniel Veillardcf461992000-03-14 18:30:20 +00005596 ctxt->intSubName = name;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005597
5598 SKIP_BLANKS;
5599
5600 /*
5601 * Check for SystemID and ExternalID
5602 */
Daniel Veillard1e346af1999-02-22 10:33:01 +00005603 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
Daniel Veillardb05deb71999-08-10 19:04:08 +00005604
5605 if ((URI != NULL) || (ExternalID != NULL)) {
5606 ctxt->hasExternalSubset = 1;
5607 }
Daniel Veillardcf461992000-03-14 18:30:20 +00005608 ctxt->extSubURI = URI;
5609 ctxt->extSubSystem = ExternalID;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005610
Daniel Veillard260a68f1998-08-13 03:39:55 +00005611 SKIP_BLANKS;
5612
Daniel Veillard011b63c1999-06-02 17:44:04 +00005613 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00005614 * Create and update the internal subset.
Daniel Veillard011b63c1999-06-02 17:44:04 +00005615 */
Daniel Veillardcf461992000-03-14 18:30:20 +00005616 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5617 (!ctxt->disableSAX))
Daniel Veillard27d88741999-05-29 11:51:49 +00005618 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005619
5620 /*
Daniel Veillarddbfd6411999-12-28 16:35:14 +00005621 * Is there any internal subset declarations ?
5622 * they are handled separately in xmlParseInternalSubset()
5623 */
Daniel Veillardcf461992000-03-14 18:30:20 +00005624 if (RAW == '[')
Daniel Veillarddbfd6411999-12-28 16:35:14 +00005625 return;
5626
5627 /*
5628 * We should be at the end of the DOCTYPE declaration.
5629 */
Daniel Veillardcf461992000-03-14 18:30:20 +00005630 if (RAW != '>') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005631 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00005632 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5633 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5634 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005635 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00005636 }
5637 NEXT;
5638}
5639
5640/**
Daniel Veillardcf461992000-03-14 18:30:20 +00005641 * xmlParseInternalsubset:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00005642 * @ctxt: an XML parser context
5643 *
5644 * parse the internal subset declaration
5645 *
5646 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5647 */
5648
5649void
5650xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5651 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00005652 * Is there any DTD definition ?
5653 */
Daniel Veillardcf461992000-03-14 18:30:20 +00005654 if (RAW == '[') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00005655 ctxt->instate = XML_PARSER_DTD;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005656 NEXT;
5657 /*
5658 * Parse the succession of Markup declarations and
5659 * PEReferences.
5660 * Subsequence (markupdecl | PEReference | S)*
5661 */
Daniel Veillardcf461992000-03-14 18:30:20 +00005662 while (RAW != ']') {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005663 const xmlChar *check = CUR_PTR;
Daniel Veillardb96e6431999-08-29 21:02:19 +00005664 int cons = ctxt->input->consumed;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005665
5666 SKIP_BLANKS;
5667 xmlParseMarkupDecl(ctxt);
Daniel Veillardccb09631998-10-27 06:21:04 +00005668 xmlParsePEReference(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005669
Daniel Veillard011b63c1999-06-02 17:44:04 +00005670 /*
5671 * Pop-up of finished entities.
5672 */
Daniel Veillardcf461992000-03-14 18:30:20 +00005673 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillard011b63c1999-06-02 17:44:04 +00005674 xmlPopInput(ctxt);
5675
Daniel Veillardc26087b1999-08-30 11:23:51 +00005676 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005677 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillardb96e6431999-08-29 21:02:19 +00005678 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5679 ctxt->sax->error(ctxt->userData,
Daniel Veillarddbfd6411999-12-28 16:35:14 +00005680 "xmlParseInternalSubset: error detected in Markup declaration\n");
Daniel Veillardb96e6431999-08-29 21:02:19 +00005681 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005682 ctxt->disableSAX = 1;
Daniel Veillardb96e6431999-08-29 21:02:19 +00005683 break;
5684 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005685 }
Daniel Veillard36650692000-07-21 15:16:39 +00005686 if (RAW == ']') {
5687 NEXT;
5688 SKIP_BLANKS;
5689 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005690 }
5691
5692 /*
5693 * We should be at the end of the DOCTYPE declaration.
5694 */
Daniel Veillardcf461992000-03-14 18:30:20 +00005695 if (RAW != '>') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005696 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005697 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005698 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005699 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005700 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005701 }
5702 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005703}
5704
Daniel Veillard11e00581998-10-24 18:27:49 +00005705/**
5706 * xmlParseAttribute:
5707 * @ctxt: an XML parser context
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005708 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard11e00581998-10-24 18:27:49 +00005709 *
5710 * parse an attribute
Daniel Veillard260a68f1998-08-13 03:39:55 +00005711 *
5712 * [41] Attribute ::= Name Eq AttValue
5713 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005714 * [ WFC: No External Entity References ]
5715 * Attribute values cannot contain direct or indirect entity references
5716 * to external entities.
5717 *
5718 * [ WFC: No < in Attribute Values ]
5719 * The replacement text of any entity referred to directly or indirectly in
5720 * an attribute value (other than "&lt;") must not contain a <.
5721 *
5722 * [ VC: Attribute Value Type ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00005723 * The attribute must have been declared; the value must be of the type
Daniel Veillardb05deb71999-08-10 19:04:08 +00005724 * declared for it.
5725 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00005726 * [25] Eq ::= S? '=' S?
5727 *
5728 * With namespace:
5729 *
5730 * [NS 11] Attribute ::= QName Eq AttValue
5731 *
5732 * Also the case QName == xmlns:??? is handled independently as a namespace
5733 * definition.
Daniel Veillard1e346af1999-02-22 10:33:01 +00005734 *
Daniel Veillard517752b1999-04-05 12:20:10 +00005735 * Returns the attribute name, and the value in *value.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005736 */
5737
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005738xmlChar *
5739xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
5740 xmlChar *name, *val;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005741
Daniel Veillard517752b1999-04-05 12:20:10 +00005742 *value = NULL;
5743 name = xmlParseName(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005744 if (name == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005745 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005746 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005747 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005748 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005749 ctxt->disableSAX = 1;
Daniel Veillardccb09631998-10-27 06:21:04 +00005750 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005751 }
5752
5753 /*
5754 * read the value
5755 */
5756 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00005757 if (RAW == '=') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00005758 NEXT;
5759 SKIP_BLANKS;
Daniel Veillard517752b1999-04-05 12:20:10 +00005760 val = xmlParseAttValue(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00005761 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005762 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005763 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005764 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005765 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005766 "Specification mandate value for attribute %s\n", name);
5767 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005768 ctxt->disableSAX = 1;
5769 xmlFree(name);
Daniel Veillardccb09631998-10-27 06:21:04 +00005770 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005771 }
5772
Daniel Veillardcf461992000-03-14 18:30:20 +00005773 /*
5774 * Check that xml:lang conforms to the specification
Daniel Veillarde0854c32000-08-27 21:12:29 +00005775 * No more registered as an error, just generate a warning now
5776 * since this was deprecated in XML second edition
Daniel Veillardcf461992000-03-14 18:30:20 +00005777 */
Daniel Veillard8b5dd832000-10-01 20:28:44 +00005778 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
Daniel Veillardcf461992000-03-14 18:30:20 +00005779 if (!xmlCheckLanguageID(val)) {
Daniel Veillarde0854c32000-08-27 21:12:29 +00005780 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5781 ctxt->sax->warning(ctxt->userData,
5782 "Malformed value for xml:lang : %s\n", val);
Daniel Veillardcf461992000-03-14 18:30:20 +00005783 }
5784 }
5785
5786 /*
5787 * Check that xml:space conforms to the specification
5788 */
Daniel Veillard8b5dd832000-10-01 20:28:44 +00005789 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
5790 if (xmlStrEqual(val, BAD_CAST "default"))
Daniel Veillardcf461992000-03-14 18:30:20 +00005791 *(ctxt->space) = 0;
Daniel Veillard8b5dd832000-10-01 20:28:44 +00005792 else if (xmlStrEqual(val, BAD_CAST "preserve"))
Daniel Veillardcf461992000-03-14 18:30:20 +00005793 *(ctxt->space) = 1;
5794 else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005795 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
Daniel Veillardcf461992000-03-14 18:30:20 +00005796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5797 ctxt->sax->error(ctxt->userData,
5798"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
5799 val);
Daniel Veillardcf461992000-03-14 18:30:20 +00005800 ctxt->wellFormed = 0;
5801 ctxt->disableSAX = 1;
5802 }
5803 }
5804
Daniel Veillard517752b1999-04-05 12:20:10 +00005805 *value = val;
5806 return(name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005807}
5808
Daniel Veillard11e00581998-10-24 18:27:49 +00005809/**
5810 * xmlParseStartTag:
5811 * @ctxt: an XML parser context
5812 *
5813 * parse a start of tag either for rule element or
5814 * EmptyElement. In both case we don't parse the tag closing chars.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005815 *
5816 * [40] STag ::= '<' Name (S Attribute)* S? '>'
5817 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005818 * [ WFC: Unique Att Spec ]
5819 * No attribute name may appear more than once in the same start-tag or
5820 * empty-element tag.
5821 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00005822 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
5823 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005824 * [ WFC: Unique Att Spec ]
5825 * No attribute name may appear more than once in the same start-tag or
5826 * empty-element tag.
5827 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00005828 * With namespace:
5829 *
5830 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
5831 *
5832 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
Daniel Veillard14fff061999-06-22 21:49:07 +00005833 *
Daniel Veillard06047432000-04-24 11:33:38 +00005834 * Returns the element name parsed
Daniel Veillard260a68f1998-08-13 03:39:55 +00005835 */
5836
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005837xmlChar *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005838xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005839 xmlChar *name;
5840 xmlChar *attname;
5841 xmlChar *attvalue;
5842 const xmlChar **atts = NULL;
Daniel Veillard517752b1999-04-05 12:20:10 +00005843 int nbatts = 0;
5844 int maxatts = 0;
5845 int i;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005846
Daniel Veillardcf461992000-03-14 18:30:20 +00005847 if (RAW != '<') return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005848 NEXT;
5849
Daniel Veillard517752b1999-04-05 12:20:10 +00005850 name = xmlParseName(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005851 if (name == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005852 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005853 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005854 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005855 "xmlParseStartTag: invalid element name\n");
5856 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005857 ctxt->disableSAX = 1;
Daniel Veillard14fff061999-06-22 21:49:07 +00005858 return(NULL);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005859 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005860
5861 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00005862 * Now parse the attributes, it ends up with the ending
5863 *
5864 * (S Attribute)* S?
5865 */
5866 SKIP_BLANKS;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005867 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00005868
5869 while ((IS_CHAR(RAW)) &&
5870 (RAW != '>') &&
5871 ((RAW != '/') || (NXT(1) != '>'))) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005872 const xmlChar *q = CUR_PTR;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005873 int cons = ctxt->input->consumed;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005874
Daniel Veillard517752b1999-04-05 12:20:10 +00005875 attname = xmlParseAttribute(ctxt, &attvalue);
5876 if ((attname != NULL) && (attvalue != NULL)) {
5877 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00005878 * [ WFC: Unique Att Spec ]
5879 * No attribute name may appear more than once in the same
5880 * start-tag or empty-element tag.
Daniel Veillard517752b1999-04-05 12:20:10 +00005881 */
5882 for (i = 0; i < nbatts;i += 2) {
Daniel Veillard8b5dd832000-10-01 20:28:44 +00005883 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005884 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard517752b1999-04-05 12:20:10 +00005885 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00005886 ctxt->sax->error(ctxt->userData,
5887 "Attribute %s redefined\n",
5888 attname);
Daniel Veillard517752b1999-04-05 12:20:10 +00005889 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005890 ctxt->disableSAX = 1;
Daniel Veillard6454aec1999-09-02 22:04:43 +00005891 xmlFree(attname);
5892 xmlFree(attvalue);
Daniel Veillardb05deb71999-08-10 19:04:08 +00005893 goto failed;
Daniel Veillard517752b1999-04-05 12:20:10 +00005894 }
5895 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005896
Daniel Veillard517752b1999-04-05 12:20:10 +00005897 /*
5898 * Add the pair to atts
5899 */
5900 if (atts == NULL) {
5901 maxatts = 10;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005902 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillard517752b1999-04-05 12:20:10 +00005903 if (atts == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00005904 xmlGenericError(xmlGenericErrorContext,
5905 "malloc of %ld byte failed\n",
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005906 maxatts * (long)sizeof(xmlChar *));
Daniel Veillard14fff061999-06-22 21:49:07 +00005907 return(NULL);
Daniel Veillard517752b1999-04-05 12:20:10 +00005908 }
Daniel Veillard51e3b151999-11-12 17:02:31 +00005909 } else if (nbatts + 4 > maxatts) {
Daniel Veillard517752b1999-04-05 12:20:10 +00005910 maxatts *= 2;
Daniel Veillard4b0755c2000-09-25 14:26:28 +00005911 atts = (const xmlChar **) xmlRealloc((void *) atts,
5912 maxatts * sizeof(xmlChar *));
Daniel Veillard517752b1999-04-05 12:20:10 +00005913 if (atts == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00005914 xmlGenericError(xmlGenericErrorContext,
5915 "realloc of %ld byte failed\n",
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005916 maxatts * (long)sizeof(xmlChar *));
Daniel Veillard14fff061999-06-22 21:49:07 +00005917 return(NULL);
Daniel Veillard517752b1999-04-05 12:20:10 +00005918 }
5919 }
5920 atts[nbatts++] = attname;
5921 atts[nbatts++] = attvalue;
5922 atts[nbatts] = NULL;
5923 atts[nbatts + 1] = NULL;
Daniel Veillardcf461992000-03-14 18:30:20 +00005924 } else {
5925 if (attname != NULL)
5926 xmlFree(attname);
5927 if (attvalue != NULL)
5928 xmlFree(attvalue);
Daniel Veillard517752b1999-04-05 12:20:10 +00005929 }
5930
Daniel Veillardb96e6431999-08-29 21:02:19 +00005931failed:
Daniel Veillardcf461992000-03-14 18:30:20 +00005932
5933 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
5934 break;
5935 if (!IS_BLANK(RAW)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005936 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillardcf461992000-03-14 18:30:20 +00005937 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5938 ctxt->sax->error(ctxt->userData,
5939 "attributes construct error\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00005940 ctxt->wellFormed = 0;
5941 ctxt->disableSAX = 1;
5942 }
Daniel Veillard517752b1999-04-05 12:20:10 +00005943 SKIP_BLANKS;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005944 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005945 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005946 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005947 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00005948 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005949 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005950 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005951 break;
5952 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005953 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005954 }
5955
5956 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00005957 * SAX: Start of Element !
5958 */
Daniel Veillardcf461992000-03-14 18:30:20 +00005959 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
5960 (!ctxt->disableSAX))
Daniel Veillard27d88741999-05-29 11:51:49 +00005961 ctxt->sax->startElement(ctxt->userData, name, atts);
Daniel Veillard517752b1999-04-05 12:20:10 +00005962
Daniel Veillard517752b1999-04-05 12:20:10 +00005963 if (atts != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005964 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
Daniel Veillard4b0755c2000-09-25 14:26:28 +00005965 xmlFree((void *) atts);
Daniel Veillard517752b1999-04-05 12:20:10 +00005966 }
Daniel Veillard14fff061999-06-22 21:49:07 +00005967 return(name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005968}
5969
Daniel Veillard11e00581998-10-24 18:27:49 +00005970/**
5971 * xmlParseEndTag:
5972 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00005973 *
5974 * parse an end of tag
Daniel Veillard260a68f1998-08-13 03:39:55 +00005975 *
5976 * [42] ETag ::= '</' Name S? '>'
5977 *
5978 * With namespace
5979 *
Daniel Veillard517752b1999-04-05 12:20:10 +00005980 * [NS 9] ETag ::= '</' QName S? '>'
Daniel Veillard260a68f1998-08-13 03:39:55 +00005981 */
5982
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005983void
Daniel Veillarddbfd6411999-12-28 16:35:14 +00005984xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005985 xmlChar *name;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00005986 xmlChar *oldname;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005987
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005988 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00005989 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005990 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005991 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005992 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005993 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005994 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005995 return;
5996 }
5997 SKIP(2);
5998
Daniel Veillard517752b1999-04-05 12:20:10 +00005999 name = xmlParseName(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006000
6001 /*
6002 * We should definitely be at the ending "S? '>'" part
6003 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00006004 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006005 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00006006 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006007 ctxt->errNo = XML_ERR_GT_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006008 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006009 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006010 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006011 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006012 } else
6013 NEXT;
6014
Daniel Veillard517752b1999-04-05 12:20:10 +00006015 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00006016 * [ WFC: Element Type Match ]
6017 * The Name in an element's end-tag must match the element type in the
6018 * start-tag.
6019 *
Daniel Veillard14fff061999-06-22 21:49:07 +00006020 */
Daniel Veillardda07c342000-01-25 18:31:22 +00006021 if ((name == NULL) || (ctxt->name == NULL) ||
Daniel Veillard8b5dd832000-10-01 20:28:44 +00006022 (!xmlStrEqual(name, ctxt->name))) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006023 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
Daniel Veillardda07c342000-01-25 18:31:22 +00006024 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6025 if ((name != NULL) && (ctxt->name != NULL)) {
6026 ctxt->sax->error(ctxt->userData,
6027 "Opening and ending tag mismatch: %s and %s\n",
6028 ctxt->name, name);
6029 } else if (ctxt->name != NULL) {
6030 ctxt->sax->error(ctxt->userData,
6031 "Ending tag eror for: %s\n", ctxt->name);
6032 } else {
6033 ctxt->sax->error(ctxt->userData,
6034 "Ending tag error: internal error ???\n");
6035 }
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006036
Daniel Veillardda07c342000-01-25 18:31:22 +00006037 }
Daniel Veillard14fff061999-06-22 21:49:07 +00006038 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006039 ctxt->disableSAX = 1;
Daniel Veillard14fff061999-06-22 21:49:07 +00006040 }
6041
6042 /*
Daniel Veillard517752b1999-04-05 12:20:10 +00006043 * SAX: End of Tag
6044 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006045 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6046 (!ctxt->disableSAX))
Daniel Veillard27d88741999-05-29 11:51:49 +00006047 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard517752b1999-04-05 12:20:10 +00006048
6049 if (name != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00006050 xmlFree(name);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006051 oldname = namePop(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00006052 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006053 if (oldname != NULL) {
6054#ifdef DEBUG_STACK
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00006055 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006056#endif
6057 xmlFree(oldname);
6058 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00006059 return;
6060}
6061
Daniel Veillard11e00581998-10-24 18:27:49 +00006062/**
6063 * xmlParseCDSect:
6064 * @ctxt: an XML parser context
6065 *
6066 * Parse escaped pure raw content.
Daniel Veillard260a68f1998-08-13 03:39:55 +00006067 *
6068 * [18] CDSect ::= CDStart CData CDEnd
6069 *
6070 * [19] CDStart ::= '<![CDATA['
6071 *
6072 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6073 *
6074 * [21] CDEnd ::= ']]>'
6075 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006076void
6077xmlParseCDSect(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00006078 xmlChar *buf = NULL;
6079 int len = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006080 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillardcf461992000-03-14 18:30:20 +00006081 int r, rl;
6082 int s, sl;
6083 int cur, l;
Daniel Veillardf09e7e32000-10-01 15:53:30 +00006084 int count = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006085
Daniel Veillardb05deb71999-08-10 19:04:08 +00006086 if ((NXT(0) == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00006087 (NXT(2) == '[') && (NXT(3) == 'C') &&
6088 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6089 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6090 (NXT(8) == '[')) {
6091 SKIP(9);
6092 } else
6093 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006094
6095 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillardcf461992000-03-14 18:30:20 +00006096 r = CUR_CHAR(rl);
6097 if (!IS_CHAR(r)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006098 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006099 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006100 ctxt->sax->error(ctxt->userData,
Daniel Veillard10a2c651999-12-12 13:03:50 +00006101 "CData section not finished\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006102 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006103 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006104 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006105 return;
6106 }
Daniel Veillardcf461992000-03-14 18:30:20 +00006107 NEXTL(rl);
6108 s = CUR_CHAR(sl);
6109 if (!IS_CHAR(s)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006110 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006111 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006112 ctxt->sax->error(ctxt->userData,
Daniel Veillard10a2c651999-12-12 13:03:50 +00006113 "CData section not finished\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006114 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006115 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006116 ctxt->instate = XML_PARSER_CONTENT;
6117 return;
6118 }
Daniel Veillardcf461992000-03-14 18:30:20 +00006119 NEXTL(sl);
6120 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00006121 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6122 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00006123 xmlGenericError(xmlGenericErrorContext,
6124 "malloc of %d byte failed\n", size);
Daniel Veillard10a2c651999-12-12 13:03:50 +00006125 return;
6126 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00006127 while (IS_CHAR(cur) &&
6128 ((r != ']') || (s != ']') || (cur != '>'))) {
Daniel Veillardcf461992000-03-14 18:30:20 +00006129 if (len + 5 >= size) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00006130 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00006131 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00006132 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00006133 xmlGenericError(xmlGenericErrorContext,
6134 "realloc of %d byte failed\n", size);
Daniel Veillard10a2c651999-12-12 13:03:50 +00006135 return;
6136 }
6137 }
Daniel Veillardcf461992000-03-14 18:30:20 +00006138 COPY_BUF(rl,buf,len,r);
Daniel Veillardb05deb71999-08-10 19:04:08 +00006139 r = s;
Daniel Veillardcf461992000-03-14 18:30:20 +00006140 rl = sl;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006141 s = cur;
Daniel Veillardcf461992000-03-14 18:30:20 +00006142 sl = l;
Daniel Veillardf09e7e32000-10-01 15:53:30 +00006143 count++;
6144 if (count > 50) {
6145 GROW;
6146 count = 0;
6147 }
Daniel Veillardcf461992000-03-14 18:30:20 +00006148 NEXTL(l);
6149 cur = CUR_CHAR(l);
Daniel Veillardb05deb71999-08-10 19:04:08 +00006150 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00006151 buf[len] = 0;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006152 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillardcf461992000-03-14 18:30:20 +00006153 if (cur != '>') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006154 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006155 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006156 ctxt->sax->error(ctxt->userData,
Daniel Veillard10a2c651999-12-12 13:03:50 +00006157 "CData section not finished\n%.50s\n", buf);
Daniel Veillardb05deb71999-08-10 19:04:08 +00006158 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006159 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00006160 xmlFree(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006161 return;
6162 }
Daniel Veillardcf461992000-03-14 18:30:20 +00006163 NEXTL(l);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006164
6165 /*
Daniel Veillard10a2c651999-12-12 13:03:50 +00006166 * Ok the buffer is to be consumed as cdata.
Daniel Veillard260a68f1998-08-13 03:39:55 +00006167 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006168 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00006169 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillard10a2c651999-12-12 13:03:50 +00006170 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006171 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00006172 xmlFree(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006173}
6174
Daniel Veillard11e00581998-10-24 18:27:49 +00006175/**
6176 * xmlParseContent:
6177 * @ctxt: an XML parser context
6178 *
6179 * Parse a content:
Daniel Veillard260a68f1998-08-13 03:39:55 +00006180 *
6181 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6182 */
6183
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006184void
6185xmlParseContent(xmlParserCtxtPtr ctxt) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00006186 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00006187 while (((RAW != 0) || (ctxt->token != 0)) &&
6188 ((RAW != '<') || (NXT(1) != '/'))) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006189 const xmlChar *test = CUR_PTR;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00006190 int cons = ctxt->input->consumed;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006191 xmlChar tok = ctxt->token;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006192
6193 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00006194 * Handle possible processed charrefs.
6195 */
6196 if (ctxt->token != 0) {
6197 xmlParseCharData(ctxt, 0);
6198 }
6199 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00006200 * First case : a Processing Instruction.
6201 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006202 else if ((RAW == '<') && (NXT(1) == '?')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006203 xmlParsePI(ctxt);
6204 }
Daniel Veillard517752b1999-04-05 12:20:10 +00006205
Daniel Veillard260a68f1998-08-13 03:39:55 +00006206 /*
6207 * Second case : a CDSection
6208 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006209 else if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00006210 (NXT(2) == '[') && (NXT(3) == 'C') &&
6211 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6212 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6213 (NXT(8) == '[')) {
6214 xmlParseCDSect(ctxt);
6215 }
Daniel Veillard517752b1999-04-05 12:20:10 +00006216
Daniel Veillard260a68f1998-08-13 03:39:55 +00006217 /*
6218 * Third case : a comment
6219 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006220 else if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00006221 (NXT(2) == '-') && (NXT(3) == '-')) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00006222 xmlParseComment(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00006223 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006224 }
Daniel Veillard517752b1999-04-05 12:20:10 +00006225
Daniel Veillard260a68f1998-08-13 03:39:55 +00006226 /*
6227 * Fourth case : a sub-element.
6228 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006229 else if (RAW == '<') {
Daniel Veillard517752b1999-04-05 12:20:10 +00006230 xmlParseElement(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006231 }
Daniel Veillard517752b1999-04-05 12:20:10 +00006232
Daniel Veillard260a68f1998-08-13 03:39:55 +00006233 /*
Daniel Veillardccb09631998-10-27 06:21:04 +00006234 * Fifth case : a reference. If if has not been resolved,
6235 * parsing returns it's Name, create the node
Daniel Veillard260a68f1998-08-13 03:39:55 +00006236 */
Daniel Veillardb05deb71999-08-10 19:04:08 +00006237
Daniel Veillardcf461992000-03-14 18:30:20 +00006238 else if (RAW == '&') {
Daniel Veillard011b63c1999-06-02 17:44:04 +00006239 xmlParseReference(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006240 }
Daniel Veillard517752b1999-04-05 12:20:10 +00006241
Daniel Veillard260a68f1998-08-13 03:39:55 +00006242 /*
6243 * Last case, text. Note that References are handled directly.
6244 */
6245 else {
6246 xmlParseCharData(ctxt, 0);
6247 }
6248
Daniel Veillarde2d034d1999-07-27 19:52:06 +00006249 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006250 /*
6251 * Pop-up of finished entities.
6252 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006253 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillardbc50b591999-03-01 12:28:53 +00006254 xmlPopInput(ctxt);
Daniel Veillard10a2c651999-12-12 13:03:50 +00006255 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006256
Daniel Veillardb96e6431999-08-29 21:02:19 +00006257 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6258 (tok == ctxt->token)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006259 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006260 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006261 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006262 "detected an error in element content\n");
6263 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006264 ctxt->disableSAX = 1;
Daniel Veillarde715dd22000-08-29 18:29:38 +00006265 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006266 break;
6267 }
6268 }
6269}
6270
Daniel Veillard11e00581998-10-24 18:27:49 +00006271/**
6272 * xmlParseElement:
6273 * @ctxt: an XML parser context
6274 *
6275 * parse an XML element, this is highly recursive
Daniel Veillard260a68f1998-08-13 03:39:55 +00006276 *
6277 * [39] element ::= EmptyElemTag | STag content ETag
6278 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00006279 * [ WFC: Element Type Match ]
6280 * The Name in an element's end-tag must match the element type in the
6281 * start-tag.
6282 *
6283 * [ VC: Element Valid ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00006284 * An element is valid if there is a declaration matching elementdecl
Daniel Veillardb05deb71999-08-10 19:04:08 +00006285 * where the Name matches the element type and one of the following holds:
6286 * - The declaration matches EMPTY and the element has no content.
6287 * - The declaration matches children and the sequence of child elements
6288 * belongs to the language generated by the regular expression in the
6289 * content model, with optional white space (characters matching the
6290 * nonterminal S) between each pair of child elements.
6291 * - The declaration matches Mixed and the content consists of character
6292 * data and child elements whose types match names in the content model.
6293 * - The declaration matches ANY, and the types of any child elements have
6294 * been declared.
Daniel Veillard260a68f1998-08-13 03:39:55 +00006295 */
6296
Daniel Veillard517752b1999-04-05 12:20:10 +00006297void
Daniel Veillard1e346af1999-02-22 10:33:01 +00006298xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006299 const xmlChar *openTag = CUR_PTR;
6300 xmlChar *name;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006301 xmlChar *oldname;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006302 xmlParserNodeInfo node_info;
Daniel Veillardc26087b1999-08-30 11:23:51 +00006303 xmlNodePtr ret;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006304
6305 /* Capture start position */
Daniel Veillardc26087b1999-08-30 11:23:51 +00006306 if (ctxt->record_info) {
6307 node_info.begin_pos = ctxt->input->consumed +
6308 (CUR_PTR - ctxt->input->base);
6309 node_info.begin_line = ctxt->input->line;
6310 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00006311
Daniel Veillardcf461992000-03-14 18:30:20 +00006312 if (ctxt->spaceNr == 0)
6313 spacePush(ctxt, -1);
6314 else
6315 spacePush(ctxt, *ctxt->space);
6316
Daniel Veillard14fff061999-06-22 21:49:07 +00006317 name = xmlParseStartTag(ctxt);
6318 if (name == NULL) {
Daniel Veillardcf461992000-03-14 18:30:20 +00006319 spacePop(ctxt);
Daniel Veillard14fff061999-06-22 21:49:07 +00006320 return;
6321 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006322 namePush(ctxt, name);
Daniel Veillardc26087b1999-08-30 11:23:51 +00006323 ret = ctxt->node;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006324
6325 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00006326 * [ VC: Root Element Type ]
6327 * The Name in the document type declaration must match the element
6328 * type of the root element.
6329 */
6330 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
Daniel Veillardcf461992000-03-14 18:30:20 +00006331 ctxt->node && (ctxt->node == ctxt->myDoc->children))
Daniel Veillardb05deb71999-08-10 19:04:08 +00006332 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6333
6334 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00006335 * Check for an Empty Element.
6336 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006337 if ((RAW == '/') && (NXT(1) == '>')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006338 SKIP(2);
Daniel Veillardcf461992000-03-14 18:30:20 +00006339 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6340 (!ctxt->disableSAX))
Daniel Veillard14fff061999-06-22 21:49:07 +00006341 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006342 oldname = namePop(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00006343 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006344 if (oldname != NULL) {
6345#ifdef DEBUG_STACK
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00006346 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006347#endif
6348 xmlFree(oldname);
6349 }
Daniel Veillard87b95392000-08-12 21:12:04 +00006350 if ( ret != NULL && ctxt->record_info ) {
6351 node_info.end_pos = ctxt->input->consumed +
6352 (CUR_PTR - ctxt->input->base);
6353 node_info.end_line = ctxt->input->line;
6354 node_info.node = ret;
6355 xmlParserAddNodeInfo(ctxt, &node_info);
6356 }
Daniel Veillard517752b1999-04-05 12:20:10 +00006357 return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006358 }
Daniel Veillardcf461992000-03-14 18:30:20 +00006359 if (RAW == '>') {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00006360 NEXT;
6361 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006362 ctxt->errNo = XML_ERR_GT_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006363 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006364 ctxt->sax->error(ctxt->userData,
6365 "Couldn't find end of Start Tag\n%.30s\n",
Daniel Veillard242590e1998-11-13 18:04:35 +00006366 openTag);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006367 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006368 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006369
6370 /*
6371 * end of parsing of this node.
6372 */
6373 nodePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006374 oldname = namePop(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00006375 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006376 if (oldname != NULL) {
6377#ifdef DEBUG_STACK
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00006378 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006379#endif
6380 xmlFree(oldname);
6381 }
Daniel Veillardc26087b1999-08-30 11:23:51 +00006382
6383 /*
6384 * Capture end position and add node
6385 */
6386 if ( ret != NULL && ctxt->record_info ) {
6387 node_info.end_pos = ctxt->input->consumed +
6388 (CUR_PTR - ctxt->input->base);
6389 node_info.end_line = ctxt->input->line;
6390 node_info.node = ret;
6391 xmlParserAddNodeInfo(ctxt, &node_info);
6392 }
Daniel Veillard517752b1999-04-05 12:20:10 +00006393 return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006394 }
6395
6396 /*
6397 * Parse the content of the element:
6398 */
6399 xmlParseContent(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00006400 if (!IS_CHAR(RAW)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006401 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006402 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006403 ctxt->sax->error(ctxt->userData,
Daniel Veillard242590e1998-11-13 18:04:35 +00006404 "Premature end of data in tag %.30s\n", openTag);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006405 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006406 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006407
6408 /*
6409 * end of parsing of this node.
6410 */
6411 nodePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006412 oldname = namePop(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00006413 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006414 if (oldname != NULL) {
6415#ifdef DEBUG_STACK
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00006416 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006417#endif
6418 xmlFree(oldname);
6419 }
Daniel Veillard517752b1999-04-05 12:20:10 +00006420 return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006421 }
6422
6423 /*
6424 * parse the end of tag: '</' should be here.
6425 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006426 xmlParseEndTag(ctxt);
Daniel Veillardc26087b1999-08-30 11:23:51 +00006427
6428 /*
6429 * Capture end position and add node
6430 */
6431 if ( ret != NULL && ctxt->record_info ) {
6432 node_info.end_pos = ctxt->input->consumed +
6433 (CUR_PTR - ctxt->input->base);
6434 node_info.end_line = ctxt->input->line;
6435 node_info.node = ret;
6436 xmlParserAddNodeInfo(ctxt, &node_info);
6437 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00006438}
6439
Daniel Veillard11e00581998-10-24 18:27:49 +00006440/**
6441 * xmlParseVersionNum:
6442 * @ctxt: an XML parser context
6443 *
6444 * parse the XML version value.
Daniel Veillard260a68f1998-08-13 03:39:55 +00006445 *
6446 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
Daniel Veillard1e346af1999-02-22 10:33:01 +00006447 *
6448 * Returns the string giving the XML version number, or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00006449 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006450xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006451xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00006452 xmlChar *buf = NULL;
6453 int len = 0;
6454 int size = 10;
6455 xmlChar cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006456
Daniel Veillard10a2c651999-12-12 13:03:50 +00006457 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6458 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00006459 xmlGenericError(xmlGenericErrorContext,
6460 "malloc of %d byte failed\n", size);
Daniel Veillard10a2c651999-12-12 13:03:50 +00006461 return(NULL);
6462 }
6463 cur = CUR;
Daniel Veillardcf461992000-03-14 18:30:20 +00006464 while (((cur >= 'a') && (cur <= 'z')) ||
6465 ((cur >= 'A') && (cur <= 'Z')) ||
6466 ((cur >= '0') && (cur <= '9')) ||
6467 (cur == '_') || (cur == '.') ||
6468 (cur == ':') || (cur == '-')) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00006469 if (len + 1 >= size) {
6470 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00006471 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00006472 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00006473 xmlGenericError(xmlGenericErrorContext,
6474 "realloc of %d byte failed\n", size);
Daniel Veillard10a2c651999-12-12 13:03:50 +00006475 return(NULL);
6476 }
6477 }
6478 buf[len++] = cur;
6479 NEXT;
6480 cur=CUR;
6481 }
6482 buf[len] = 0;
6483 return(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006484}
6485
Daniel Veillard11e00581998-10-24 18:27:49 +00006486/**
6487 * xmlParseVersionInfo:
6488 * @ctxt: an XML parser context
6489 *
6490 * parse the XML version.
Daniel Veillard260a68f1998-08-13 03:39:55 +00006491 *
6492 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6493 *
6494 * [25] Eq ::= S? '=' S?
Daniel Veillard11e00581998-10-24 18:27:49 +00006495 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00006496 * Returns the version string, e.g. "1.0"
Daniel Veillard260a68f1998-08-13 03:39:55 +00006497 */
6498
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006499xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006500xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006501 xmlChar *version = NULL;
6502 const xmlChar *q;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006503
Daniel Veillardcf461992000-03-14 18:30:20 +00006504 if ((RAW == 'v') && (NXT(1) == 'e') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00006505 (NXT(2) == 'r') && (NXT(3) == 's') &&
6506 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6507 (NXT(6) == 'n')) {
6508 SKIP(7);
6509 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00006510 if (RAW != '=') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006511 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006512 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006513 ctxt->sax->error(ctxt->userData,
6514 "xmlParseVersionInfo : expected '='\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006515 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006516 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006517 return(NULL);
6518 }
6519 NEXT;
6520 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00006521 if (RAW == '"') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006522 NEXT;
6523 q = CUR_PTR;
6524 version = xmlParseVersionNum(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00006525 if (RAW != '"') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006526 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006527 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006528 ctxt->sax->error(ctxt->userData,
6529 "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006530 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006531 ctxt->disableSAX = 1;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006532 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00006533 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00006534 } else if (RAW == '\''){
Daniel Veillard260a68f1998-08-13 03:39:55 +00006535 NEXT;
6536 q = CUR_PTR;
6537 version = xmlParseVersionNum(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00006538 if (RAW != '\'') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006539 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006540 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006541 ctxt->sax->error(ctxt->userData,
6542 "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006543 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006544 ctxt->disableSAX = 1;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006545 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00006546 NEXT;
6547 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006548 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006550 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006551 "xmlParseVersionInfo : expected ' or \"\n");
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006552 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006553 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006554 }
6555 }
6556 return(version);
6557}
6558
Daniel Veillard11e00581998-10-24 18:27:49 +00006559/**
6560 * xmlParseEncName:
6561 * @ctxt: an XML parser context
6562 *
6563 * parse the XML encoding name
Daniel Veillard260a68f1998-08-13 03:39:55 +00006564 *
6565 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
Daniel Veillard11e00581998-10-24 18:27:49 +00006566 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00006567 * Returns the encoding name value or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00006568 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006569xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006570xmlParseEncName(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00006571 xmlChar *buf = NULL;
6572 int len = 0;
6573 int size = 10;
6574 xmlChar cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006575
Daniel Veillard10a2c651999-12-12 13:03:50 +00006576 cur = CUR;
6577 if (((cur >= 'a') && (cur <= 'z')) ||
6578 ((cur >= 'A') && (cur <= 'Z'))) {
6579 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6580 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00006581 xmlGenericError(xmlGenericErrorContext,
6582 "malloc of %d byte failed\n", size);
Daniel Veillard10a2c651999-12-12 13:03:50 +00006583 return(NULL);
6584 }
6585
6586 buf[len++] = cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006587 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00006588 cur = CUR;
Daniel Veillardcf461992000-03-14 18:30:20 +00006589 while (((cur >= 'a') && (cur <= 'z')) ||
6590 ((cur >= 'A') && (cur <= 'Z')) ||
6591 ((cur >= '0') && (cur <= '9')) ||
6592 (cur == '.') || (cur == '_') ||
6593 (cur == '-')) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00006594 if (len + 1 >= size) {
6595 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00006596 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00006597 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00006598 xmlGenericError(xmlGenericErrorContext,
6599 "realloc of %d byte failed\n", size);
Daniel Veillard10a2c651999-12-12 13:03:50 +00006600 return(NULL);
6601 }
6602 }
6603 buf[len++] = cur;
6604 NEXT;
6605 cur = CUR;
6606 if (cur == 0) {
6607 SHRINK;
6608 GROW;
6609 cur = CUR;
6610 }
6611 }
6612 buf[len] = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006613 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006614 ctxt->errNo = XML_ERR_ENCODING_NAME;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006615 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006616 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006617 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006618 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006619 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00006620 return(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006621}
6622
Daniel Veillard11e00581998-10-24 18:27:49 +00006623/**
6624 * xmlParseEncodingDecl:
6625 * @ctxt: an XML parser context
6626 *
6627 * parse the XML encoding declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00006628 *
6629 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
Daniel Veillard11e00581998-10-24 18:27:49 +00006630 *
Daniel Veillardb1059e22000-09-16 14:02:43 +00006631 * this setups the conversion filters.
Daniel Veillard11e00581998-10-24 18:27:49 +00006632 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00006633 * Returns the encoding value or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00006634 */
6635
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006636xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006637xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006638 xmlChar *encoding = NULL;
6639 const xmlChar *q;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006640
6641 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00006642 if ((RAW == 'e') && (NXT(1) == 'n') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00006643 (NXT(2) == 'c') && (NXT(3) == 'o') &&
6644 (NXT(4) == 'd') && (NXT(5) == 'i') &&
6645 (NXT(6) == 'n') && (NXT(7) == 'g')) {
6646 SKIP(8);
6647 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00006648 if (RAW != '=') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006649 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006651 ctxt->sax->error(ctxt->userData,
6652 "xmlParseEncodingDecl : expected '='\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006653 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006654 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006655 return(NULL);
6656 }
6657 NEXT;
6658 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00006659 if (RAW == '"') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006660 NEXT;
6661 q = CUR_PTR;
6662 encoding = xmlParseEncName(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00006663 if (RAW != '"') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006664 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006665 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006666 ctxt->sax->error(ctxt->userData,
6667 "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006668 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006669 ctxt->disableSAX = 1;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006670 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00006671 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00006672 } else if (RAW == '\''){
Daniel Veillard260a68f1998-08-13 03:39:55 +00006673 NEXT;
6674 q = CUR_PTR;
6675 encoding = xmlParseEncName(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00006676 if (RAW != '\'') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006677 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006678 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006679 ctxt->sax->error(ctxt->userData,
6680 "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006681 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006682 ctxt->disableSAX = 1;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006683 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00006684 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00006685 } else if (RAW == '"'){
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006686 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006687 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006688 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006689 "xmlParseEncodingDecl : expected ' or \"\n");
6690 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006691 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006692 }
Daniel Veillard496a1cf2000-05-03 14:20:55 +00006693 if (encoding != NULL) {
6694 xmlCharEncoding enc;
6695 xmlCharEncodingHandlerPtr handler;
6696
6697 if (ctxt->input->encoding != NULL)
6698 xmlFree((xmlChar *) ctxt->input->encoding);
6699 ctxt->input->encoding = encoding;
6700
6701 enc = xmlParseCharEncoding((const char *) encoding);
6702 /*
6703 * registered set of known encodings
6704 */
6705 if (enc != XML_CHAR_ENCODING_ERROR) {
6706 xmlSwitchEncoding(ctxt, enc);
6707 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6708 xmlFree(encoding);
6709 return(NULL);
6710 }
6711 } else {
6712 /*
6713 * fallback for unknown encodings
6714 */
6715 handler = xmlFindCharEncodingHandler((const char *) encoding);
6716 if (handler != NULL) {
6717 xmlSwitchToEncoding(ctxt, handler);
6718 } else {
6719 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00006720 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6721 ctxt->sax->error(ctxt->userData,
6722 "Unsupported encoding %s\n", encoding);
Daniel Veillard496a1cf2000-05-03 14:20:55 +00006723 return(NULL);
6724 }
6725 }
6726 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00006727 }
6728 return(encoding);
6729}
6730
Daniel Veillard11e00581998-10-24 18:27:49 +00006731/**
6732 * xmlParseSDDecl:
6733 * @ctxt: an XML parser context
6734 *
6735 * parse the XML standalone declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00006736 *
6737 * [32] SDDecl ::= S 'standalone' Eq
6738 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Daniel Veillard1e346af1999-02-22 10:33:01 +00006739 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00006740 * [ VC: Standalone Document Declaration ]
6741 * TODO The standalone document declaration must have the value "no"
6742 * if any external markup declarations contain declarations of:
6743 * - attributes with default values, if elements to which these
6744 * attributes apply appear in the document without specifications
6745 * of values for these attributes, or
6746 * - entities (other than amp, lt, gt, apos, quot), if references
6747 * to those entities appear in the document, or
6748 * - attributes with values subject to normalization, where the
6749 * attribute appears in the document with a value which will change
6750 * as a result of normalization, or
6751 * - element types with element content, if white space occurs directly
6752 * within any instance of those types.
6753 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00006754 * Returns 1 if standalone, 0 otherwise
Daniel Veillard260a68f1998-08-13 03:39:55 +00006755 */
6756
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006757int
6758xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006759 int standalone = -1;
6760
6761 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00006762 if ((RAW == 's') && (NXT(1) == 't') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00006763 (NXT(2) == 'a') && (NXT(3) == 'n') &&
6764 (NXT(4) == 'd') && (NXT(5) == 'a') &&
6765 (NXT(6) == 'l') && (NXT(7) == 'o') &&
6766 (NXT(8) == 'n') && (NXT(9) == 'e')) {
6767 SKIP(10);
Daniel Veillard011b63c1999-06-02 17:44:04 +00006768 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00006769 if (RAW != '=') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006770 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006771 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006772 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006773 "XML standalone declaration : expected '='\n");
6774 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006775 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006776 return(standalone);
6777 }
6778 NEXT;
6779 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00006780 if (RAW == '\''){
Daniel Veillard260a68f1998-08-13 03:39:55 +00006781 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00006782 if ((RAW == 'n') && (NXT(1) == 'o')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006783 standalone = 0;
6784 SKIP(2);
Daniel Veillardcf461992000-03-14 18:30:20 +00006785 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00006786 (NXT(2) == 's')) {
6787 standalone = 1;
6788 SKIP(3);
6789 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006790 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006791 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006792 ctxt->sax->error(ctxt->userData,
6793 "standalone accepts only 'yes' or 'no'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006794 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006795 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006796 }
Daniel Veillardcf461992000-03-14 18:30:20 +00006797 if (RAW != '\'') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006798 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006799 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006800 ctxt->sax->error(ctxt->userData, "String not closed\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006801 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006802 ctxt->disableSAX = 1;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006803 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00006804 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00006805 } else if (RAW == '"'){
Daniel Veillard260a68f1998-08-13 03:39:55 +00006806 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00006807 if ((RAW == 'n') && (NXT(1) == 'o')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006808 standalone = 0;
6809 SKIP(2);
Daniel Veillardcf461992000-03-14 18:30:20 +00006810 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00006811 (NXT(2) == 's')) {
6812 standalone = 1;
6813 SKIP(3);
6814 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006815 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006816 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006817 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006818 "standalone accepts only 'yes' or 'no'\n");
6819 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006820 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006821 }
Daniel Veillardcf461992000-03-14 18:30:20 +00006822 if (RAW != '"') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006823 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006824 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006825 ctxt->sax->error(ctxt->userData, "String not closed\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006826 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006827 ctxt->disableSAX = 1;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006828 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00006829 NEXT;
6830 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006831 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006832 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006833 ctxt->sax->error(ctxt->userData,
6834 "Standalone value not found\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006835 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006836 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006837 }
6838 }
6839 return(standalone);
6840}
6841
Daniel Veillard11e00581998-10-24 18:27:49 +00006842/**
6843 * xmlParseXMLDecl:
6844 * @ctxt: an XML parser context
6845 *
6846 * parse an XML declaration header
Daniel Veillard260a68f1998-08-13 03:39:55 +00006847 *
6848 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
6849 */
6850
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006851void
6852xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006853 xmlChar *version;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006854
6855 /*
6856 * We know that '<?xml' is here.
6857 */
6858 SKIP(5);
6859
Daniel Veillardcf461992000-03-14 18:30:20 +00006860 if (!IS_BLANK(RAW)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006861 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006862 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006863 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006864 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006865 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006866 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00006867 SKIP_BLANKS;
6868
6869 /*
6870 * We should have the VersionInfo here.
6871 */
6872 version = xmlParseVersionInfo(ctxt);
6873 if (version == NULL)
6874 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard517752b1999-04-05 12:20:10 +00006875 ctxt->version = xmlStrdup(version);
Daniel Veillard6454aec1999-09-02 22:04:43 +00006876 xmlFree(version);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006877
6878 /*
6879 * We may have the encoding declaration
6880 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006881 if (!IS_BLANK(RAW)) {
6882 if ((RAW == '?') && (NXT(1) == '>')) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006883 SKIP(2);
6884 return;
6885 }
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006886 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006887 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006888 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006889 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006890 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006891 }
Daniel Veillard496a1cf2000-05-03 14:20:55 +00006892 xmlParseEncodingDecl(ctxt);
6893 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6894 /*
6895 * The XML REC instructs us to stop parsing right here
6896 */
6897 return;
6898 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00006899
6900 /*
6901 * We may have the standalone status.
6902 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006903 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
6904 if ((RAW == '?') && (NXT(1) == '>')) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006905 SKIP(2);
6906 return;
6907 }
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006908 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006909 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006910 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006911 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006912 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006913 }
6914 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00006915 ctxt->input->standalone = xmlParseSDDecl(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006916
6917 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00006918 if ((RAW == '?') && (NXT(1) == '>')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006919 SKIP(2);
Daniel Veillardcf461992000-03-14 18:30:20 +00006920 } else if (RAW == '>') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006921 /* Deprecated old WD ... */
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006922 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006923 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006924 ctxt->sax->error(ctxt->userData,
6925 "XML declaration must end-up with '?>'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006926 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006927 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006928 NEXT;
6929 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006930 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006931 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006932 ctxt->sax->error(ctxt->userData,
6933 "parsing XML declaration: '?>' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006934 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006935 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006936 MOVETO_ENDTAG(CUR_PTR);
6937 NEXT;
6938 }
6939}
6940
Daniel Veillard11e00581998-10-24 18:27:49 +00006941/**
6942 * xmlParseMisc:
6943 * @ctxt: an XML parser context
6944 *
6945 * parse an XML Misc* optionnal field.
Daniel Veillard260a68f1998-08-13 03:39:55 +00006946 *
6947 * [27] Misc ::= Comment | PI | S
6948 */
6949
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006950void
6951xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillardcf461992000-03-14 18:30:20 +00006952 while (((RAW == '<') && (NXT(1) == '?')) ||
6953 ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00006954 (NXT(2) == '-') && (NXT(3) == '-')) ||
6955 IS_BLANK(CUR)) {
Daniel Veillardcf461992000-03-14 18:30:20 +00006956 if ((RAW == '<') && (NXT(1) == '?')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006957 xmlParsePI(ctxt);
6958 } else if (IS_BLANK(CUR)) {
6959 NEXT;
6960 } else
Daniel Veillardb96e6431999-08-29 21:02:19 +00006961 xmlParseComment(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006962 }
6963}
6964
Daniel Veillard11e00581998-10-24 18:27:49 +00006965/**
Daniel Veillardcf461992000-03-14 18:30:20 +00006966 * xmlParseDocument:
Daniel Veillard11e00581998-10-24 18:27:49 +00006967 * @ctxt: an XML parser context
6968 *
6969 * parse an XML document (and build a tree if using the standard SAX
6970 * interface).
Daniel Veillard260a68f1998-08-13 03:39:55 +00006971 *
6972 * [1] document ::= prolog element Misc*
6973 *
6974 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
Daniel Veillard11e00581998-10-24 18:27:49 +00006975 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00006976 * Returns 0, -1 in case of error. the parser context is augmented
Daniel Veillard11e00581998-10-24 18:27:49 +00006977 * as a result of the parsing.
Daniel Veillard260a68f1998-08-13 03:39:55 +00006978 */
6979
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006980int
6981xmlParseDocument(xmlParserCtxtPtr ctxt) {
Daniel Veillardcf461992000-03-14 18:30:20 +00006982 xmlChar start[4];
6983 xmlCharEncoding enc;
6984
Daniel Veillardbc765302000-10-01 18:23:35 +00006985 xmlInitParser();
Daniel Veillard260a68f1998-08-13 03:39:55 +00006986
Daniel Veillarde2d034d1999-07-27 19:52:06 +00006987 GROW;
6988
Daniel Veillard260a68f1998-08-13 03:39:55 +00006989 /*
6990 * SAX: beginning of the document processing.
6991 */
Daniel Veillard517752b1999-04-05 12:20:10 +00006992 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
Daniel Veillard27d88741999-05-29 11:51:49 +00006993 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006994
Daniel Veillardcf461992000-03-14 18:30:20 +00006995 /*
6996 * Get the 4 first bytes and decode the charset
6997 * if enc != XML_CHAR_ENCODING_NONE
6998 * plug some encoding conversion routines.
Daniel Veillard260a68f1998-08-13 03:39:55 +00006999 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007000 start[0] = RAW;
7001 start[1] = NXT(1);
7002 start[2] = NXT(2);
7003 start[3] = NXT(3);
7004 enc = xmlDetectCharEncoding(start, 4);
7005 if (enc != XML_CHAR_ENCODING_NONE) {
7006 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007007 }
7008
Daniel Veillardcf461992000-03-14 18:30:20 +00007009
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007010 if (CUR == 0) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00007011 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007012 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00007013 ctxt->sax->error(ctxt->userData, "Document is empty\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007014 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007015 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007016 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00007017
7018 /*
7019 * Check for the XMLDecl in the Prolog.
7020 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00007021 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00007022 if ((RAW == '<') && (NXT(1) == '?') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00007023 (NXT(2) == 'x') && (NXT(3) == 'm') &&
Daniel Veillard686d6b62000-01-03 11:08:02 +00007024 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
Daniel Veillard496a1cf2000-05-03 14:20:55 +00007025
7026 /*
7027 * Note that we will switch encoding on the fly.
7028 */
Daniel Veillard260a68f1998-08-13 03:39:55 +00007029 xmlParseXMLDecl(ctxt);
Daniel Veillard496a1cf2000-05-03 14:20:55 +00007030 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7031 /*
7032 * The XML REC instructs us to stop parsing right here
7033 */
7034 return(-1);
7035 }
Daniel Veillardcf461992000-03-14 18:30:20 +00007036 ctxt->standalone = ctxt->input->standalone;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007037 SKIP_BLANKS;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007038 } else {
Daniel Veillard517752b1999-04-05 12:20:10 +00007039 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007040 }
Daniel Veillardcf461992000-03-14 18:30:20 +00007041 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
Daniel Veillard27d88741999-05-29 11:51:49 +00007042 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007043
7044 /*
7045 * The Misc part of the Prolog
7046 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00007047 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007048 xmlParseMisc(ctxt);
7049
7050 /*
7051 * Then possibly doc type declaration(s) and more Misc
7052 * (doctypedecl Misc*)?
7053 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00007054 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00007055 if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00007056 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7057 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7058 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7059 (NXT(8) == 'E')) {
Daniel Veillardcf461992000-03-14 18:30:20 +00007060
7061 ctxt->inSubset = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007062 xmlParseDocTypeDecl(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00007063 if (RAW == '[') {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007064 ctxt->instate = XML_PARSER_DTD;
7065 xmlParseInternalSubset(ctxt);
7066 }
Daniel Veillardcf461992000-03-14 18:30:20 +00007067
7068 /*
7069 * Create and update the external subset.
7070 */
7071 ctxt->inSubset = 2;
7072 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7073 (!ctxt->disableSAX))
7074 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7075 ctxt->extSubSystem, ctxt->extSubURI);
7076 ctxt->inSubset = 0;
7077
7078
Daniel Veillardb05deb71999-08-10 19:04:08 +00007079 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007080 xmlParseMisc(ctxt);
7081 }
7082
7083 /*
7084 * Time to start parsing the tree itself
7085 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00007086 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00007087 if (RAW != '<') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00007088 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007089 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00007090 ctxt->sax->error(ctxt->userData,
Daniel Veillardcf461992000-03-14 18:30:20 +00007091 "Start tag expected, '<' not found\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007092 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007093 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007094 ctxt->instate = XML_PARSER_EOF;
7095 } else {
7096 ctxt->instate = XML_PARSER_CONTENT;
7097 xmlParseElement(ctxt);
7098 ctxt->instate = XML_PARSER_EPILOG;
7099
7100
7101 /*
7102 * The Misc part at the end
7103 */
7104 xmlParseMisc(ctxt);
7105
Daniel Veillardcf461992000-03-14 18:30:20 +00007106 if (RAW != 0) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00007107 ctxt->errNo = XML_ERR_DOCUMENT_END;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007108 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7109 ctxt->sax->error(ctxt->userData,
7110 "Extra content at the end of the document\n");
7111 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007112 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007113 }
7114 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007115 }
7116
Daniel Veillard260a68f1998-08-13 03:39:55 +00007117 /*
7118 * SAX: end of the document processing.
7119 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007120 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7121 (!ctxt->disableSAX))
Daniel Veillard27d88741999-05-29 11:51:49 +00007122 ctxt->sax->endDocument(ctxt->userData);
Daniel Veillardcf461992000-03-14 18:30:20 +00007123
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007124 if (! ctxt->wellFormed) return(-1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007125 return(0);
7126}
7127
Daniel Veillardb1059e22000-09-16 14:02:43 +00007128/**
7129 * xmlParseExtParsedEnt:
7130 * @ctxt: an XML parser context
7131 *
7132 * parse a genreral parsed entity
7133 * An external general parsed entity is well-formed if it matches the
7134 * production labeled extParsedEnt.
7135 *
7136 * [78] extParsedEnt ::= TextDecl? content
7137 *
7138 * Returns 0, -1 in case of error. the parser context is augmented
7139 * as a result of the parsing.
7140 */
7141
7142int
7143xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7144 xmlChar start[4];
7145 xmlCharEncoding enc;
7146
7147 xmlDefaultSAXHandlerInit();
7148
7149 GROW;
7150
7151 /*
7152 * SAX: beginning of the document processing.
7153 */
7154 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7155 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7156
7157 /*
7158 * Get the 4 first bytes and decode the charset
7159 * if enc != XML_CHAR_ENCODING_NONE
7160 * plug some encoding conversion routines.
7161 */
7162 start[0] = RAW;
7163 start[1] = NXT(1);
7164 start[2] = NXT(2);
7165 start[3] = NXT(3);
7166 enc = xmlDetectCharEncoding(start, 4);
7167 if (enc != XML_CHAR_ENCODING_NONE) {
7168 xmlSwitchEncoding(ctxt, enc);
7169 }
7170
7171
7172 if (CUR == 0) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00007173 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
Daniel Veillardb1059e22000-09-16 14:02:43 +00007174 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7175 ctxt->sax->error(ctxt->userData, "Document is empty\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00007176 ctxt->wellFormed = 0;
7177 ctxt->disableSAX = 1;
7178 }
7179
7180 /*
7181 * Check for the XMLDecl in the Prolog.
7182 */
7183 GROW;
7184 if ((RAW == '<') && (NXT(1) == '?') &&
7185 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7186 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7187
7188 /*
7189 * Note that we will switch encoding on the fly.
7190 */
7191 xmlParseXMLDecl(ctxt);
7192 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7193 /*
7194 * The XML REC instructs us to stop parsing right here
7195 */
7196 return(-1);
7197 }
7198 SKIP_BLANKS;
7199 } else {
7200 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7201 }
7202 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7203 ctxt->sax->startDocument(ctxt->userData);
7204
7205 /*
7206 * Doing validity checking on chunk doesn't make sense
7207 */
7208 ctxt->instate = XML_PARSER_CONTENT;
7209 ctxt->validate = 0;
7210 ctxt->depth = 0;
7211
7212 xmlParseContent(ctxt);
7213
7214 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00007215 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
Daniel Veillardb1059e22000-09-16 14:02:43 +00007216 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7217 ctxt->sax->error(ctxt->userData,
7218 "chunk is not well balanced\n");
7219 ctxt->wellFormed = 0;
7220 ctxt->disableSAX = 1;
Daniel Veillardb1059e22000-09-16 14:02:43 +00007221 } else if (RAW != 0) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00007222 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
Daniel Veillardb1059e22000-09-16 14:02:43 +00007223 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7224 ctxt->sax->error(ctxt->userData,
7225 "extra content at the end of well balanced chunk\n");
7226 ctxt->wellFormed = 0;
7227 ctxt->disableSAX = 1;
Daniel Veillardb1059e22000-09-16 14:02:43 +00007228 }
7229
7230 /*
7231 * SAX: end of the document processing.
7232 */
7233 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7234 (!ctxt->disableSAX))
7235 ctxt->sax->endDocument(ctxt->userData);
7236
7237 if (! ctxt->wellFormed) return(-1);
7238 return(0);
7239}
7240
Daniel Veillardb05deb71999-08-10 19:04:08 +00007241/************************************************************************
7242 * *
Daniel Veillard7f858501999-11-17 17:32:38 +00007243 * Progressive parsing interfaces *
7244 * *
7245 ************************************************************************/
7246
7247/**
7248 * xmlParseLookupSequence:
7249 * @ctxt: an XML parser context
7250 * @first: the first char to lookup
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007251 * @next: the next char to lookup or zero
7252 * @third: the next char to lookup or zero
Daniel Veillard7f858501999-11-17 17:32:38 +00007253 *
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007254 * Try to find if a sequence (first, next, third) or just (first next) or
7255 * (first) is available in the input stream.
7256 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7257 * to avoid rescanning sequences of bytes, it DOES change the state of the
7258 * parser, do not use liberally.
Daniel Veillard7f858501999-11-17 17:32:38 +00007259 *
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007260 * Returns the index to the current parsing point if the full sequence
7261 * is available, -1 otherwise.
Daniel Veillard7f858501999-11-17 17:32:38 +00007262 */
7263int
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007264xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7265 xmlChar next, xmlChar third) {
7266 int base, len;
7267 xmlParserInputPtr in;
7268 const xmlChar *buf;
7269
7270 in = ctxt->input;
7271 if (in == NULL) return(-1);
7272 base = in->cur - in->base;
7273 if (base < 0) return(-1);
7274 if (ctxt->checkIndex > base)
7275 base = ctxt->checkIndex;
7276 if (in->buf == NULL) {
7277 buf = in->base;
7278 len = in->length;
7279 } else {
7280 buf = in->buf->buffer->content;
7281 len = in->buf->buffer->use;
7282 }
7283 /* take into account the sequence length */
7284 if (third) len -= 2;
7285 else if (next) len --;
7286 for (;base < len;base++) {
7287 if (buf[base] == first) {
7288 if (third != 0) {
7289 if ((buf[base + 1] != next) ||
7290 (buf[base + 2] != third)) continue;
7291 } else if (next != 0) {
7292 if (buf[base + 1] != next) continue;
7293 }
7294 ctxt->checkIndex = 0;
7295#ifdef DEBUG_PUSH
7296 if (next == 0)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007297 xmlGenericError(xmlGenericErrorContext,
7298 "PP: lookup '%c' found at %d\n",
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007299 first, base);
7300 else if (third == 0)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007301 xmlGenericError(xmlGenericErrorContext,
7302 "PP: lookup '%c%c' found at %d\n",
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007303 first, next, base);
7304 else
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007305 xmlGenericError(xmlGenericErrorContext,
7306 "PP: lookup '%c%c%c' found at %d\n",
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007307 first, next, third, base);
7308#endif
7309 return(base - (in->cur - in->base));
7310 }
7311 }
7312 ctxt->checkIndex = base;
7313#ifdef DEBUG_PUSH
7314 if (next == 0)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007315 xmlGenericError(xmlGenericErrorContext,
7316 "PP: lookup '%c' failed\n", first);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007317 else if (third == 0)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007318 xmlGenericError(xmlGenericErrorContext,
7319 "PP: lookup '%c%c' failed\n", first, next);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007320 else
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007321 xmlGenericError(xmlGenericErrorContext,
7322 "PP: lookup '%c%c%c' failed\n", first, next, third);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007323#endif
7324 return(-1);
Daniel Veillard7f858501999-11-17 17:32:38 +00007325}
7326
7327/**
Daniel Veillard71b656e2000-01-05 14:46:17 +00007328 * xmlParseTryOrFinish:
Daniel Veillard7f858501999-11-17 17:32:38 +00007329 * @ctxt: an XML parser context
Daniel Veillard71b656e2000-01-05 14:46:17 +00007330 * @terminate: last chunk indicator
Daniel Veillard7f858501999-11-17 17:32:38 +00007331 *
7332 * Try to progress on parsing
7333 *
7334 * Returns zero if no parsing was possible
7335 */
7336int
Daniel Veillard71b656e2000-01-05 14:46:17 +00007337xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
Daniel Veillard7f858501999-11-17 17:32:38 +00007338 int ret = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007339 int avail;
7340 xmlChar cur, next;
7341
7342#ifdef DEBUG_PUSH
7343 switch (ctxt->instate) {
7344 case XML_PARSER_EOF:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007345 xmlGenericError(xmlGenericErrorContext,
7346 "PP: try EOF\n"); break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007347 case XML_PARSER_START:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007348 xmlGenericError(xmlGenericErrorContext,
7349 "PP: try START\n"); break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007350 case XML_PARSER_MISC:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007351 xmlGenericError(xmlGenericErrorContext,
7352 "PP: try MISC\n");break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007353 case XML_PARSER_COMMENT:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007354 xmlGenericError(xmlGenericErrorContext,
7355 "PP: try COMMENT\n");break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007356 case XML_PARSER_PROLOG:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007357 xmlGenericError(xmlGenericErrorContext,
7358 "PP: try PROLOG\n");break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007359 case XML_PARSER_START_TAG:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007360 xmlGenericError(xmlGenericErrorContext,
7361 "PP: try START_TAG\n");break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007362 case XML_PARSER_CONTENT:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007363 xmlGenericError(xmlGenericErrorContext,
7364 "PP: try CONTENT\n");break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007365 case XML_PARSER_CDATA_SECTION:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007366 xmlGenericError(xmlGenericErrorContext,
7367 "PP: try CDATA_SECTION\n");break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007368 case XML_PARSER_END_TAG:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007369 xmlGenericError(xmlGenericErrorContext,
7370 "PP: try END_TAG\n");break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007371 case XML_PARSER_ENTITY_DECL:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007372 xmlGenericError(xmlGenericErrorContext,
7373 "PP: try ENTITY_DECL\n");break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007374 case XML_PARSER_ENTITY_VALUE:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007375 xmlGenericError(xmlGenericErrorContext,
7376 "PP: try ENTITY_VALUE\n");break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007377 case XML_PARSER_ATTRIBUTE_VALUE:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007378 xmlGenericError(xmlGenericErrorContext,
7379 "PP: try ATTRIBUTE_VALUE\n");break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007380 case XML_PARSER_DTD:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007381 xmlGenericError(xmlGenericErrorContext,
7382 "PP: try DTD\n");break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007383 case XML_PARSER_EPILOG:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007384 xmlGenericError(xmlGenericErrorContext,
7385 "PP: try EPILOG\n");break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007386 case XML_PARSER_PI:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007387 xmlGenericError(xmlGenericErrorContext,
7388 "PP: try PI\n");break;
Daniel Veillard41e06512000-11-13 11:47:47 +00007389 case XML_PARSER_IGNORE:
7390 xmlGenericError(xmlGenericErrorContext,
7391 "PP: try IGNORE\n");break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007392 }
7393#endif
Daniel Veillard7f858501999-11-17 17:32:38 +00007394
7395 while (1) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007396 /*
7397 * Pop-up of finished entities.
7398 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007399 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007400 xmlPopInput(ctxt);
7401
Daniel Veillardcf461992000-03-14 18:30:20 +00007402 if (ctxt->input ==NULL) break;
7403 if (ctxt->input->buf == NULL)
7404 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007405 else
Daniel Veillardcf461992000-03-14 18:30:20 +00007406 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007407 if (avail < 1)
7408 goto done;
Daniel Veillard7f858501999-11-17 17:32:38 +00007409 switch (ctxt->instate) {
7410 case XML_PARSER_EOF:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007411 /*
7412 * Document parsing is done !
7413 */
7414 goto done;
7415 case XML_PARSER_START:
7416 /*
7417 * Very first chars read from the document flow.
7418 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007419 cur = ctxt->input->cur[0];
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007420 if (IS_BLANK(cur)) {
7421 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7422 ctxt->sax->setDocumentLocator(ctxt->userData,
7423 &xmlDefaultSAXLocator);
Daniel Veillarda2c6da92000-09-16 18:15:00 +00007424 ctxt->errNo = XML_ERR_DOCUMENT_START;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007425 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7426 ctxt->sax->error(ctxt->userData,
7427 "Extra spaces at the beginning of the document are not allowed\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007428 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007429 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007430 SKIP_BLANKS;
7431 ret++;
Daniel Veillardcf461992000-03-14 18:30:20 +00007432 if (ctxt->input->buf == NULL)
7433 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007434 else
Daniel Veillardcf461992000-03-14 18:30:20 +00007435 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007436 }
7437 if (avail < 2)
7438 goto done;
7439
Daniel Veillardcf461992000-03-14 18:30:20 +00007440 cur = ctxt->input->cur[0];
7441 next = ctxt->input->cur[1];
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007442 if (cur == 0) {
7443 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7444 ctxt->sax->setDocumentLocator(ctxt->userData,
7445 &xmlDefaultSAXLocator);
Daniel Veillarda2c6da92000-09-16 18:15:00 +00007446 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007447 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7448 ctxt->sax->error(ctxt->userData, "Document is empty\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007449 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007450 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007451 ctxt->instate = XML_PARSER_EOF;
7452#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007453 xmlGenericError(xmlGenericErrorContext,
7454 "PP: entering EOF\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007455#endif
7456 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7457 ctxt->sax->endDocument(ctxt->userData);
7458 goto done;
7459 }
7460 if ((cur == '<') && (next == '?')) {
7461 /* PI or XML decl */
7462 if (avail < 5) return(ret);
Daniel Veillard71b656e2000-01-05 14:46:17 +00007463 if ((!terminate) &&
7464 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007465 return(ret);
7466 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7467 ctxt->sax->setDocumentLocator(ctxt->userData,
7468 &xmlDefaultSAXLocator);
Daniel Veillardcf461992000-03-14 18:30:20 +00007469 if ((ctxt->input->cur[2] == 'x') &&
7470 (ctxt->input->cur[3] == 'm') &&
7471 (ctxt->input->cur[4] == 'l') &&
7472 (IS_BLANK(ctxt->input->cur[5]))) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007473 ret += 5;
7474#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007475 xmlGenericError(xmlGenericErrorContext,
7476 "PP: Parsing XML Decl\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007477#endif
7478 xmlParseXMLDecl(ctxt);
Daniel Veillard496a1cf2000-05-03 14:20:55 +00007479 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7480 /*
7481 * The XML REC instructs us to stop parsing right
7482 * here
7483 */
7484 ctxt->instate = XML_PARSER_EOF;
7485 return(0);
7486 }
Daniel Veillardcf461992000-03-14 18:30:20 +00007487 ctxt->standalone = ctxt->input->standalone;
7488 if ((ctxt->encoding == NULL) &&
7489 (ctxt->input->encoding != NULL))
7490 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7491 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7492 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007493 ctxt->sax->startDocument(ctxt->userData);
7494 ctxt->instate = XML_PARSER_MISC;
7495#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007496 xmlGenericError(xmlGenericErrorContext,
7497 "PP: entering MISC\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007498#endif
7499 } else {
7500 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillardcf461992000-03-14 18:30:20 +00007501 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7502 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007503 ctxt->sax->startDocument(ctxt->userData);
7504 ctxt->instate = XML_PARSER_MISC;
7505#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007506 xmlGenericError(xmlGenericErrorContext,
7507 "PP: entering MISC\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007508#endif
7509 }
7510 } else {
7511 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7512 ctxt->sax->setDocumentLocator(ctxt->userData,
7513 &xmlDefaultSAXLocator);
7514 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillardcf461992000-03-14 18:30:20 +00007515 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7516 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007517 ctxt->sax->startDocument(ctxt->userData);
7518 ctxt->instate = XML_PARSER_MISC;
7519#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007520 xmlGenericError(xmlGenericErrorContext,
7521 "PP: entering MISC\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007522#endif
7523 }
7524 break;
7525 case XML_PARSER_MISC:
7526 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00007527 if (ctxt->input->buf == NULL)
7528 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007529 else
Daniel Veillardcf461992000-03-14 18:30:20 +00007530 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007531 if (avail < 2)
7532 goto done;
Daniel Veillardcf461992000-03-14 18:30:20 +00007533 cur = ctxt->input->cur[0];
7534 next = ctxt->input->cur[1];
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007535 if ((cur == '<') && (next == '?')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00007536 if ((!terminate) &&
7537 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007538 goto done;
7539#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007540 xmlGenericError(xmlGenericErrorContext,
7541 "PP: Parsing PI\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007542#endif
7543 xmlParsePI(ctxt);
7544 } else if ((cur == '<') && (next == '!') &&
Daniel Veillardcf461992000-03-14 18:30:20 +00007545 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00007546 if ((!terminate) &&
7547 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007548 goto done;
7549#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007550 xmlGenericError(xmlGenericErrorContext,
7551 "PP: Parsing Comment\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007552#endif
7553 xmlParseComment(ctxt);
7554 ctxt->instate = XML_PARSER_MISC;
7555 } else if ((cur == '<') && (next == '!') &&
Daniel Veillardcf461992000-03-14 18:30:20 +00007556 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7557 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7558 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7559 (ctxt->input->cur[8] == 'E')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00007560 if ((!terminate) &&
7561 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007562 goto done;
7563#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007564 xmlGenericError(xmlGenericErrorContext,
7565 "PP: Parsing internal subset\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007566#endif
Daniel Veillardcf461992000-03-14 18:30:20 +00007567 ctxt->inSubset = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007568 xmlParseDocTypeDecl(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00007569 if (RAW == '[') {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007570 ctxt->instate = XML_PARSER_DTD;
7571#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007572 xmlGenericError(xmlGenericErrorContext,
7573 "PP: entering DTD\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007574#endif
7575 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00007576 /*
7577 * Create and update the external subset.
7578 */
7579 ctxt->inSubset = 2;
7580 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7581 (ctxt->sax->externalSubset != NULL))
7582 ctxt->sax->externalSubset(ctxt->userData,
7583 ctxt->intSubName, ctxt->extSubSystem,
7584 ctxt->extSubURI);
7585 ctxt->inSubset = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007586 ctxt->instate = XML_PARSER_PROLOG;
7587#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007588 xmlGenericError(xmlGenericErrorContext,
7589 "PP: entering PROLOG\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007590#endif
7591 }
7592 } else if ((cur == '<') && (next == '!') &&
7593 (avail < 9)) {
7594 goto done;
7595 } else {
7596 ctxt->instate = XML_PARSER_START_TAG;
7597#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007598 xmlGenericError(xmlGenericErrorContext,
7599 "PP: entering START_TAG\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007600#endif
7601 }
7602 break;
Daniel Veillard41e06512000-11-13 11:47:47 +00007603 case XML_PARSER_IGNORE:
7604 xmlGenericError(xmlGenericErrorContext,
7605 "PP: internal error, state == IGNORE");
7606 ctxt->instate = XML_PARSER_DTD;
7607#ifdef DEBUG_PUSH
7608 xmlGenericError(xmlGenericErrorContext,
7609 "PP: entering DTD\n");
7610#endif
7611 break;
Daniel Veillard7f858501999-11-17 17:32:38 +00007612 case XML_PARSER_PROLOG:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007613 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00007614 if (ctxt->input->buf == NULL)
7615 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007616 else
Daniel Veillardcf461992000-03-14 18:30:20 +00007617 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007618 if (avail < 2)
7619 goto done;
Daniel Veillardcf461992000-03-14 18:30:20 +00007620 cur = ctxt->input->cur[0];
7621 next = ctxt->input->cur[1];
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007622 if ((cur == '<') && (next == '?')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00007623 if ((!terminate) &&
7624 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007625 goto done;
7626#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007627 xmlGenericError(xmlGenericErrorContext,
7628 "PP: Parsing PI\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007629#endif
7630 xmlParsePI(ctxt);
7631 } else if ((cur == '<') && (next == '!') &&
Daniel Veillardcf461992000-03-14 18:30:20 +00007632 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00007633 if ((!terminate) &&
7634 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007635 goto done;
7636#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007637 xmlGenericError(xmlGenericErrorContext,
7638 "PP: Parsing Comment\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007639#endif
7640 xmlParseComment(ctxt);
7641 ctxt->instate = XML_PARSER_PROLOG;
7642 } else if ((cur == '<') && (next == '!') &&
7643 (avail < 4)) {
7644 goto done;
7645 } else {
7646 ctxt->instate = XML_PARSER_START_TAG;
7647#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007648 xmlGenericError(xmlGenericErrorContext,
7649 "PP: entering START_TAG\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007650#endif
7651 }
7652 break;
Daniel Veillard7f858501999-11-17 17:32:38 +00007653 case XML_PARSER_EPILOG:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007654 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00007655 if (ctxt->input->buf == NULL)
7656 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007657 else
Daniel Veillardcf461992000-03-14 18:30:20 +00007658 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007659 if (avail < 2)
7660 goto done;
Daniel Veillardcf461992000-03-14 18:30:20 +00007661 cur = ctxt->input->cur[0];
7662 next = ctxt->input->cur[1];
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007663 if ((cur == '<') && (next == '?')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00007664 if ((!terminate) &&
7665 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007666 goto done;
7667#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007668 xmlGenericError(xmlGenericErrorContext,
7669 "PP: Parsing PI\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007670#endif
7671 xmlParsePI(ctxt);
7672 ctxt->instate = XML_PARSER_EPILOG;
7673 } else if ((cur == '<') && (next == '!') &&
Daniel Veillardcf461992000-03-14 18:30:20 +00007674 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00007675 if ((!terminate) &&
7676 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007677 goto done;
7678#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007679 xmlGenericError(xmlGenericErrorContext,
7680 "PP: Parsing Comment\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007681#endif
7682 xmlParseComment(ctxt);
7683 ctxt->instate = XML_PARSER_EPILOG;
7684 } else if ((cur == '<') && (next == '!') &&
7685 (avail < 4)) {
7686 goto done;
7687 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00007688 ctxt->errNo = XML_ERR_DOCUMENT_END;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007689 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7690 ctxt->sax->error(ctxt->userData,
7691 "Extra content at the end of the document\n");
7692 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007693 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007694 ctxt->instate = XML_PARSER_EOF;
7695#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007696 xmlGenericError(xmlGenericErrorContext,
7697 "PP: entering EOF\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007698#endif
Daniel Veillardcf461992000-03-14 18:30:20 +00007699 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7700 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007701 ctxt->sax->endDocument(ctxt->userData);
7702 goto done;
7703 }
7704 break;
7705 case XML_PARSER_START_TAG: {
7706 xmlChar *name, *oldname;
7707
Daniel Veillardcf461992000-03-14 18:30:20 +00007708 if ((avail < 2) && (ctxt->inputNr == 1))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007709 goto done;
Daniel Veillardcf461992000-03-14 18:30:20 +00007710 cur = ctxt->input->cur[0];
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007711 if (cur != '<') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00007712 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007713 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7714 ctxt->sax->error(ctxt->userData,
7715 "Start tag expect, '<' not found\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007716 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007717 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007718 ctxt->instate = XML_PARSER_EOF;
7719#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007720 xmlGenericError(xmlGenericErrorContext,
7721 "PP: entering EOF\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007722#endif
Daniel Veillardcf461992000-03-14 18:30:20 +00007723 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7724 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007725 ctxt->sax->endDocument(ctxt->userData);
7726 goto done;
7727 }
Daniel Veillard71b656e2000-01-05 14:46:17 +00007728 if ((!terminate) &&
7729 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007730 goto done;
Daniel Veillardcf461992000-03-14 18:30:20 +00007731 if (ctxt->spaceNr == 0)
7732 spacePush(ctxt, -1);
7733 else
7734 spacePush(ctxt, *ctxt->space);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007735 name = xmlParseStartTag(ctxt);
7736 if (name == NULL) {
Daniel Veillardcf461992000-03-14 18:30:20 +00007737 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007738 ctxt->instate = XML_PARSER_EOF;
7739#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007740 xmlGenericError(xmlGenericErrorContext,
7741 "PP: entering EOF\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007742#endif
Daniel Veillardcf461992000-03-14 18:30:20 +00007743 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7744 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007745 ctxt->sax->endDocument(ctxt->userData);
7746 goto done;
7747 }
7748 namePush(ctxt, xmlStrdup(name));
7749
7750 /*
7751 * [ VC: Root Element Type ]
7752 * The Name in the document type declaration must match
7753 * the element type of the root element.
7754 */
7755 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
Daniel Veillardcf461992000-03-14 18:30:20 +00007756 ctxt->node && (ctxt->node == ctxt->myDoc->children))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007757 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7758
7759 /*
7760 * Check for an Empty Element.
7761 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007762 if ((RAW == '/') && (NXT(1) == '>')) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007763 SKIP(2);
Daniel Veillardcf461992000-03-14 18:30:20 +00007764 if ((ctxt->sax != NULL) &&
7765 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007766 ctxt->sax->endElement(ctxt->userData, name);
7767 xmlFree(name);
7768 oldname = namePop(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00007769 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007770 if (oldname != NULL) {
7771#ifdef DEBUG_STACK
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007772 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007773#endif
7774 xmlFree(oldname);
7775 }
7776 if (ctxt->name == NULL) {
7777 ctxt->instate = XML_PARSER_EPILOG;
7778#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007779 xmlGenericError(xmlGenericErrorContext,
7780 "PP: entering EPILOG\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007781#endif
7782 } else {
7783 ctxt->instate = XML_PARSER_CONTENT;
7784#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007785 xmlGenericError(xmlGenericErrorContext,
7786 "PP: entering CONTENT\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007787#endif
7788 }
7789 break;
7790 }
Daniel Veillardcf461992000-03-14 18:30:20 +00007791 if (RAW == '>') {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007792 NEXT;
7793 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00007794 ctxt->errNo = XML_ERR_GT_REQUIRED;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007795 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7796 ctxt->sax->error(ctxt->userData,
7797 "Couldn't find end of Start Tag %s\n",
7798 name);
7799 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007800 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007801
7802 /*
7803 * end of parsing of this node.
7804 */
7805 nodePop(ctxt);
7806 oldname = namePop(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00007807 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007808 if (oldname != NULL) {
7809#ifdef DEBUG_STACK
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007810 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007811#endif
7812 xmlFree(oldname);
7813 }
7814 }
7815 xmlFree(name);
7816 ctxt->instate = XML_PARSER_CONTENT;
7817#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007818 xmlGenericError(xmlGenericErrorContext,
7819 "PP: entering CONTENT\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007820#endif
7821 break;
7822 }
Daniel Veillarde715dd22000-08-29 18:29:38 +00007823 case XML_PARSER_CONTENT: {
7824 const xmlChar *test;
7825 int cons;
7826 xmlChar tok;
7827
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007828 /*
7829 * Handle preparsed entities and charRef
7830 */
7831 if (ctxt->token != 0) {
7832 xmlChar cur[2] = { 0 , 0 } ;
7833
7834 cur[0] = (xmlChar) ctxt->token;
Daniel Veillardcf461992000-03-14 18:30:20 +00007835 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7836 (ctxt->sax->characters != NULL))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007837 ctxt->sax->characters(ctxt->userData, cur, 1);
7838 ctxt->token = 0;
7839 }
Daniel Veillardcf461992000-03-14 18:30:20 +00007840 if ((avail < 2) && (ctxt->inputNr == 1))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007841 goto done;
Daniel Veillardcf461992000-03-14 18:30:20 +00007842 cur = ctxt->input->cur[0];
7843 next = ctxt->input->cur[1];
Daniel Veillarde715dd22000-08-29 18:29:38 +00007844
7845 test = CUR_PTR;
7846 cons = ctxt->input->consumed;
7847 tok = ctxt->token;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007848 if ((cur == '<') && (next == '?')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00007849 if ((!terminate) &&
7850 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007851 goto done;
7852#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007853 xmlGenericError(xmlGenericErrorContext,
7854 "PP: Parsing PI\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007855#endif
7856 xmlParsePI(ctxt);
7857 } else if ((cur == '<') && (next == '!') &&
Daniel Veillardcf461992000-03-14 18:30:20 +00007858 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00007859 if ((!terminate) &&
7860 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007861 goto done;
7862#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007863 xmlGenericError(xmlGenericErrorContext,
7864 "PP: Parsing Comment\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007865#endif
7866 xmlParseComment(ctxt);
7867 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillardcf461992000-03-14 18:30:20 +00007868 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
7869 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
7870 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
7871 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
7872 (ctxt->input->cur[8] == '[')) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007873 SKIP(9);
7874 ctxt->instate = XML_PARSER_CDATA_SECTION;
7875#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007876 xmlGenericError(xmlGenericErrorContext,
7877 "PP: entering CDATA_SECTION\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007878#endif
7879 break;
7880 } else if ((cur == '<') && (next == '!') &&
7881 (avail < 9)) {
7882 goto done;
7883 } else if ((cur == '<') && (next == '/')) {
7884 ctxt->instate = XML_PARSER_END_TAG;
7885#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007886 xmlGenericError(xmlGenericErrorContext,
7887 "PP: entering END_TAG\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007888#endif
7889 break;
7890 } else if (cur == '<') {
7891 ctxt->instate = XML_PARSER_START_TAG;
7892#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007893 xmlGenericError(xmlGenericErrorContext,
7894 "PP: entering START_TAG\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007895#endif
7896 break;
7897 } else if (cur == '&') {
Daniel Veillard71b656e2000-01-05 14:46:17 +00007898 if ((!terminate) &&
7899 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007900 goto done;
7901#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007902 xmlGenericError(xmlGenericErrorContext,
7903 "PP: Parsing Reference\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007904#endif
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007905 xmlParseReference(ctxt);
7906 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00007907 /* TODO Avoid the extra copy, handle directly !!! */
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007908 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00007909 * Goal of the following test is:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007910 * - minimize calls to the SAX 'character' callback
7911 * when they are mergeable
7912 * - handle an problem for isBlank when we only parse
7913 * a sequence of blank chars and the next one is
7914 * not available to check against '<' presence.
7915 * - tries to homogenize the differences in SAX
7916 * callbacks beween the push and pull versions
7917 * of the parser.
7918 */
7919 if ((ctxt->inputNr == 1) &&
7920 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00007921 if ((!terminate) &&
7922 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007923 goto done;
7924 }
7925 ctxt->checkIndex = 0;
7926#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007927 xmlGenericError(xmlGenericErrorContext,
7928 "PP: Parsing char data\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007929#endif
7930 xmlParseCharData(ctxt, 0);
7931 }
7932 /*
7933 * Pop-up of finished entities.
7934 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007935 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007936 xmlPopInput(ctxt);
Daniel Veillarde715dd22000-08-29 18:29:38 +00007937 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
7938 (tok == ctxt->token)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00007939 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillarde715dd22000-08-29 18:29:38 +00007940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7941 ctxt->sax->error(ctxt->userData,
7942 "detected an error in element content\n");
Daniel Veillarde715dd22000-08-29 18:29:38 +00007943 ctxt->wellFormed = 0;
7944 ctxt->disableSAX = 1;
7945 ctxt->instate = XML_PARSER_EOF;
7946 break;
7947 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007948 break;
Daniel Veillarde715dd22000-08-29 18:29:38 +00007949 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007950 case XML_PARSER_CDATA_SECTION: {
7951 /*
7952 * The Push mode need to have the SAX callback for
7953 * cdataBlock merge back contiguous callbacks.
7954 */
7955 int base;
7956
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007957 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
7958 if (base < 0) {
7959 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillardcf461992000-03-14 18:30:20 +00007960 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007961 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardcf461992000-03-14 18:30:20 +00007962 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007963 XML_PARSER_BIG_BUFFER_SIZE);
7964 }
7965 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
7966 ctxt->checkIndex = 0;
7967 }
7968 goto done;
7969 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00007970 if ((ctxt->sax != NULL) && (base > 0) &&
7971 (!ctxt->disableSAX)) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007972 if (ctxt->sax->cdataBlock != NULL)
7973 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillardcf461992000-03-14 18:30:20 +00007974 ctxt->input->cur, base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007975 }
7976 SKIP(base + 3);
7977 ctxt->checkIndex = 0;
7978 ctxt->instate = XML_PARSER_CONTENT;
7979#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007980 xmlGenericError(xmlGenericErrorContext,
7981 "PP: entering CONTENT\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007982#endif
7983 }
7984 break;
7985 }
Daniel Veillard5e5c6231999-12-29 12:49:06 +00007986 case XML_PARSER_END_TAG:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007987 if (avail < 2)
7988 goto done;
Daniel Veillard71b656e2000-01-05 14:46:17 +00007989 if ((!terminate) &&
7990 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007991 goto done;
7992 xmlParseEndTag(ctxt);
7993 if (ctxt->name == NULL) {
7994 ctxt->instate = XML_PARSER_EPILOG;
7995#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007996 xmlGenericError(xmlGenericErrorContext,
7997 "PP: entering EPILOG\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007998#endif
7999 } else {
8000 ctxt->instate = XML_PARSER_CONTENT;
8001#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008002 xmlGenericError(xmlGenericErrorContext,
8003 "PP: entering CONTENT\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008004#endif
8005 }
8006 break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008007 case XML_PARSER_DTD: {
8008 /*
8009 * Sorry but progressive parsing of the internal subset
8010 * is not expected to be supported. We first check that
8011 * the full content of the internal subset is available and
8012 * the parsing is launched only at that point.
8013 * Internal subset ends up with "']' S? '>'" in an unescaped
8014 * section and not in a ']]>' sequence which are conditional
8015 * sections (whoever argued to keep that crap in XML deserve
8016 * a place in hell !).
8017 */
8018 int base, i;
8019 xmlChar *buf;
8020 xmlChar quote = 0;
8021
Daniel Veillardcf461992000-03-14 18:30:20 +00008022 base = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008023 if (base < 0) return(0);
8024 if (ctxt->checkIndex > base)
8025 base = ctxt->checkIndex;
Daniel Veillardcf461992000-03-14 18:30:20 +00008026 buf = ctxt->input->buf->buffer->content;
Daniel Veillard3f6f7f62000-06-30 17:58:25 +00008027 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8028 base++) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008029 if (quote != 0) {
8030 if (buf[base] == quote)
8031 quote = 0;
8032 continue;
8033 }
8034 if (buf[base] == '"') {
8035 quote = '"';
8036 continue;
8037 }
8038 if (buf[base] == '\'') {
8039 quote = '\'';
8040 continue;
8041 }
8042 if (buf[base] == ']') {
Daniel Veillard3f6f7f62000-06-30 17:58:25 +00008043 if ((unsigned int) base +1 >=
8044 ctxt->input->buf->buffer->use)
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008045 break;
8046 if (buf[base + 1] == ']') {
8047 /* conditional crap, skip both ']' ! */
8048 base++;
8049 continue;
8050 }
Daniel Veillard3f6f7f62000-06-30 17:58:25 +00008051 for (i = 0;
8052 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8053 i++) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008054 if (buf[base + i] == '>')
8055 goto found_end_int_subset;
8056 }
8057 break;
8058 }
8059 }
8060 /*
8061 * We didn't found the end of the Internal subset
8062 */
8063 if (quote == 0)
8064 ctxt->checkIndex = base;
8065#ifdef DEBUG_PUSH
8066 if (next == 0)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008067 xmlGenericError(xmlGenericErrorContext,
8068 "PP: lookup of int subset end filed\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008069#endif
8070 goto done;
8071
8072found_end_int_subset:
8073 xmlParseInternalSubset(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00008074 ctxt->inSubset = 2;
8075 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8076 (ctxt->sax->externalSubset != NULL))
8077 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8078 ctxt->extSubSystem, ctxt->extSubURI);
8079 ctxt->inSubset = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008080 ctxt->instate = XML_PARSER_PROLOG;
8081 ctxt->checkIndex = 0;
8082#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008083 xmlGenericError(xmlGenericErrorContext,
8084 "PP: entering PROLOG\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008085#endif
8086 break;
8087 }
Daniel Veillard7f858501999-11-17 17:32:38 +00008088 case XML_PARSER_COMMENT:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008089 xmlGenericError(xmlGenericErrorContext,
8090 "PP: internal error, state == COMMENT\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008091 ctxt->instate = XML_PARSER_CONTENT;
8092#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008093 xmlGenericError(xmlGenericErrorContext,
8094 "PP: entering CONTENT\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008095#endif
8096 break;
8097 case XML_PARSER_PI:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008098 xmlGenericError(xmlGenericErrorContext,
8099 "PP: internal error, state == PI\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008100 ctxt->instate = XML_PARSER_CONTENT;
8101#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008102 xmlGenericError(xmlGenericErrorContext,
8103 "PP: entering CONTENT\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008104#endif
8105 break;
8106 case XML_PARSER_ENTITY_DECL:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008107 xmlGenericError(xmlGenericErrorContext,
8108 "PP: internal error, state == ENTITY_DECL\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008109 ctxt->instate = XML_PARSER_DTD;
8110#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008111 xmlGenericError(xmlGenericErrorContext,
8112 "PP: entering DTD\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008113#endif
8114 break;
8115 case XML_PARSER_ENTITY_VALUE:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008116 xmlGenericError(xmlGenericErrorContext,
8117 "PP: internal error, state == ENTITY_VALUE\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008118 ctxt->instate = XML_PARSER_CONTENT;
8119#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008120 xmlGenericError(xmlGenericErrorContext,
8121 "PP: entering DTD\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008122#endif
8123 break;
8124 case XML_PARSER_ATTRIBUTE_VALUE:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008125 xmlGenericError(xmlGenericErrorContext,
8126 "PP: internal error, state == ATTRIBUTE_VALUE\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008127 ctxt->instate = XML_PARSER_START_TAG;
8128#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008129 xmlGenericError(xmlGenericErrorContext,
8130 "PP: entering START_TAG\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008131#endif
8132 break;
Daniel Veillardcf461992000-03-14 18:30:20 +00008133 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008134 xmlGenericError(xmlGenericErrorContext,
8135 "PP: internal error, state == SYSTEM_LITERAL\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00008136 ctxt->instate = XML_PARSER_START_TAG;
8137#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008138 xmlGenericError(xmlGenericErrorContext,
8139 "PP: entering START_TAG\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00008140#endif
8141 break;
Daniel Veillard7f858501999-11-17 17:32:38 +00008142 }
8143 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008144done:
8145#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008146 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008147#endif
Daniel Veillard7f858501999-11-17 17:32:38 +00008148 return(ret);
8149}
8150
8151/**
Daniel Veillard71b656e2000-01-05 14:46:17 +00008152 * xmlParseTry:
8153 * @ctxt: an XML parser context
8154 *
8155 * Try to progress on parsing
8156 *
8157 * Returns zero if no parsing was possible
8158 */
8159int
8160xmlParseTry(xmlParserCtxtPtr ctxt) {
8161 return(xmlParseTryOrFinish(ctxt, 0));
8162}
8163
8164/**
Daniel Veillard7f858501999-11-17 17:32:38 +00008165 * xmlParseChunk:
8166 * @ctxt: an XML parser context
8167 * @chunk: an char array
8168 * @size: the size in byte of the chunk
8169 * @terminate: last chunk indicator
8170 *
8171 * Parse a Chunk of memory
8172 *
8173 * Returns zero if no error, the xmlParserErrors otherwise.
8174 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008175int
Daniel Veillard7f858501999-11-17 17:32:38 +00008176xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8177 int terminate) {
Daniel Veillarda819dac1999-11-24 18:04:22 +00008178 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008179 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8180 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8181 int cur = ctxt->input->cur - ctxt->input->base;
8182
Daniel Veillarda819dac1999-11-24 18:04:22 +00008183 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008184 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8185 ctxt->input->cur = ctxt->input->base + cur;
8186#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008187 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008188#endif
8189
Daniel Veillardd0f7f742000-02-02 17:42:48 +00008190 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8191 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008192 } else if (ctxt->instate != XML_PARSER_EOF)
Daniel Veillardc2def842000-11-07 14:21:01 +00008193 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8194 xmlParserInputBufferPtr in = ctxt->input->buf;
8195 int nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8196 if (nbchars < 0) {
8197 xmlGenericError(xmlGenericErrorContext,
8198 "xmlParseChunk: encoder error\n");
8199 return(XML_ERR_INVALID_ENCODING);
8200 }
8201 }
Daniel Veillard71b656e2000-01-05 14:46:17 +00008202 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008203 if (terminate) {
Daniel Veillardcf461992000-03-14 18:30:20 +00008204 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00008205 * Check for termination
8206 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008207 if ((ctxt->instate != XML_PARSER_EOF) &&
8208 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00008209 ctxt->errNo = XML_ERR_DOCUMENT_END;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008210 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8211 ctxt->sax->error(ctxt->userData,
8212 "Extra content at the end of the document\n");
8213 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008214 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008215 }
8216 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillardcf461992000-03-14 18:30:20 +00008217 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8218 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008219 ctxt->sax->endDocument(ctxt->userData);
8220 }
8221 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard7f858501999-11-17 17:32:38 +00008222 }
8223 return((xmlParserErrors) ctxt->errNo);
8224}
8225
/************************************************************************
 *									*
 * 		I/O front end functions to the parser			*
 *									*
 ************************************************************************/
8231
Daniel Veillard11e00581998-10-24 18:27:49 +00008232/**
Daniel Veillardb1059e22000-09-16 14:02:43 +00008233 * xmlStopParser:
Daniel Veillard3f6f7f62000-06-30 17:58:25 +00008234 * @ctxt: an XML parser context
8235 *
8236 * Blocks further parser processing
8237 */
8238void
8239xmlStopParser(xmlParserCtxtPtr ctxt) {
8240 ctxt->instate = XML_PARSER_EOF;
8241 if (ctxt->input != NULL)
8242 ctxt->input->cur = BAD_CAST"";
8243}
8244
8245/**
8246 * xmlCreatePushParserCtxt:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008247 * @sax: a SAX handler
8248 * @user_data: The user data returned on SAX callbacks
8249 * @chunk: a pointer to an array of chars
8250 * @size: number of chars in the array
8251 * @filename: an optional file name or URI
8252 *
8253 * Create a parser context for using the XML parser in push mode
8254 * To allow content encoding detection, @size should be >= 4
8255 * The value of @filename is used for fetching external entities
8256 * and error/warning reports.
8257 *
8258 * Returns the new parser context or NULL
8259 */
8260xmlParserCtxtPtr
8261xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8262 const char *chunk, int size, const char *filename) {
8263 xmlParserCtxtPtr ctxt;
8264 xmlParserInputPtr inputStream;
8265 xmlParserInputBufferPtr buf;
8266 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8267
8268 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00008269 * plug some encoding conversion routines
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008270 */
8271 if ((chunk != NULL) && (size >= 4))
Daniel Veillardcf461992000-03-14 18:30:20 +00008272 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008273
8274 buf = xmlAllocParserInputBuffer(enc);
8275 if (buf == NULL) return(NULL);
8276
8277 ctxt = xmlNewParserCtxt();
8278 if (ctxt == NULL) {
8279 xmlFree(buf);
8280 return(NULL);
8281 }
8282 if (sax != NULL) {
8283 if (ctxt->sax != &xmlDefaultSAXHandler)
8284 xmlFree(ctxt->sax);
8285 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8286 if (ctxt->sax == NULL) {
8287 xmlFree(buf);
8288 xmlFree(ctxt);
8289 return(NULL);
8290 }
8291 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8292 if (user_data != NULL)
8293 ctxt->userData = user_data;
8294 }
8295 if (filename == NULL) {
8296 ctxt->directory = NULL;
8297 } else {
8298 ctxt->directory = xmlParserGetDirectory(filename);
8299 }
8300
8301 inputStream = xmlNewInputStream(ctxt);
8302 if (inputStream == NULL) {
8303 xmlFreeParserCtxt(ctxt);
8304 return(NULL);
8305 }
8306
8307 if (filename == NULL)
8308 inputStream->filename = NULL;
8309 else
8310 inputStream->filename = xmlMemStrdup(filename);
8311 inputStream->buf = buf;
8312 inputStream->base = inputStream->buf->buffer->content;
8313 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillardcf461992000-03-14 18:30:20 +00008314 if (enc != XML_CHAR_ENCODING_NONE) {
8315 xmlSwitchEncoding(ctxt, enc);
8316 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008317
8318 inputPush(ctxt, inputStream);
8319
8320 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8321 (ctxt->input->buf != NULL)) {
8322 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8323#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008324 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008325#endif
8326 }
8327
8328 return(ctxt);
8329}
8330
8331/**
Daniel Veillard5e873c42000-04-12 13:27:38 +00008332 * xmlCreateIOParserCtxt:
8333 * @sax: a SAX handler
8334 * @user_data: The user data returned on SAX callbacks
8335 * @ioread: an I/O read function
8336 * @ioclose: an I/O close function
8337 * @ioctx: an I/O handler
8338 * @enc: the charset encoding if known
8339 *
8340 * Create a parser context for using the XML parser with an existing
8341 * I/O stream
8342 *
8343 * Returns the new parser context or NULL
8344 */
8345xmlParserCtxtPtr
8346xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8347 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8348 void *ioctx, xmlCharEncoding enc) {
8349 xmlParserCtxtPtr ctxt;
8350 xmlParserInputPtr inputStream;
8351 xmlParserInputBufferPtr buf;
8352
8353 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8354 if (buf == NULL) return(NULL);
8355
8356 ctxt = xmlNewParserCtxt();
8357 if (ctxt == NULL) {
8358 xmlFree(buf);
8359 return(NULL);
8360 }
8361 if (sax != NULL) {
8362 if (ctxt->sax != &xmlDefaultSAXHandler)
8363 xmlFree(ctxt->sax);
8364 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8365 if (ctxt->sax == NULL) {
8366 xmlFree(buf);
8367 xmlFree(ctxt);
8368 return(NULL);
8369 }
8370 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8371 if (user_data != NULL)
8372 ctxt->userData = user_data;
8373 }
8374
8375 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8376 if (inputStream == NULL) {
8377 xmlFreeParserCtxt(ctxt);
8378 return(NULL);
8379 }
8380 inputPush(ctxt, inputStream);
8381
8382 return(ctxt);
8383}
8384
/************************************************************************
 *									*
 * 		Front ends when parsing a DTD				*
 *									*
 ************************************************************************/
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008390
8391/**
Daniel Veillard2ffc3592000-10-30 15:36:47 +00008392 * xmlIOParseDTD:
8393 * @sax: the SAX handler block or NULL
8394 * @input: an Input Buffer
8395 * @enc: the charset encoding if known
8396 *
8397 * Load and parse a DTD
8398 *
8399 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillarda4964b72000-10-31 18:23:44 +00008400 * @input will be freed at parsing end.
Daniel Veillard2ffc3592000-10-30 15:36:47 +00008401 */
8402
8403xmlDtdPtr
8404xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8405 xmlCharEncoding enc) {
8406 xmlDtdPtr ret = NULL;
8407 xmlParserCtxtPtr ctxt;
8408 xmlParserInputPtr pinput = NULL;
8409
8410 if (input == NULL)
8411 return(NULL);
8412
8413 ctxt = xmlNewParserCtxt();
8414 if (ctxt == NULL) {
8415 return(NULL);
8416 }
8417
8418 /*
8419 * Set-up the SAX context
8420 */
8421 if (sax != NULL) {
8422 if (ctxt->sax != NULL)
8423 xmlFree(ctxt->sax);
8424 ctxt->sax = sax;
8425 ctxt->userData = NULL;
8426 }
8427
8428 /*
8429 * generate a parser input from the I/O handler
8430 */
8431
8432 pinput = xmlNewIOInputStream(ctxt, input, enc);
8433 if (pinput == NULL) {
8434 if (sax != NULL) ctxt->sax = NULL;
8435 xmlFreeParserCtxt(ctxt);
8436 return(NULL);
8437 }
8438
8439 /*
8440 * plug some encoding conversion routines here.
8441 */
8442 xmlPushInput(ctxt, pinput);
8443
8444 pinput->filename = NULL;
8445 pinput->line = 1;
8446 pinput->col = 1;
8447 pinput->base = ctxt->input->cur;
8448 pinput->cur = ctxt->input->cur;
8449 pinput->free = NULL;
8450
8451 /*
8452 * let's parse that entity knowing it's an external subset.
8453 */
8454 ctxt->inSubset = 2;
8455 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8456 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8457 BAD_CAST "none", BAD_CAST "none");
8458 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8459
8460 if (ctxt->myDoc != NULL) {
8461 if (ctxt->wellFormed) {
8462 ret = ctxt->myDoc->extSubset;
8463 ctxt->myDoc->extSubset = NULL;
8464 } else {
8465 ret = NULL;
8466 }
8467 xmlFreeDoc(ctxt->myDoc);
8468 ctxt->myDoc = NULL;
8469 }
8470 if (sax != NULL) ctxt->sax = NULL;
8471 xmlFreeParserCtxt(ctxt);
8472
8473 return(ret);
8474}
8475
8476/**
Daniel Veillardcf461992000-03-14 18:30:20 +00008477 * xmlSAXParseDTD:
Daniel Veillard011b63c1999-06-02 17:44:04 +00008478 * @sax: the SAX handler block
8479 * @ExternalID: a NAME* containing the External ID of the DTD
8480 * @SystemID: a NAME* containing the URL to the DTD
8481 *
8482 * Load and parse an external subset.
8483 *
8484 * Returns the resulting xmlDtdPtr or NULL in case of error.
8485 */
8486
8487xmlDtdPtr
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008488xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8489 const xmlChar *SystemID) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00008490 xmlDtdPtr ret = NULL;
8491 xmlParserCtxtPtr ctxt;
Daniel Veillard14fff061999-06-22 21:49:07 +00008492 xmlParserInputPtr input = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +00008493 xmlCharEncoding enc;
8494
8495 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8496
Daniel Veillardb05deb71999-08-10 19:04:08 +00008497 ctxt = xmlNewParserCtxt();
Daniel Veillard011b63c1999-06-02 17:44:04 +00008498 if (ctxt == NULL) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00008499 return(NULL);
8500 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00008501
8502 /*
8503 * Set-up the SAX context
8504 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00008505 if (sax != NULL) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00008506 if (ctxt->sax != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00008507 xmlFree(ctxt->sax);
Daniel Veillard011b63c1999-06-02 17:44:04 +00008508 ctxt->sax = sax;
8509 ctxt->userData = NULL;
8510 }
8511
8512 /*
8513 * Ask the Entity resolver to load the damn thing
8514 */
8515
8516 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8517 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8518 if (input == NULL) {
Daniel Veillard97fea181999-06-26 23:07:37 +00008519 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +00008520 xmlFreeParserCtxt(ctxt);
8521 return(NULL);
8522 }
8523
8524 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00008525 * plug some encoding conversion routines here.
Daniel Veillard011b63c1999-06-02 17:44:04 +00008526 */
8527 xmlPushInput(ctxt, input);
Daniel Veillardcf461992000-03-14 18:30:20 +00008528 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
Daniel Veillard011b63c1999-06-02 17:44:04 +00008529 xmlSwitchEncoding(ctxt, enc);
8530
Daniel Veillardb05deb71999-08-10 19:04:08 +00008531 if (input->filename == NULL)
Daniel Veillardcf461992000-03-14 18:30:20 +00008532 input->filename = (char *) xmlStrdup(SystemID);
Daniel Veillard011b63c1999-06-02 17:44:04 +00008533 input->line = 1;
8534 input->col = 1;
8535 input->base = ctxt->input->cur;
8536 input->cur = ctxt->input->cur;
8537 input->free = NULL;
8538
8539 /*
8540 * let's parse that entity knowing it's an external subset.
8541 */
Daniel Veillard06047432000-04-24 11:33:38 +00008542 ctxt->inSubset = 2;
8543 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8544 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8545 ExternalID, SystemID);
Daniel Veillard011b63c1999-06-02 17:44:04 +00008546 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
8547
8548 if (ctxt->myDoc != NULL) {
8549 if (ctxt->wellFormed) {
Daniel Veillard06047432000-04-24 11:33:38 +00008550 ret = ctxt->myDoc->extSubset;
8551 ctxt->myDoc->extSubset = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +00008552 } else {
8553 ret = NULL;
8554 }
8555 xmlFreeDoc(ctxt->myDoc);
8556 ctxt->myDoc = NULL;
8557 }
Daniel Veillard97fea181999-06-26 23:07:37 +00008558 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +00008559 xmlFreeParserCtxt(ctxt);
8560
8561 return(ret);
8562}
8563
8564/**
Daniel Veillardcf461992000-03-14 18:30:20 +00008565 * xmlParseDTD:
Daniel Veillard011b63c1999-06-02 17:44:04 +00008566 * @ExternalID: a NAME* containing the External ID of the DTD
8567 * @SystemID: a NAME* containing the URL to the DTD
8568 *
8569 * Load and parse an external subset.
8570 *
8571 * Returns the resulting xmlDtdPtr or NULL in case of error.
8572 */
8573
8574xmlDtdPtr
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008575xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00008576 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
8577}
8578
Daniel Veillardb1059e22000-09-16 14:02:43 +00008579/************************************************************************
8580 * *
8581 * Front ends when parsing an Entity *
8582 * *
8583 ************************************************************************/
8584
Daniel Veillard011b63c1999-06-02 17:44:04 +00008585/**
Daniel Veillardcf461992000-03-14 18:30:20 +00008586 * xmlSAXParseBalancedChunk:
Daniel Veillard0142b842000-01-14 14:45:24 +00008587 * @ctx: an XML parser context (possibly NULL)
8588 * @sax: the SAX handler bloc (possibly NULL)
8589 * @user_data: The user data returned on SAX callbacks (possibly NULL)
8590 * @input: a parser input stream
8591 * @enc: the encoding
8592 *
8593 * Parse a well-balanced chunk of an XML document
8594 * The user has to provide SAX callback block whose routines will be
8595 * called by the parser
8596 * The allowed sequence for the Well Balanced Chunk is the one defined by
8597 * the content production in the XML grammar:
8598 *
8599 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8600 *
Daniel Veillardcf461992000-03-14 18:30:20 +00008601 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
Daniel Veillard0142b842000-01-14 14:45:24 +00008602 * the error code otherwise
8603 */
8604
8605int
8606xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
8607 void *user_data, xmlParserInputPtr input,
8608 xmlCharEncoding enc) {
8609 xmlParserCtxtPtr ctxt;
8610 int ret;
8611
8612 if (input == NULL) return(-1);
8613
8614 if (ctx != NULL)
8615 ctxt = ctx;
8616 else {
8617 ctxt = xmlNewParserCtxt();
8618 if (ctxt == NULL)
8619 return(-1);
8620 if (sax == NULL)
8621 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8622 }
8623
8624 /*
8625 * Set-up the SAX context
8626 */
8627 if (sax != NULL) {
8628 if (ctxt->sax != NULL)
8629 xmlFree(ctxt->sax);
8630 ctxt->sax = sax;
8631 ctxt->userData = user_data;
8632 }
8633
8634 /*
8635 * plug some encoding conversion routines here.
8636 */
8637 xmlPushInput(ctxt, input);
8638 if (enc != XML_CHAR_ENCODING_NONE)
8639 xmlSwitchEncoding(ctxt, enc);
8640
8641 /*
8642 * let's parse that entity knowing it's an external subset.
8643 */
8644 xmlParseContent(ctxt);
8645 ret = ctxt->errNo;
8646
8647 if (ctx == NULL) {
8648 if (sax != NULL)
8649 ctxt->sax = NULL;
8650 else
8651 xmlFreeDoc(ctxt->myDoc);
8652 xmlFreeParserCtxt(ctxt);
8653 }
8654 return(ret);
8655}
8656
8657/**
Daniel Veillard87b95392000-08-12 21:12:04 +00008658 * xmlParseCtxtExternalEntity:
8659 * @ctx: the existing parsing context
8660 * @URL: the URL for the entity to load
8661 * @ID: the System ID for the entity to load
8662 * @list: the return value for the set of parsed nodes
8663 *
8664 * Parse an external general entity within an existing parsing context
8665 * An external general parsed entity is well-formed if it matches the
8666 * production labeled extParsedEnt.
8667 *
8668 * [78] extParsedEnt ::= TextDecl? content
8669 *
8670 * Returns 0 if the entity is well formed, -1 in case of args problem and
8671 * the parser error code otherwise
8672 */
8673
8674int
8675xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8676 const xmlChar *ID, xmlNodePtr *list) {
8677 xmlParserCtxtPtr ctxt;
8678 xmlDocPtr newDoc;
8679 xmlSAXHandlerPtr oldsax = NULL;
8680 int ret = 0;
8681
8682 if (ctx->depth > 40) {
8683 return(XML_ERR_ENTITY_LOOP);
8684 }
8685
8686 if (list != NULL)
8687 *list = NULL;
8688 if ((URL == NULL) && (ID == NULL))
8689 return(-1);
8690 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8691 return(-1);
8692
8693
Daniel Veillard39c7d712000-09-10 16:14:55 +00008694 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
Daniel Veillard87b95392000-08-12 21:12:04 +00008695 if (ctxt == NULL) return(-1);
8696 ctxt->userData = ctxt;
8697 oldsax = ctxt->sax;
8698 ctxt->sax = ctx->sax;
8699 newDoc = xmlNewDoc(BAD_CAST "1.0");
8700 if (newDoc == NULL) {
8701 xmlFreeParserCtxt(ctxt);
8702 return(-1);
8703 }
8704 if (ctx->myDoc != NULL) {
8705 newDoc->intSubset = ctx->myDoc->intSubset;
8706 newDoc->extSubset = ctx->myDoc->extSubset;
8707 }
8708 if (ctx->myDoc->URL != NULL) {
8709 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8710 }
8711 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8712 if (newDoc->children == NULL) {
8713 ctxt->sax = oldsax;
8714 xmlFreeParserCtxt(ctxt);
8715 newDoc->intSubset = NULL;
8716 newDoc->extSubset = NULL;
8717 xmlFreeDoc(newDoc);
8718 return(-1);
8719 }
8720 nodePush(ctxt, newDoc->children);
8721 if (ctx->myDoc == NULL) {
8722 ctxt->myDoc = newDoc;
8723 } else {
8724 ctxt->myDoc = ctx->myDoc;
8725 newDoc->children->doc = ctx->myDoc;
8726 }
8727
8728 /*
8729 * Parse a possible text declaration first
8730 */
8731 GROW;
8732 if ((RAW == '<') && (NXT(1) == '?') &&
8733 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8734 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8735 xmlParseTextDecl(ctxt);
8736 }
8737
8738 /*
8739 * Doing validity checking on chunk doesn't make sense
8740 */
8741 ctxt->instate = XML_PARSER_CONTENT;
8742 ctxt->validate = ctx->validate;
8743 ctxt->depth = ctx->depth + 1;
8744 ctxt->replaceEntities = ctx->replaceEntities;
8745 if (ctxt->validate) {
8746 ctxt->vctxt.error = ctx->vctxt.error;
8747 ctxt->vctxt.warning = ctx->vctxt.warning;
8748 /* Allocate the Node stack */
8749 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
Daniel Veillard39c7d712000-09-10 16:14:55 +00008750 if (ctxt->vctxt.nodeTab == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008751 xmlGenericError(xmlGenericErrorContext,
8752 "xmlParseCtxtExternalEntity: out of memory\n");
Daniel Veillard39c7d712000-09-10 16:14:55 +00008753 ctxt->validate = 0;
8754 ctxt->vctxt.error = NULL;
8755 ctxt->vctxt.warning = NULL;
8756 } else {
8757 ctxt->vctxt.nodeNr = 0;
8758 ctxt->vctxt.nodeMax = 4;
8759 ctxt->vctxt.node = NULL;
8760 }
Daniel Veillard87b95392000-08-12 21:12:04 +00008761 } else {
8762 ctxt->vctxt.error = NULL;
8763 ctxt->vctxt.warning = NULL;
8764 }
8765
8766 xmlParseContent(ctxt);
8767
8768 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00008769 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
Daniel Veillard87b95392000-08-12 21:12:04 +00008770 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8771 ctxt->sax->error(ctxt->userData,
8772 "chunk is not well balanced\n");
8773 ctxt->wellFormed = 0;
8774 ctxt->disableSAX = 1;
Daniel Veillard87b95392000-08-12 21:12:04 +00008775 } else if (RAW != 0) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00008776 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
Daniel Veillard87b95392000-08-12 21:12:04 +00008777 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8778 ctxt->sax->error(ctxt->userData,
8779 "extra content at the end of well balanced chunk\n");
8780 ctxt->wellFormed = 0;
8781 ctxt->disableSAX = 1;
Daniel Veillard87b95392000-08-12 21:12:04 +00008782 }
8783 if (ctxt->node != newDoc->children) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00008784 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
Daniel Veillard87b95392000-08-12 21:12:04 +00008785 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8786 ctxt->sax->error(ctxt->userData,
8787 "chunk is not well balanced\n");
8788 ctxt->wellFormed = 0;
8789 ctxt->disableSAX = 1;
Daniel Veillard87b95392000-08-12 21:12:04 +00008790 }
8791
8792 if (!ctxt->wellFormed) {
8793 if (ctxt->errNo == 0)
8794 ret = 1;
8795 else
8796 ret = ctxt->errNo;
8797 } else {
8798 if (list != NULL) {
8799 xmlNodePtr cur;
8800
8801 /*
8802 * Return the newly created nodeset after unlinking it from
8803 * they pseudo parent.
8804 */
8805 cur = newDoc->children->children;
8806 *list = cur;
8807 while (cur != NULL) {
8808 cur->parent = NULL;
8809 cur = cur->next;
8810 }
8811 newDoc->children->children = NULL;
8812 }
8813 ret = 0;
8814 }
8815 ctxt->sax = oldsax;
8816 xmlFreeParserCtxt(ctxt);
8817 newDoc->intSubset = NULL;
8818 newDoc->extSubset = NULL;
8819 xmlFreeDoc(newDoc);
8820
8821 return(ret);
8822}
8823
8824/**
Daniel Veillardcf461992000-03-14 18:30:20 +00008825 * xmlParseExternalEntity:
Daniel Veillard0142b842000-01-14 14:45:24 +00008826 * @doc: the document the chunk pertains to
Daniel Veillardcf461992000-03-14 18:30:20 +00008827 * @sax: the SAX handler bloc (possibly NULL)
8828 * @user_data: The user data returned on SAX callbacks (possibly NULL)
8829 * @depth: Used for loop detection, use 0
8830 * @URL: the URL for the entity to load
8831 * @ID: the System ID for the entity to load
8832 * @list: the return value for the set of parsed nodes
Daniel Veillard0142b842000-01-14 14:45:24 +00008833 *
Daniel Veillardcf461992000-03-14 18:30:20 +00008834 * Parse an external general entity
8835 * An external general parsed entity is well-formed if it matches the
8836 * production labeled extParsedEnt.
8837 *
8838 * [78] extParsedEnt ::= TextDecl? content
8839 *
8840 * Returns 0 if the entity is well formed, -1 in case of args problem and
8841 * the parser error code otherwise
Daniel Veillard0142b842000-01-14 14:45:24 +00008842 */
8843
Daniel Veillardcf461992000-03-14 18:30:20 +00008844int
8845xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
8846 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
8847 xmlParserCtxtPtr ctxt;
8848 xmlDocPtr newDoc;
8849 xmlSAXHandlerPtr oldsax = NULL;
8850 int ret = 0;
8851
8852 if (depth > 40) {
8853 return(XML_ERR_ENTITY_LOOP);
8854 }
8855
8856
8857
8858 if (list != NULL)
8859 *list = NULL;
8860 if ((URL == NULL) && (ID == NULL))
8861 return(-1);
Daniel Veillard87b95392000-08-12 21:12:04 +00008862 if (doc == NULL) /* @@ relax but check for dereferences */
8863 return(-1);
Daniel Veillardcf461992000-03-14 18:30:20 +00008864
8865
Daniel Veillard39c7d712000-09-10 16:14:55 +00008866 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
Daniel Veillardcf461992000-03-14 18:30:20 +00008867 if (ctxt == NULL) return(-1);
8868 ctxt->userData = ctxt;
8869 if (sax != NULL) {
8870 oldsax = ctxt->sax;
8871 ctxt->sax = sax;
8872 if (user_data != NULL)
8873 ctxt->userData = user_data;
8874 }
8875 newDoc = xmlNewDoc(BAD_CAST "1.0");
8876 if (newDoc == NULL) {
8877 xmlFreeParserCtxt(ctxt);
8878 return(-1);
8879 }
8880 if (doc != NULL) {
8881 newDoc->intSubset = doc->intSubset;
8882 newDoc->extSubset = doc->extSubset;
8883 }
8884 if (doc->URL != NULL) {
8885 newDoc->URL = xmlStrdup(doc->URL);
8886 }
8887 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8888 if (newDoc->children == NULL) {
8889 if (sax != NULL)
8890 ctxt->sax = oldsax;
8891 xmlFreeParserCtxt(ctxt);
8892 newDoc->intSubset = NULL;
8893 newDoc->extSubset = NULL;
8894 xmlFreeDoc(newDoc);
8895 return(-1);
8896 }
8897 nodePush(ctxt, newDoc->children);
8898 if (doc == NULL) {
8899 ctxt->myDoc = newDoc;
8900 } else {
8901 ctxt->myDoc = doc;
8902 newDoc->children->doc = doc;
8903 }
8904
8905 /*
8906 * Parse a possible text declaration first
8907 */
8908 GROW;
8909 if ((RAW == '<') && (NXT(1) == '?') &&
8910 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8911 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8912 xmlParseTextDecl(ctxt);
8913 }
8914
8915 /*
8916 * Doing validity checking on chunk doesn't make sense
8917 */
8918 ctxt->instate = XML_PARSER_CONTENT;
8919 ctxt->validate = 0;
8920 ctxt->depth = depth;
8921
8922 xmlParseContent(ctxt);
8923
8924 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00008925 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
Daniel Veillardcf461992000-03-14 18:30:20 +00008926 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8927 ctxt->sax->error(ctxt->userData,
8928 "chunk is not well balanced\n");
8929 ctxt->wellFormed = 0;
8930 ctxt->disableSAX = 1;
Daniel Veillardcf461992000-03-14 18:30:20 +00008931 } else if (RAW != 0) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00008932 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
Daniel Veillardcf461992000-03-14 18:30:20 +00008933 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8934 ctxt->sax->error(ctxt->userData,
8935 "extra content at the end of well balanced chunk\n");
8936 ctxt->wellFormed = 0;
8937 ctxt->disableSAX = 1;
Daniel Veillardcf461992000-03-14 18:30:20 +00008938 }
8939 if (ctxt->node != newDoc->children) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00008940 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
Daniel Veillardcf461992000-03-14 18:30:20 +00008941 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8942 ctxt->sax->error(ctxt->userData,
8943 "chunk is not well balanced\n");
8944 ctxt->wellFormed = 0;
8945 ctxt->disableSAX = 1;
Daniel Veillardcf461992000-03-14 18:30:20 +00008946 }
8947
8948 if (!ctxt->wellFormed) {
8949 if (ctxt->errNo == 0)
8950 ret = 1;
8951 else
8952 ret = ctxt->errNo;
8953 } else {
8954 if (list != NULL) {
8955 xmlNodePtr cur;
8956
8957 /*
8958 * Return the newly created nodeset after unlinking it from
8959 * they pseudo parent.
8960 */
8961 cur = newDoc->children->children;
8962 *list = cur;
8963 while (cur != NULL) {
8964 cur->parent = NULL;
8965 cur = cur->next;
8966 }
8967 newDoc->children->children = NULL;
8968 }
8969 ret = 0;
8970 }
8971 if (sax != NULL)
8972 ctxt->sax = oldsax;
8973 xmlFreeParserCtxt(ctxt);
8974 newDoc->intSubset = NULL;
8975 newDoc->extSubset = NULL;
8976 xmlFreeDoc(newDoc);
8977
8978 return(ret);
Daniel Veillard0142b842000-01-14 14:45:24 +00008979}
8980
8981/**
Daniel Veillardcf461992000-03-14 18:30:20 +00008982 * xmlParseBalancedChunk:
8983 * @doc: the document the chunk pertains to
8984 * @sax: the SAX handler bloc (possibly NULL)
8985 * @user_data: The user data returned on SAX callbacks (possibly NULL)
8986 * @depth: Used for loop detection, use 0
8987 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
8988 * @list: the return value for the set of parsed nodes
8989 *
8990 * Parse a well-balanced chunk of an XML document
8991 * called by the parser
8992 * The allowed sequence for the Well Balanced Chunk is the one defined by
8993 * the content production in the XML grammar:
8994 *
8995 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8996 *
8997 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
8998 * the parser error code otherwise
8999 */
9000
9001int
9002xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9003 void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
9004 xmlParserCtxtPtr ctxt;
9005 xmlDocPtr newDoc;
9006 xmlSAXHandlerPtr oldsax = NULL;
9007 int size;
9008 int ret = 0;
9009
9010 if (depth > 40) {
9011 return(XML_ERR_ENTITY_LOOP);
9012 }
9013
9014
9015 if (list != NULL)
9016 *list = NULL;
9017 if (string == NULL)
9018 return(-1);
9019
9020 size = xmlStrlen(string);
9021
9022 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9023 if (ctxt == NULL) return(-1);
9024 ctxt->userData = ctxt;
9025 if (sax != NULL) {
9026 oldsax = ctxt->sax;
9027 ctxt->sax = sax;
9028 if (user_data != NULL)
9029 ctxt->userData = user_data;
9030 }
9031 newDoc = xmlNewDoc(BAD_CAST "1.0");
9032 if (newDoc == NULL) {
9033 xmlFreeParserCtxt(ctxt);
9034 return(-1);
9035 }
9036 if (doc != NULL) {
9037 newDoc->intSubset = doc->intSubset;
9038 newDoc->extSubset = doc->extSubset;
9039 }
9040 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9041 if (newDoc->children == NULL) {
9042 if (sax != NULL)
9043 ctxt->sax = oldsax;
9044 xmlFreeParserCtxt(ctxt);
9045 newDoc->intSubset = NULL;
9046 newDoc->extSubset = NULL;
9047 xmlFreeDoc(newDoc);
9048 return(-1);
9049 }
9050 nodePush(ctxt, newDoc->children);
9051 if (doc == NULL) {
9052 ctxt->myDoc = newDoc;
9053 } else {
9054 ctxt->myDoc = doc;
9055 newDoc->children->doc = doc;
9056 }
9057 ctxt->instate = XML_PARSER_CONTENT;
9058 ctxt->depth = depth;
9059
9060 /*
9061 * Doing validity checking on chunk doesn't make sense
9062 */
9063 ctxt->validate = 0;
9064
9065 xmlParseContent(ctxt);
9066
9067 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00009068 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
Daniel Veillardcf461992000-03-14 18:30:20 +00009069 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9070 ctxt->sax->error(ctxt->userData,
9071 "chunk is not well balanced\n");
9072 ctxt->wellFormed = 0;
9073 ctxt->disableSAX = 1;
Daniel Veillardcf461992000-03-14 18:30:20 +00009074 } else if (RAW != 0) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00009075 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
Daniel Veillardcf461992000-03-14 18:30:20 +00009076 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9077 ctxt->sax->error(ctxt->userData,
9078 "extra content at the end of well balanced chunk\n");
9079 ctxt->wellFormed = 0;
9080 ctxt->disableSAX = 1;
Daniel Veillardcf461992000-03-14 18:30:20 +00009081 }
9082 if (ctxt->node != newDoc->children) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00009083 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
Daniel Veillardcf461992000-03-14 18:30:20 +00009084 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9085 ctxt->sax->error(ctxt->userData,
9086 "chunk is not well balanced\n");
9087 ctxt->wellFormed = 0;
9088 ctxt->disableSAX = 1;
Daniel Veillardcf461992000-03-14 18:30:20 +00009089 }
9090
9091 if (!ctxt->wellFormed) {
9092 if (ctxt->errNo == 0)
9093 ret = 1;
9094 else
9095 ret = ctxt->errNo;
9096 } else {
9097 if (list != NULL) {
9098 xmlNodePtr cur;
9099
9100 /*
9101 * Return the newly created nodeset after unlinking it from
9102 * they pseudo parent.
9103 */
9104 cur = newDoc->children->children;
9105 *list = cur;
9106 while (cur != NULL) {
9107 cur->parent = NULL;
9108 cur = cur->next;
9109 }
9110 newDoc->children->children = NULL;
9111 }
9112 ret = 0;
9113 }
9114 if (sax != NULL)
9115 ctxt->sax = oldsax;
9116 xmlFreeParserCtxt(ctxt);
9117 newDoc->intSubset = NULL;
9118 newDoc->extSubset = NULL;
9119 xmlFreeDoc(newDoc);
9120
9121 return(ret);
9122}
9123
9124/**
Daniel Veillardb1059e22000-09-16 14:02:43 +00009125 * xmlSAXParseEntity:
9126 * @sax: the SAX handler block
9127 * @filename: the filename
Daniel Veillard0142b842000-01-14 14:45:24 +00009128 *
Daniel Veillardb1059e22000-09-16 14:02:43 +00009129 * parse an XML external entity out of context and build a tree.
9130 * It use the given SAX function block to handle the parsing callback.
9131 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009132 *
Daniel Veillardb1059e22000-09-16 14:02:43 +00009133 * [78] extParsedEnt ::= TextDecl? content
9134 *
9135 * This correspond to a "Well Balanced" chunk
9136 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00009137 * Returns the resulting document tree
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009138 */
9139
Daniel Veillard1e346af1999-02-22 10:33:01 +00009140xmlDocPtr
Daniel Veillardb1059e22000-09-16 14:02:43 +00009141xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9142 xmlDocPtr ret;
9143 xmlParserCtxtPtr ctxt;
9144 char *directory = NULL;
9145
9146 ctxt = xmlCreateFileParserCtxt(filename);
9147 if (ctxt == NULL) {
9148 return(NULL);
9149 }
9150 if (sax != NULL) {
9151 if (ctxt->sax != NULL)
9152 xmlFree(ctxt->sax);
9153 ctxt->sax = sax;
9154 ctxt->userData = NULL;
9155 }
9156
9157 if ((ctxt->directory == NULL) && (directory == NULL))
9158 directory = xmlParserGetDirectory(filename);
9159
9160 xmlParseExtParsedEnt(ctxt);
9161
9162 if (ctxt->wellFormed)
9163 ret = ctxt->myDoc;
9164 else {
9165 ret = NULL;
9166 xmlFreeDoc(ctxt->myDoc);
9167 ctxt->myDoc = NULL;
9168 }
9169 if (sax != NULL)
9170 ctxt->sax = NULL;
9171 xmlFreeParserCtxt(ctxt);
9172
9173 return(ret);
9174}
9175
9176/**
9177 * xmlParseEntity:
9178 * @filename: the filename
9179 *
9180 * parse an XML external entity out of context and build a tree.
9181 *
9182 * [78] extParsedEnt ::= TextDecl? content
9183 *
9184 * This correspond to a "Well Balanced" chunk
9185 *
9186 * Returns the resulting document tree
9187 */
9188
9189xmlDocPtr
9190xmlParseEntity(const char *filename) {
9191 return(xmlSAXParseEntity(NULL, filename));
Daniel Veillard42dc9b31998-11-09 01:17:21 +00009192}
9193
9194/**
Daniel Veillardcf461992000-03-14 18:30:20 +00009195 * xmlCreateEntityParserCtxt:
9196 * @URL: the entity URL
9197 * @ID: the entity PUBLIC ID
9198 * @base: a posible base for the target URI
9199 *
9200 * Create a parser context for an external entity
9201 * Automatic support for ZLIB/Compress compressed document is provided
9202 * by default if found at compile-time.
9203 *
9204 * Returns the new parser context or NULL
9205 */
9206xmlParserCtxtPtr
9207xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9208 const xmlChar *base) {
9209 xmlParserCtxtPtr ctxt;
9210 xmlParserInputPtr inputStream;
9211 char *directory = NULL;
Daniel Veillard87b95392000-08-12 21:12:04 +00009212 xmlChar *uri;
9213
Daniel Veillardcf461992000-03-14 18:30:20 +00009214 ctxt = xmlNewParserCtxt();
9215 if (ctxt == NULL) {
9216 return(NULL);
9217 }
9218
Daniel Veillard87b95392000-08-12 21:12:04 +00009219 uri = xmlBuildURI(URL, base);
9220
9221 if (uri == NULL) {
9222 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9223 if (inputStream == NULL) {
9224 xmlFreeParserCtxt(ctxt);
9225 return(NULL);
9226 }
9227
9228 inputPush(ctxt, inputStream);
9229
9230 if ((ctxt->directory == NULL) && (directory == NULL))
9231 directory = xmlParserGetDirectory((char *)URL);
9232 if ((ctxt->directory == NULL) && (directory != NULL))
9233 ctxt->directory = directory;
9234 } else {
9235 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9236 if (inputStream == NULL) {
9237 xmlFreeParserCtxt(ctxt);
9238 return(NULL);
9239 }
9240
9241 inputPush(ctxt, inputStream);
9242
9243 if ((ctxt->directory == NULL) && (directory == NULL))
9244 directory = xmlParserGetDirectory((char *)uri);
9245 if ((ctxt->directory == NULL) && (directory != NULL))
9246 ctxt->directory = directory;
9247 xmlFree(uri);
Daniel Veillardcf461992000-03-14 18:30:20 +00009248 }
9249
Daniel Veillardcf461992000-03-14 18:30:20 +00009250 return(ctxt);
9251}
9252
Daniel Veillardb1059e22000-09-16 14:02:43 +00009253/************************************************************************
9254 * *
9255 * Front ends when parsing from a file *
9256 * *
9257 ************************************************************************/
9258
Daniel Veillardcf461992000-03-14 18:30:20 +00009259/**
9260 * xmlCreateFileParserCtxt:
Daniel Veillard11e00581998-10-24 18:27:49 +00009261 * @filename: the filename
9262 *
Daniel Veillardd692aa41999-02-28 21:54:31 +00009263 * Create a parser context for a file content.
9264 * Automatic support for ZLIB/Compress compressed document is provided
9265 * by default if found at compile-time.
Daniel Veillard11e00581998-10-24 18:27:49 +00009266 *
Daniel Veillardd692aa41999-02-28 21:54:31 +00009267 * Returns the new parser context or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00009268 */
Daniel Veillardd692aa41999-02-28 21:54:31 +00009269xmlParserCtxtPtr
9270xmlCreateFileParserCtxt(const char *filename)
9271{
9272 xmlParserCtxtPtr ctxt;
Daniel Veillard260a68f1998-08-13 03:39:55 +00009273 xmlParserInputPtr inputStream;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00009274 xmlParserInputBufferPtr buf;
Daniel Veillardb05deb71999-08-10 19:04:08 +00009275 char *directory = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00009276
Daniel Veillarde2d034d1999-07-27 19:52:06 +00009277 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
Daniel Veillard39c7d712000-09-10 16:14:55 +00009278 if (buf == NULL) {
9279 return(NULL);
9280 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00009281
Daniel Veillardb05deb71999-08-10 19:04:08 +00009282 ctxt = xmlNewParserCtxt();
Daniel Veillard260a68f1998-08-13 03:39:55 +00009283 if (ctxt == NULL) {
Daniel Veillard39c7d712000-09-10 16:14:55 +00009284 if (xmlDefaultSAXHandler.error != NULL) {
9285 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9286 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00009287 return(NULL);
9288 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00009289
9290 inputStream = xmlNewInputStream(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00009291 if (inputStream == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00009292 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00009293 return(NULL);
9294 }
9295
Daniel Veillard6454aec1999-09-02 22:04:43 +00009296 inputStream->filename = xmlMemStrdup(filename);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00009297 inputStream->buf = buf;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00009298 inputStream->base = inputStream->buf->buffer->content;
9299 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard260a68f1998-08-13 03:39:55 +00009300
9301 inputPush(ctxt, inputStream);
Daniel Veillardb05deb71999-08-10 19:04:08 +00009302 if ((ctxt->directory == NULL) && (directory == NULL))
9303 directory = xmlParserGetDirectory(filename);
9304 if ((ctxt->directory == NULL) && (directory != NULL))
9305 ctxt->directory = directory;
9306
Daniel Veillardd692aa41999-02-28 21:54:31 +00009307 return(ctxt);
9308}
9309
9310/**
Daniel Veillardcf461992000-03-14 18:30:20 +00009311 * xmlSAXParseFile:
Daniel Veillardd692aa41999-02-28 21:54:31 +00009312 * @sax: the SAX handler block
9313 * @filename: the filename
9314 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9315 * documents
9316 *
9317 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9318 * compressed document is provided by default if found at compile-time.
9319 * It use the given SAX function block to handle the parsing callback.
9320 * If sax is NULL, fallback to the default DOM tree building routines.
9321 *
9322 * Returns the resulting document tree
9323 */
9324
Daniel Veillard011b63c1999-06-02 17:44:04 +00009325xmlDocPtr
9326xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
Daniel Veillardd692aa41999-02-28 21:54:31 +00009327 int recovery) {
9328 xmlDocPtr ret;
9329 xmlParserCtxtPtr ctxt;
Daniel Veillardb05deb71999-08-10 19:04:08 +00009330 char *directory = NULL;
Daniel Veillardd692aa41999-02-28 21:54:31 +00009331
9332 ctxt = xmlCreateFileParserCtxt(filename);
Daniel Veillard39c7d712000-09-10 16:14:55 +00009333 if (ctxt == NULL) {
9334 return(NULL);
9335 }
Daniel Veillard27d88741999-05-29 11:51:49 +00009336 if (sax != NULL) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00009337 if (ctxt->sax != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00009338 xmlFree(ctxt->sax);
Daniel Veillard27d88741999-05-29 11:51:49 +00009339 ctxt->sax = sax;
9340 ctxt->userData = NULL;
9341 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00009342
Daniel Veillardb05deb71999-08-10 19:04:08 +00009343 if ((ctxt->directory == NULL) && (directory == NULL))
9344 directory = xmlParserGetDirectory(filename);
9345 if ((ctxt->directory == NULL) && (directory != NULL))
Daniel Veillardcf461992000-03-14 18:30:20 +00009346 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
Daniel Veillardb05deb71999-08-10 19:04:08 +00009347
Daniel Veillard260a68f1998-08-13 03:39:55 +00009348 xmlParseDocument(ctxt);
9349
Daniel Veillard517752b1999-04-05 12:20:10 +00009350 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009351 else {
9352 ret = NULL;
Daniel Veillard517752b1999-04-05 12:20:10 +00009353 xmlFreeDoc(ctxt->myDoc);
9354 ctxt->myDoc = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009355 }
Daniel Veillard97fea181999-06-26 23:07:37 +00009356 if (sax != NULL)
9357 ctxt->sax = NULL;
Daniel Veillardd692aa41999-02-28 21:54:31 +00009358 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00009359
9360 return(ret);
9361}
9362
Daniel Veillard42dc9b31998-11-09 01:17:21 +00009363/**
Daniel Veillardb1059e22000-09-16 14:02:43 +00009364 * xmlRecoverDoc:
9365 * @cur: a pointer to an array of xmlChar
9366 *
9367 * parse an XML in-memory document and build a tree.
9368 * In the case the document is not Well Formed, a tree is built anyway
9369 *
9370 * Returns the resulting document tree
9371 */
9372
9373xmlDocPtr
9374xmlRecoverDoc(xmlChar *cur) {
9375 return(xmlSAXParseDoc(NULL, cur, 1));
9376}
9377
9378/**
Daniel Veillardcf461992000-03-14 18:30:20 +00009379 * xmlParseFile:
Daniel Veillard42dc9b31998-11-09 01:17:21 +00009380 * @filename: the filename
9381 *
9382 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9383 * compressed document is provided by default if found at compile-time.
9384 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00009385 * Returns the resulting document tree
Daniel Veillard42dc9b31998-11-09 01:17:21 +00009386 */
9387
Daniel Veillard011b63c1999-06-02 17:44:04 +00009388xmlDocPtr
9389xmlParseFile(const char *filename) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009390 return(xmlSAXParseFile(NULL, filename, 0));
9391}
9392
9393/**
Daniel Veillardcf461992000-03-14 18:30:20 +00009394 * xmlRecoverFile:
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009395 * @filename: the filename
9396 *
9397 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9398 * compressed document is provided by default if found at compile-time.
9399 * In the case the document is not Well Formed, a tree is built anyway
9400 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00009401 * Returns the resulting document tree
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009402 */
9403
Daniel Veillard011b63c1999-06-02 17:44:04 +00009404xmlDocPtr
9405xmlRecoverFile(const char *filename) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009406 return(xmlSAXParseFile(NULL, filename, 1));
Daniel Veillard42dc9b31998-11-09 01:17:21 +00009407}
Daniel Veillard260a68f1998-08-13 03:39:55 +00009408
Daniel Veillardb1059e22000-09-16 14:02:43 +00009409
9410/**
9411 * xmlSetupParserForBuffer:
9412 * @ctxt: an XML parser context
9413 * @buffer: a xmlChar * buffer
9414 * @filename: a file name
9415 *
9416 * Setup the parser context to parse a new buffer; Clears any prior
9417 * contents from the parser context. The buffer parameter must not be
9418 * NULL, but the filename parameter can be
9419 */
9420void
9421xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9422 const char* filename)
9423{
9424 xmlParserInputPtr input;
9425
9426 input = xmlNewInputStream(ctxt);
9427 if (input == NULL) {
9428 perror("malloc");
9429 xmlFree(ctxt);
9430 return;
9431 }
9432
9433 xmlClearParserCtxt(ctxt);
9434 if (filename != NULL)
9435 input->filename = xmlMemStrdup(filename);
9436 input->base = buffer;
9437 input->cur = buffer;
9438 inputPush(ctxt, input);
9439}
9440
9441/**
9442 * xmlSAXUserParseFile:
9443 * @sax: a SAX handler
9444 * @user_data: The user data returned on SAX callbacks
9445 * @filename: a file name
9446 *
9447 * parse an XML file and call the given SAX handler routines.
9448 * Automatic support for ZLIB/Compress compressed document is provided
9449 *
9450 * Returns 0 in case of success or a error number otherwise
9451 */
9452int
9453xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9454 const char *filename) {
9455 int ret = 0;
9456 xmlParserCtxtPtr ctxt;
9457
9458 ctxt = xmlCreateFileParserCtxt(filename);
9459 if (ctxt == NULL) return -1;
9460 if (ctxt->sax != &xmlDefaultSAXHandler)
9461 xmlFree(ctxt->sax);
9462 ctxt->sax = sax;
9463 if (user_data != NULL)
9464 ctxt->userData = user_data;
9465
9466 xmlParseDocument(ctxt);
9467
9468 if (ctxt->wellFormed)
9469 ret = 0;
9470 else {
9471 if (ctxt->errNo != 0)
9472 ret = ctxt->errNo;
9473 else
9474 ret = -1;
9475 }
9476 if (sax != NULL)
9477 ctxt->sax = NULL;
9478 xmlFreeParserCtxt(ctxt);
9479
9480 return ret;
9481}
9482
9483/************************************************************************
9484 * *
9485 * Front ends when parsing from memory *
9486 * *
9487 ************************************************************************/
9488
Daniel Veillard11e00581998-10-24 18:27:49 +00009489/**
Daniel Veillardcf461992000-03-14 18:30:20 +00009490 * xmlCreateMemoryParserCtxt:
Daniel Veillardb1059e22000-09-16 14:02:43 +00009491 * @buffer: a pointer to a char array
9492 * @size: the size of the array
Daniel Veillard11e00581998-10-24 18:27:49 +00009493 *
Daniel Veillardd692aa41999-02-28 21:54:31 +00009494 * Create a parser context for an XML in-memory document.
Daniel Veillard11e00581998-10-24 18:27:49 +00009495 *
Daniel Veillardd692aa41999-02-28 21:54:31 +00009496 * Returns the new parser context or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00009497 */
Daniel Veillardd692aa41999-02-28 21:54:31 +00009498xmlParserCtxtPtr
9499xmlCreateMemoryParserCtxt(char *buffer, int size) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00009500 xmlParserCtxtPtr ctxt;
9501 xmlParserInputPtr input;
Daniel Veillard46e370e2000-07-21 20:32:03 +00009502 xmlParserInputBufferPtr buf;
Daniel Veillard260a68f1998-08-13 03:39:55 +00009503
Daniel Veillardb1059e22000-09-16 14:02:43 +00009504 if (buffer == NULL)
9505 return(NULL);
9506 if (size <= 0)
Daniel Veillardb566ce12000-03-04 11:39:42 +00009507 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00009508
Daniel Veillardb05deb71999-08-10 19:04:08 +00009509 ctxt = xmlNewParserCtxt();
Daniel Veillardcf461992000-03-14 18:30:20 +00009510 if (ctxt == NULL)
Daniel Veillard260a68f1998-08-13 03:39:55 +00009511 return(NULL);
Daniel Veillardb05deb71999-08-10 19:04:08 +00009512
Daniel Veillard46e370e2000-07-21 20:32:03 +00009513 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9514 if (buf == NULL) return(NULL);
9515
Daniel Veillardb05deb71999-08-10 19:04:08 +00009516 input = xmlNewInputStream(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00009517 if (input == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00009518 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00009519 return(NULL);
9520 }
9521
9522 input->filename = NULL;
Daniel Veillard46e370e2000-07-21 20:32:03 +00009523 input->buf = buf;
9524 input->base = input->buf->buffer->content;
9525 input->cur = input->buf->buffer->content;
Daniel Veillard260a68f1998-08-13 03:39:55 +00009526
9527 inputPush(ctxt, input);
Daniel Veillardd692aa41999-02-28 21:54:31 +00009528 return(ctxt);
9529}
9530
9531/**
Daniel Veillardcf461992000-03-14 18:30:20 +00009532 * xmlSAXParseMemory:
Daniel Veillardd692aa41999-02-28 21:54:31 +00009533 * @sax: the SAX handler block
9534 * @buffer: an pointer to a char array
Daniel Veillard51e3b151999-11-12 17:02:31 +00009535 * @size: the size of the array
9536 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
Daniel Veillardd692aa41999-02-28 21:54:31 +00009537 * documents
9538 *
9539 * parse an XML in-memory block and use the given SAX function block
9540 * to handle the parsing callback. If sax is NULL, fallback to the default
9541 * DOM tree building routines.
9542 *
Daniel Veillardd692aa41999-02-28 21:54:31 +00009543 * Returns the resulting document tree
9544 */
9545xmlDocPtr
9546xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9547 xmlDocPtr ret;
9548 xmlParserCtxtPtr ctxt;
9549
9550 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9551 if (ctxt == NULL) return(NULL);
Daniel Veillard27d88741999-05-29 11:51:49 +00009552 if (sax != NULL) {
9553 ctxt->sax = sax;
9554 ctxt->userData = NULL;
9555 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00009556
9557 xmlParseDocument(ctxt);
9558
Daniel Veillard517752b1999-04-05 12:20:10 +00009559 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009560 else {
9561 ret = NULL;
Daniel Veillard517752b1999-04-05 12:20:10 +00009562 xmlFreeDoc(ctxt->myDoc);
9563 ctxt->myDoc = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009564 }
Daniel Veillard97fea181999-06-26 23:07:37 +00009565 if (sax != NULL)
9566 ctxt->sax = NULL;
Daniel Veillardd692aa41999-02-28 21:54:31 +00009567 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00009568
9569 return(ret);
9570}
9571
Daniel Veillard42dc9b31998-11-09 01:17:21 +00009572/**
Daniel Veillardcf461992000-03-14 18:30:20 +00009573 * xmlParseMemory:
Daniel Veillard1e346af1999-02-22 10:33:01 +00009574 * @buffer: an pointer to a char array
Daniel Veillard42dc9b31998-11-09 01:17:21 +00009575 * @size: the size of the array
9576 *
9577 * parse an XML in-memory block and build a tree.
9578 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00009579 * Returns the resulting document tree
Daniel Veillard42dc9b31998-11-09 01:17:21 +00009580 */
9581
9582xmlDocPtr xmlParseMemory(char *buffer, int size) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009583 return(xmlSAXParseMemory(NULL, buffer, size, 0));
9584}
9585
9586/**
Daniel Veillardcf461992000-03-14 18:30:20 +00009587 * xmlRecoverMemory:
Daniel Veillard1e346af1999-02-22 10:33:01 +00009588 * @buffer: an pointer to a char array
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009589 * @size: the size of the array
9590 *
9591 * parse an XML in-memory block and build a tree.
9592 * In the case the document is not Well Formed, a tree is built anyway
9593 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00009594 * Returns the resulting document tree
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009595 */
9596
9597xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9598 return(xmlSAXParseMemory(NULL, buffer, size, 1));
Daniel Veillard42dc9b31998-11-09 01:17:21 +00009599}
Daniel Veillard260a68f1998-08-13 03:39:55 +00009600
Daniel Veillard7a66ee61999-09-26 11:31:02 +00009601/**
9602 * xmlSAXUserParseMemory:
9603 * @sax: a SAX handler
9604 * @user_data: The user data returned on SAX callbacks
9605 * @buffer: an in-memory XML document input
Daniel Veillard51e3b151999-11-12 17:02:31 +00009606 * @size: the length of the XML document in bytes
Daniel Veillard7a66ee61999-09-26 11:31:02 +00009607 *
9608 * A better SAX parsing routine.
9609 * parse an XML in-memory buffer and call the given SAX handler routines.
9610 *
9611 * Returns 0 in case of success or a error number otherwise
9612 */
9613int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
9614 char *buffer, int size) {
9615 int ret = 0;
9616 xmlParserCtxtPtr ctxt;
Daniel Veillard87b95392000-08-12 21:12:04 +00009617 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7a66ee61999-09-26 11:31:02 +00009618
9619 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9620 if (ctxt == NULL) return -1;
Daniel Veillard87b95392000-08-12 21:12:04 +00009621 if (sax != NULL) {
9622 oldsax = ctxt->sax;
9623 ctxt->sax = sax;
9624 }
Daniel Veillard7a66ee61999-09-26 11:31:02 +00009625 ctxt->userData = user_data;
9626
9627 xmlParseDocument(ctxt);
9628
9629 if (ctxt->wellFormed)
9630 ret = 0;
9631 else {
9632 if (ctxt->errNo != 0)
9633 ret = ctxt->errNo;
9634 else
9635 ret = -1;
9636 }
Daniel Veillard87b95392000-08-12 21:12:04 +00009637 if (sax != NULL) {
9638 ctxt->sax = oldsax;
9639 }
Daniel Veillard7a66ee61999-09-26 11:31:02 +00009640 xmlFreeParserCtxt(ctxt);
9641
9642 return ret;
9643}
9644
Daniel Veillardb1059e22000-09-16 14:02:43 +00009645/**
9646 * xmlCreateDocParserCtxt:
9647 * @cur: a pointer to an array of xmlChar
9648 *
9649 * Creates a parser context for an XML in-memory document.
9650 *
9651 * Returns the new parser context or NULL
9652 */
9653xmlParserCtxtPtr
9654xmlCreateDocParserCtxt(xmlChar *cur) {
9655 int len;
9656
9657 if (cur == NULL)
9658 return(NULL);
9659 len = xmlStrlen(cur);
9660 return(xmlCreateMemoryParserCtxt((char *)cur, len));
9661}
9662
9663/**
9664 * xmlSAXParseDoc:
9665 * @sax: the SAX handler block
9666 * @cur: a pointer to an array of xmlChar
9667 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9668 * documents
9669 *
9670 * parse an XML in-memory document and build a tree.
9671 * It use the given SAX function block to handle the parsing callback.
9672 * If sax is NULL, fallback to the default DOM tree building routines.
9673 *
9674 * Returns the resulting document tree
9675 */
9676
9677xmlDocPtr
9678xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
9679 xmlDocPtr ret;
9680 xmlParserCtxtPtr ctxt;
9681
9682 if (cur == NULL) return(NULL);
9683
9684
9685 ctxt = xmlCreateDocParserCtxt(cur);
9686 if (ctxt == NULL) return(NULL);
9687 if (sax != NULL) {
9688 ctxt->sax = sax;
9689 ctxt->userData = NULL;
9690 }
9691
9692 xmlParseDocument(ctxt);
9693 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9694 else {
9695 ret = NULL;
9696 xmlFreeDoc(ctxt->myDoc);
9697 ctxt->myDoc = NULL;
9698 }
9699 if (sax != NULL)
9700 ctxt->sax = NULL;
9701 xmlFreeParserCtxt(ctxt);
9702
9703 return(ret);
9704}
9705
9706/**
9707 * xmlParseDoc:
9708 * @cur: a pointer to an array of xmlChar
9709 *
9710 * parse an XML in-memory document and build a tree.
9711 *
9712 * Returns the resulting document tree
9713 */
9714
9715xmlDocPtr
9716xmlParseDoc(xmlChar *cur) {
9717 return(xmlSAXParseDoc(NULL, cur, 0));
9718}
9719
Daniel Veillard260a68f1998-08-13 03:39:55 +00009720
Daniel Veillardb05deb71999-08-10 19:04:08 +00009721/************************************************************************
9722 * *
Daniel Veillard51e3b151999-11-12 17:02:31 +00009723 * Miscellaneous *
Daniel Veillardb05deb71999-08-10 19:04:08 +00009724 * *
9725 ************************************************************************/
9726
Daniel Veillard7cfce322000-10-04 12:40:27 +00009727#ifdef LIBXML_XPATH_ENABLED
9728#include <libxml/xpath.h>
9729#endif
9730
/* Set to 1 by xmlInitParser() once its setup has run, reset to 0 by xmlCleanupParser(). */
static int xmlParserInitialized = 0;
9732
9733/**
9734 * xmlInitParser:
9735 *
9736 * Initialization function for the XML parser.
9737 * This is not reentrant. Call once before processing in case of
9738 * use in multithreaded programs.
9739 */
9740
9741void
9742xmlInitParser(void) {
9743 if (xmlParserInitialized) return;
9744
9745 xmlInitCharEncodingHandlers();
9746 xmlInitializePredefinedEntities();
9747 xmlDefaultSAXHandlerInit();
Daniel Veillard7cfce322000-10-04 12:40:27 +00009748 xmlRegisterDefaultInputCallbacks();
9749 xmlRegisterDefaultOutputCallbacks();
Daniel Veillardbc765302000-10-01 18:23:35 +00009750#ifdef LIBXML_HTML_ENABLED
9751 htmlInitAutoClose();
9752 htmlDefaultSAXHandlerInit();
9753#endif
Daniel Veillard7cfce322000-10-04 12:40:27 +00009754#ifdef LIBXML_XPATH_ENABLED
9755 xmlXPathInit();
9756#endif
Daniel Veillardbc765302000-10-01 18:23:35 +00009757 xmlParserInitialized = 1;
9758}
9759
Daniel Veillarda819dac1999-11-24 18:04:22 +00009760/**
9761 * xmlCleanupParser:
9762 *
9763 * Cleanup function for the XML parser. It tries to reclaim all
9764 * parsing related global memory allocated for the parser processing.
9765 * It doesn't deallocate any document related memory. Calling this
9766 * function should not prevent reusing the parser.
9767 */
9768
9769void
9770xmlCleanupParser(void) {
Daniel Veillardbc765302000-10-01 18:23:35 +00009771 xmlParserInitialized = 0;
Daniel Veillarda819dac1999-11-24 18:04:22 +00009772 xmlCleanupCharEncodingHandlers();
Daniel Veillardf5c2c871999-12-01 09:51:45 +00009773 xmlCleanupPredefinedEntities();
Daniel Veillarda819dac1999-11-24 18:04:22 +00009774}
Daniel Veillardb05deb71999-08-10 19:04:08 +00009775
Daniel Veillard11e00581998-10-24 18:27:49 +00009776/**
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00009777 * xmlPedanticParserDefault:
9778 * @val: int 0 or 1
9779 *
9780 * Set and return the previous value for enabling pedantic warnings.
9781 *
9782 * Returns the last value for 0 for no substitution, 1 for substitution.
9783 */
9784
9785int
9786xmlPedanticParserDefault(int val) {
9787 int old = xmlPedanticParserDefaultValue;
9788
9789 xmlPedanticParserDefaultValue = val;
9790 return(old);
9791}
9792
9793/**
Daniel Veillardcf461992000-03-14 18:30:20 +00009794 * xmlSubstituteEntitiesDefault:
Daniel Veillardb05deb71999-08-10 19:04:08 +00009795 * @val: int 0 or 1
9796 *
9797 * Set and return the previous value for default entity support.
9798 * Initially the parser always keep entity references instead of substituting
9799 * entity values in the output. This function has to be used to change the
9800 * default parser behaviour
9801 * SAX::subtituteEntities() has to be used for changing that on a file by
9802 * file basis.
9803 *
9804 * Returns the last value for 0 for no substitution, 1 for substitution.
9805 */
9806
9807int
9808xmlSubstituteEntitiesDefault(int val) {
9809 int old = xmlSubstituteEntitiesDefaultValue;
9810
9811 xmlSubstituteEntitiesDefaultValue = val;
9812 return(old);
9813}
9814
Daniel Veillardfb76c402000-03-04 11:39:42 +00009815/**
9816 * xmlKeepBlanksDefault:
9817 * @val: int 0 or 1
9818 *
9819 * Set and return the previous value for default blanks text nodes support.
9820 * The 1.x version of the parser used an heuristic to try to detect
9821 * ignorable white spaces. As a result the SAX callback was generating
9822 * ignorableWhitespace() callbacks instead of characters() one, and when
9823 * using the DOM output text nodes containing those blanks were not generated.
9824 * The 2.x and later version will switch to the XML standard way and
9825 * ignorableWhitespace() are only generated when running the parser in
9826 * validating mode and when the current element doesn't allow CDATA or
9827 * mixed content.
9828 * This function is provided as a way to force the standard behaviour
9829 * on 1.X libs and to switch back to the old mode for compatibility when
9830 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
9831 * by using xmlIsBlankNode() commodity function to detect the "empty"
9832 * nodes generated.
9833 * This value also affect autogeneration of indentation when saving code
9834 * if blanks sections are kept, indentation is not generated.
9835 *
9836 * Returns the last value for 0 for no substitution, 1 for substitution.
9837 */
9838
9839int
9840xmlKeepBlanksDefault(int val) {
9841 int old = xmlKeepBlanksDefaultValue;
9842
9843 xmlKeepBlanksDefaultValue = val;
9844 xmlIndentTreeOutput = !val;
9845 return(old);
9846}
9847