blob: 16929b68db804374d489b88de214a09d0c925f41 [file] [log] [blame]
Daniel Veillard260a68f1998-08-13 03:39:55 +00001/*
Daniel Veillardb1059e22000-09-16 14:02:43 +00002 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
Daniel Veillard260a68f1998-08-13 03:39:55 +00004 *
Daniel Veillarde0854c32000-08-27 21:12:29 +00005 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
27 *
Daniel Veillard260a68f1998-08-13 03:39:55 +000028 * See Copyright for the status of this software.
29 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000030 * Daniel.Veillard@w3.org
Daniel Veillard260a68f1998-08-13 03:39:55 +000031 */
32
33#ifdef WIN32
Daniel Veillard3c558c31999-12-22 11:30:41 +000034#include "win32config.h"
Daniel Veillard39c7d712000-09-10 16:14:55 +000035#define XML_DIR_SEP '\\'
Daniel Veillard260a68f1998-08-13 03:39:55 +000036#else
Daniel Veillard7f7d1111999-09-22 09:46:25 +000037#include "config.h"
Daniel Veillard39c7d712000-09-10 16:14:55 +000038#define XML_DIR_SEP '/'
Daniel Veillard260a68f1998-08-13 03:39:55 +000039#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +000040
Daniel Veillard260a68f1998-08-13 03:39:55 +000041#include <stdio.h>
Daniel Veillardb71379b2000-10-09 12:30:39 +000042#include <stdlib.h>
Daniel Veillard32bc74e2000-07-14 14:49:25 +000043#include <string.h>
Daniel Veillardb71379b2000-10-09 12:30:39 +000044#include <libxml/xmlmemory.h>
45#include <libxml/tree.h>
46#include <libxml/parser.h>
47#include <libxml/parserInternals.h>
48#include <libxml/valid.h>
49#include <libxml/entities.h>
50#include <libxml/xmlerror.h>
51#include <libxml/encoding.h>
52#include <libxml/xmlIO.h>
53#include <libxml/uri.h>
54
Daniel Veillard7f7d1111999-09-22 09:46:25 +000055#ifdef HAVE_CTYPE_H
56#include <ctype.h>
57#endif
58#ifdef HAVE_STDLIB_H
Seth Alvese7f12e61998-10-01 20:51:15 +000059#include <stdlib.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000060#endif
61#ifdef HAVE_SYS_STAT_H
Daniel Veillard260a68f1998-08-13 03:39:55 +000062#include <sys/stat.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000063#endif
Daniel Veillard260a68f1998-08-13 03:39:55 +000064#ifdef HAVE_FCNTL_H
65#include <fcntl.h>
66#endif
67#ifdef HAVE_UNISTD_H
68#include <unistd.h>
69#endif
70#ifdef HAVE_ZLIB_H
71#include <zlib.h>
72#endif
73
Daniel Veillard260a68f1998-08-13 03:39:55 +000074
/*
 * Buffer size constants.
 * NOTE(review): presumably the initial sizes of the temporary buffers
 * allocated by the parsing routines; no use is visible in this part of
 * the file - confirm against the callers.
 */
#define XML_PARSER_BIG_BUFFER_SIZE 1000
#define XML_PARSER_BUFFER_SIZE 100
77
/*
 * Various global defaults for parsing
 *
 * NOTE(review): apart from xmlParserDebugEntities, none of these is read
 * in the part of the file reviewed here; the per-variable notes below are
 * inferred from the names and must be confirmed against their users.
 */
/* presumably: default for reporting warnings (enabled) - confirm */
int xmlGetWarningsDefaultValue = 1;
/*
 * When non-zero, xmlPopInput(), xmlPushInput() and
 * xmlParserHandlePEReference() trace their activity through
 * xmlGenericError().
 */
int xmlParserDebugEntities = 0;
/* presumably: default for entity substitution (disabled) - confirm */
int xmlSubstituteEntitiesDefaultValue = 0;
/* presumably: default for validity checking (disabled) - confirm */
int xmlDoValidityCheckingDefaultValue = 0;
/* presumably: default for pedantic parsing (disabled) - confirm */
int xmlPedanticParserDefaultValue = 0;
/* presumably: default for keeping blank text (enabled) - confirm */
int xmlKeepBlanksDefaultValue = 1;
Daniel Veillard14fff061999-06-22 21:49:07 +000087
/*
 * List of XML prefixed PI allowed by W3C specs
 *
 * The array is terminated by a NULL entry, so it can be walked without
 * knowing its length.
 */

const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
Daniel Veillarde2d034d1999-07-27 19:52:06 +000096
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
/*
 * Forward declarations.  xmlParserHandlePEReference() is called by the
 * SKIP and NEXTL macros and by xmlSkipBlankChars(), all of which appear
 * before its definition.
 */
void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);
Daniel Veillardbe803962000-06-28 23:40:59 +0000101
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000102
Daniel Veillard260a68f1998-08-13 03:39:55 +0000103/************************************************************************
104 * *
105 * Parser stacks related functions and macros *
106 * *
107 ************************************************************************/
Daniel Veillard011b63c1999-06-02 17:44:04 +0000108
/*
 * Forward declaration, so the routine can be referenced before its
 * definition.
 */
xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
                                     const xmlChar ** str);
Daniel Veillard011b63c1999-06-02 17:44:04 +0000111
/*
 * Generic function for accessing stacks in the Parser Context
 *
 * PUSH_AND_POP(scope, type, name) expands to a pair of functions working
 * on four fields of the parser context: ctxt->name (cached top of stack),
 * ctxt->nameTab (storage), ctxt->nameNr (number of items) and
 * ctxt->nameMax (allocated slots).
 *
 *   scope int  namePush(ctxt, value)
 *       Appends value, doubling the table when it is full.  Returns the
 *       index the value was stored at, or 0 when the table could not be
 *       grown (so 0 is ambiguous with "pushed at index 0").  On such a
 *       failure the stack is left untouched: the reallocation result is
 *       held in a temporary so that the existing table is neither leaked
 *       nor replaced by NULL, and nameMax keeps describing the real size.
 *
 *   scope type namePop(ctxt)
 *       Removes and returns the top item (0 when the stack is empty),
 *       clears the vacated slot and refreshes ctxt->name with the new top,
 *       or NULL when the stack becomes empty.
 */

#define PUSH_AND_POP(scope, type, name)					\
scope int name##Push(xmlParserCtxtPtr ctxt, type value) {		\
    if (ctxt->name##Nr >= ctxt->name##Max) {				\
        int newMax = ctxt->name##Max * 2;				\
        type *newTab = (type *) xmlRealloc(ctxt->name##Tab,		\
                         newMax * sizeof(ctxt->name##Tab[0]));		\
        if (newTab == NULL) {						\
            xmlGenericError(xmlGenericErrorContext,			\
                    "realloc failed !\n");				\
            return(0);							\
        }								\
        ctxt->name##Tab = newTab;					\
        ctxt->name##Max = newMax;					\
    }									\
    ctxt->name##Tab[ctxt->name##Nr] = value;				\
    ctxt->name = value;							\
    return(ctxt->name##Nr++);						\
}									\
scope type name##Pop(xmlParserCtxtPtr ctxt) {				\
    type ret;								\
    if (ctxt->name##Nr <= 0) return(0);					\
    ctxt->name##Nr--;							\
    if (ctxt->name##Nr > 0)						\
        ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];		\
    else								\
        ctxt->name = NULL;						\
    ret = ctxt->name##Tab[ctxt->name##Nr];				\
    ctxt->name##Tab[ctxt->name##Nr] = 0;				\
    return(ret);							\
}
144
/*
 * Those macros actually generate the functions:
 *   inputPush() / inputPop()  for the stack of xmlParserInputPtr,
 *   nodePush()  / nodePop()   for the stack of xmlNodePtr,
 *   namePush()  / namePop()   for the stack of xmlChar *.
 */
PUSH_AND_POP(extern, xmlParserInputPtr, input)
PUSH_AND_POP(extern, xmlNodePtr, node)
PUSH_AND_POP(extern, xmlChar*, name)
Daniel Veillard260a68f1998-08-13 03:39:55 +0000151
Daniel Veillardcf461992000-03-14 18:30:20 +0000152int spacePush(xmlParserCtxtPtr ctxt, int val) {
153 if (ctxt->spaceNr >= ctxt->spaceMax) {
154 ctxt->spaceMax *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000155 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
Daniel Veillardcf461992000-03-14 18:30:20 +0000156 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
157 if (ctxt->spaceTab == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000158 xmlGenericError(xmlGenericErrorContext,
159 "realloc failed !\n");
Daniel Veillardcf461992000-03-14 18:30:20 +0000160 return(0);
161 }
162 }
163 ctxt->spaceTab[ctxt->spaceNr] = val;
164 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
165 return(ctxt->spaceNr++);
166}
167
168int spacePop(xmlParserCtxtPtr ctxt) {
169 int ret;
170 if (ctxt->spaceNr <= 0) return(0);
171 ctxt->spaceNr--;
172 if (ctxt->spaceNr > 0)
173 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
174 else
175 ctxt->space = NULL;
176 ret = ctxt->spaceTab[ctxt->spaceNr];
177 ctxt->spaceTab[ctxt->spaceNr] = -1;
178 return(ret);
179}
180
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.  Every one of them expects a variable named "ctxt"
 * (the parser context) to be in scope at the point of use.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR  return the current pointer to the xmlChar to be parsed.
 *            To be used with extreme caution since operations consuming
 *            characters may move the input buffer to a different location !
 *   CUR      returns the pending ctxt->token when there is one, otherwise
 *            the xmlChar under the input cursor.
 *            This should be used internally by the parser
 *            only to compare to ASCII values otherwise it would break when
 *            running with UTF-8 encoding.
 *   RAW      same as CUR but only looks in the input buffer: it evaluates
 *            to -1 while a token is pending in ctxt->token.
 *   NXT(n)   returns the n'th next xmlChar. Same as CUR, it should be used
 *            only to compare on ASCII based substring.
 *   SKIP(n)  Skip n xmlChar, and must also be used only to skip ASCII defined
 *            strings within the parser.  The argument is evaluated twice.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT     Skip to the next character, this does the proper decoding
 *            in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip l xmlChars in the input buffer
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *            to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * SKIP: move the cursor (and ctxt->nbChars) forward by val, let
 * xmlParserHandlePEReference() look at a '%' found at the new position,
 * then pop the current input if the cursor sits on a 0 byte and
 * xmlParserInputGrow() does not return a positive value.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SHRINK: call xmlParserInputShrink() on the current input, then apply
 * the same end-of-input check as SKIP.
 */
#define SHRINK do {							\
    xmlParserInputShrink(ctxt->input);					\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * GROW: call xmlParserInputGrow() on the current input, then apply the
 * same end-of-input check as SKIP (which calls xmlParserInputGrow() a
 * second time when the cursor sits on a 0 byte).
 */
#define GROW do {							\
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/* Shorthand for xmlSkipBlankChars() on the current context. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Shorthand for xmlNextChar() on the current context. */
#define NEXT xmlNextChar(ctxt)
Daniel Veillardcf461992000-03-14 18:30:20 +0000246
/*
 * NEXTL(l): advance the input cursor by l xmlChars.  The line/column
 * counters are updated from the character under the cursor *before* the
 * move (a '\n' starts a new line, anything else moves one column), any
 * pending ctxt->token is dropped, and a '%' found at the new position is
 * handed to xmlParserHandlePEReference().
 * The argument is parenthesized so that an expression can be passed.
 * Expects a variable named "ctxt" in scope.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */\
  } while (0)

/*
 * CUR_CHAR(l): xmlCurrentChar() on the context, l (an lvalue) receives
 * the length.  CUR_SCHAR(s, l): same through xmlStringCurrentChar() on
 * the string s.
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): store character v at b[i] and advance the index i.
 * A character of length l == 1 is stored directly as a single xmlChar,
 * anything else goes through xmlCopyChar(), whose return value is added
 * to i.  Wrapped in do { } while (0) so that it behaves as one statement
 * (safe as the body of an unbraced if/else); all arguments are
 * parenthesized.  b and i are evaluated more than once.
 */
#define COPY_BUF(l,b,i,v) do {						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyChar((l),&(b)[(i)],(v));				\
  } while (0)
Daniel Veillardcf461992000-03-14 18:30:20 +0000262
263/**
Daniel Veillardcf461992000-03-14 18:30:20 +0000264 * xmlSkipBlankChars:
265 * @ctxt: the XML parser context
266 *
267 * skip all blanks character found at that point in the input streams.
268 * It pops up finished entities in the process if allowable at that point.
269 *
270 * Returns the number of space chars skipped
271 */
272
273int
274xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
275 int cur, res = 0;
276
Daniel Veillarde0854c32000-08-27 21:12:29 +0000277 /*
278 * It's Okay to use CUR/NEXT here since all the blanks are on
279 * the ASCII range.
280 */
Daniel Veillardcf461992000-03-14 18:30:20 +0000281 do {
282 cur = CUR;
Daniel Veillarde0854c32000-08-27 21:12:29 +0000283 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillardcf461992000-03-14 18:30:20 +0000284 NEXT;
285 cur = CUR;
286 res++;
287 }
288 while ((cur == 0) && (ctxt->inputNr > 1) &&
289 (ctxt->instate != XML_PARSER_COMMENT)) {
290 xmlPopInput(ctxt);
291 cur = CUR;
292 }
Daniel Veillarde0854c32000-08-27 21:12:29 +0000293 /*
294 * Need to handle support of entities branching here
295 */
Daniel Veillardcf461992000-03-14 18:30:20 +0000296 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
Daniel Veillardb1059e22000-09-16 14:02:43 +0000297 /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */
Daniel Veillarde0854c32000-08-27 21:12:29 +0000298 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
Daniel Veillardcf461992000-03-14 18:30:20 +0000299 return(res);
300}
Daniel Veillard260a68f1998-08-13 03:39:55 +0000301
Daniel Veillardb05deb71999-08-10 19:04:08 +0000302/************************************************************************
303 * *
Daniel Veillardb1059e22000-09-16 14:02:43 +0000304 * Commodity functions to handle entities *
Daniel Veillardb05deb71999-08-10 19:04:08 +0000305 * *
306 ************************************************************************/
Daniel Veillard260a68f1998-08-13 03:39:55 +0000307
Daniel Veillard11e00581998-10-24 18:27:49 +0000308/**
309 * xmlPopInput:
310 * @ctxt: an XML parser context
311 *
Daniel Veillard260a68f1998-08-13 03:39:55 +0000312 * xmlPopInput: the current input pointed by ctxt->input came to an end
313 * pop it and return the next char.
314 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000315 * Returns the current xmlChar in the parser context
Daniel Veillard260a68f1998-08-13 03:39:55 +0000316 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000317xmlChar
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000318xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000319 if (ctxt->inputNr == 1) return(0); /* End of main Input */
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000320 if (xmlParserDebugEntities)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000321 xmlGenericError(xmlGenericErrorContext,
322 "Popping input %d\n", ctxt->inputNr);
Daniel Veillardbc50b591999-03-01 12:28:53 +0000323 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillardb05deb71999-08-10 19:04:08 +0000324 if ((*ctxt->input->cur == 0) &&
325 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
326 return(xmlPopInput(ctxt));
Daniel Veillard260a68f1998-08-13 03:39:55 +0000327 return(CUR);
328}
329
Daniel Veillard11e00581998-10-24 18:27:49 +0000330/**
331 * xmlPushInput:
332 * @ctxt: an XML parser context
333 * @input: an XML parser input fragment (entity, XML fragment ...).
334 *
Daniel Veillard260a68f1998-08-13 03:39:55 +0000335 * xmlPushInput: switch to a new input stream which is stacked on top
336 * of the previous one(s).
337 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000338void
339xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000340 if (input == NULL) return;
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000341
342 if (xmlParserDebugEntities) {
343 if ((ctxt->input != NULL) && (ctxt->input->filename))
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000344 xmlGenericError(xmlGenericErrorContext,
345 "%s(%d): ", ctxt->input->filename,
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000346 ctxt->input->line);
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000347 xmlGenericError(xmlGenericErrorContext,
348 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000349 }
Daniel Veillard260a68f1998-08-13 03:39:55 +0000350 inputPush(ctxt, input);
Daniel Veillardcf461992000-03-14 18:30:20 +0000351 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +0000352}
353
Daniel Veillard11e00581998-10-24 18:27:49 +0000354/**
Daniel Veillardb05deb71999-08-10 19:04:08 +0000355 * xmlParseCharRef:
356 * @ctxt: an XML parser context
357 *
358 * parse Reference declarations
359 *
360 * [66] CharRef ::= '&#' [0-9]+ ';' |
361 * '&#x' [0-9a-fA-F]+ ';'
362 *
363 * [ WFC: Legal Character ]
364 * Characters referred to using character references must match the
365 * production for Char.
366 *
Daniel Veillard10a2c651999-12-12 13:03:50 +0000367 * Returns the value parsed (as an int), 0 in case of error
Daniel Veillardb05deb71999-08-10 19:04:08 +0000368 */
369int
370xmlParseCharRef(xmlParserCtxtPtr ctxt) {
371 int val = 0;
Daniel Veillarde0854c32000-08-27 21:12:29 +0000372 int count = 0;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000373
374 if (ctxt->token != 0) {
375 val = ctxt->token;
376 ctxt->token = 0;
377 return(val);
378 }
Daniel Veillarde0854c32000-08-27 21:12:29 +0000379 /*
380 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
381 */
Daniel Veillardcf461992000-03-14 18:30:20 +0000382 if ((RAW == '&') && (NXT(1) == '#') &&
Daniel Veillardb05deb71999-08-10 19:04:08 +0000383 (NXT(2) == 'x')) {
384 SKIP(3);
Daniel Veillarde0854c32000-08-27 21:12:29 +0000385 GROW;
386 while (RAW != ';') { /* loop blocked by count */
387 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
Daniel Veillardb05deb71999-08-10 19:04:08 +0000388 val = val * 16 + (CUR - '0');
Daniel Veillarde0854c32000-08-27 21:12:29 +0000389 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
Daniel Veillardb05deb71999-08-10 19:04:08 +0000390 val = val * 16 + (CUR - 'a') + 10;
Daniel Veillarde0854c32000-08-27 21:12:29 +0000391 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
Daniel Veillardb05deb71999-08-10 19:04:08 +0000392 val = val * 16 + (CUR - 'A') + 10;
393 else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000394 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000395 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
396 ctxt->sax->error(ctxt->userData,
397 "xmlParseCharRef: invalid hexadecimal value\n");
398 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +0000399 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000400 val = 0;
401 break;
402 }
403 NEXT;
Daniel Veillarde0854c32000-08-27 21:12:29 +0000404 count++;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000405 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000406 if (RAW == ';') {
407 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
408 ctxt->nbChars ++;
409 ctxt->input->cur++;
410 }
411 } else if ((RAW == '&') && (NXT(1) == '#')) {
Daniel Veillardb05deb71999-08-10 19:04:08 +0000412 SKIP(2);
Daniel Veillarde0854c32000-08-27 21:12:29 +0000413 GROW;
414 while (RAW != ';') { /* loop blocked by count */
415 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
Daniel Veillardb05deb71999-08-10 19:04:08 +0000416 val = val * 10 + (CUR - '0');
417 else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000418 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000419 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
420 ctxt->sax->error(ctxt->userData,
421 "xmlParseCharRef: invalid decimal value\n");
422 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +0000423 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000424 val = 0;
425 break;
426 }
427 NEXT;
Daniel Veillarde0854c32000-08-27 21:12:29 +0000428 count++;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000429 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000430 if (RAW == ';') {
431 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
432 ctxt->nbChars ++;
433 ctxt->input->cur++;
434 }
Daniel Veillardb05deb71999-08-10 19:04:08 +0000435 } else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000436 ctxt->errNo = XML_ERR_INVALID_CHARREF;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000437 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
438 ctxt->sax->error(ctxt->userData,
439 "xmlParseCharRef: invalid value\n");
440 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +0000441 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000442 }
443
444 /*
445 * [ WFC: Legal Character ]
446 * Characters referred to using character references must match the
447 * production for Char.
448 */
449 if (IS_CHAR(val)) {
450 return(val);
451 } else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000452 ctxt->errNo = XML_ERR_INVALID_CHAR;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000453 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000454 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
Daniel Veillardb05deb71999-08-10 19:04:08 +0000455 val);
456 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +0000457 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000458 }
459 return(0);
460}
461
462/**
Daniel Veillard10a2c651999-12-12 13:03:50 +0000463 * xmlParseStringCharRef:
464 * @ctxt: an XML parser context
465 * @str: a pointer to an index in the string
466 *
467 * parse Reference declarations, variant parsing from a string rather
468 * than an an input flow.
469 *
470 * [66] CharRef ::= '&#' [0-9]+ ';' |
471 * '&#x' [0-9a-fA-F]+ ';'
472 *
473 * [ WFC: Legal Character ]
474 * Characters referred to using character references must match the
475 * production for Char.
476 *
477 * Returns the value parsed (as an int), 0 in case of error, str will be
478 * updated to the current value of the index
479 */
480int
481xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
482 const xmlChar *ptr;
483 xmlChar cur;
484 int val = 0;
485
486 if ((str == NULL) || (*str == NULL)) return(0);
487 ptr = *str;
488 cur = *ptr;
Daniel Veillard0caf07a1999-12-21 16:25:49 +0000489 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
Daniel Veillard10a2c651999-12-12 13:03:50 +0000490 ptr += 3;
491 cur = *ptr;
Daniel Veillarde0854c32000-08-27 21:12:29 +0000492 while (cur != ';') { /* Non input consuming loop */
Daniel Veillard10a2c651999-12-12 13:03:50 +0000493 if ((cur >= '0') && (cur <= '9'))
494 val = val * 16 + (cur - '0');
495 else if ((cur >= 'a') && (cur <= 'f'))
496 val = val * 16 + (cur - 'a') + 10;
497 else if ((cur >= 'A') && (cur <= 'F'))
498 val = val * 16 + (cur - 'A') + 10;
499 else {
500 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
501 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
502 ctxt->sax->error(ctxt->userData,
Daniel Veillardbe803962000-06-28 23:40:59 +0000503 "xmlParseStringCharRef: invalid hexadecimal value\n");
Daniel Veillard10a2c651999-12-12 13:03:50 +0000504 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +0000505 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +0000506 val = 0;
507 break;
508 }
509 ptr++;
510 cur = *ptr;
511 }
512 if (cur == ';')
513 ptr++;
Daniel Veillard0142b842000-01-14 14:45:24 +0000514 } else if ((cur == '&') && (ptr[1] == '#')){
Daniel Veillard10a2c651999-12-12 13:03:50 +0000515 ptr += 2;
516 cur = *ptr;
Daniel Veillarde0854c32000-08-27 21:12:29 +0000517 while (cur != ';') { /* Non input consuming loops */
Daniel Veillard10a2c651999-12-12 13:03:50 +0000518 if ((cur >= '0') && (cur <= '9'))
519 val = val * 10 + (cur - '0');
520 else {
521 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
522 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
523 ctxt->sax->error(ctxt->userData,
Daniel Veillardbe803962000-06-28 23:40:59 +0000524 "xmlParseStringCharRef: invalid decimal value\n");
Daniel Veillard10a2c651999-12-12 13:03:50 +0000525 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +0000526 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +0000527 val = 0;
528 break;
529 }
530 ptr++;
531 cur = *ptr;
532 }
533 if (cur == ';')
534 ptr++;
535 } else {
536 ctxt->errNo = XML_ERR_INVALID_CHARREF;
537 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
538 ctxt->sax->error(ctxt->userData,
539 "xmlParseCharRef: invalid value\n");
540 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +0000541 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +0000542 return(0);
543 }
544 *str = ptr;
545
546 /*
547 * [ WFC: Legal Character ]
548 * Characters referred to using character references must match the
549 * production for Char.
550 */
551 if (IS_CHAR(val)) {
552 return(val);
553 } else {
554 ctxt->errNo = XML_ERR_INVALID_CHAR;
555 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
556 ctxt->sax->error(ctxt->userData,
557 "CharRef: invalid xmlChar value %d\n", val);
558 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +0000559 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +0000560 }
561 return(0);
562}
563
564/**
Daniel Veillardb05deb71999-08-10 19:04:08 +0000565 * xmlParserHandlePEReference:
566 * @ctxt: the parser context
567 *
568 * [69] PEReference ::= '%' Name ';'
569 *
570 * [ WFC: No Recursion ]
Daniel Veillardb1059e22000-09-16 14:02:43 +0000571 * A parsed entity must not contain a recursive
Daniel Veillardb05deb71999-08-10 19:04:08 +0000572 * reference to itself, either directly or indirectly.
573 *
574 * [ WFC: Entity Declared ]
575 * In a document without any DTD, a document with only an internal DTD
576 * subset which contains no parameter entity references, or a document
577 * with "standalone='yes'", ... ... The declaration of a parameter
578 * entity must precede any reference to it...
579 *
580 * [ VC: Entity Declared ]
581 * In a document with an external subset or external parameter entities
582 * with "standalone='no'", ... ... The declaration of a parameter entity
583 * must precede any reference to it...
584 *
585 * [ WFC: In DTD ]
586 * Parameter-entity references may only appear in the DTD.
587 * NOTE: misleading but this is handled.
588 *
589 * A PEReference may have been detected in the current input stream
590 * the handling is done accordingly to
591 * http://www.w3.org/TR/REC-xml#entproc
592 * i.e.
593 * - Included in literal in entity values
594 * - Included as Paraemeter Entity reference within DTDs
595 */
596void
597xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000598 xmlChar *name;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000599 xmlEntityPtr entity = NULL;
600 xmlParserInputPtr input;
601
Daniel Veillard35008381999-10-25 13:15:52 +0000602 if (ctxt->token != 0) {
Daniel Veillard35008381999-10-25 13:15:52 +0000603 return;
604 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000605 if (RAW != '%') return;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000606 switch(ctxt->instate) {
607 case XML_PARSER_CDATA_SECTION:
608 return;
609 case XML_PARSER_COMMENT:
610 return;
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000611 case XML_PARSER_START_TAG:
612 return;
613 case XML_PARSER_END_TAG:
614 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000615 case XML_PARSER_EOF:
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000616 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000617 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
618 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
619 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +0000620 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000621 return;
622 case XML_PARSER_PROLOG:
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000623 case XML_PARSER_START:
624 case XML_PARSER_MISC:
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000625 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000626 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
627 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
628 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +0000629 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000630 return;
631 case XML_PARSER_ENTITY_DECL:
632 case XML_PARSER_CONTENT:
633 case XML_PARSER_ATTRIBUTE_VALUE:
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000634 case XML_PARSER_PI:
Daniel Veillardcf461992000-03-14 18:30:20 +0000635 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillardb05deb71999-08-10 19:04:08 +0000636 /* we just ignore it there */
637 return;
638 case XML_PARSER_EPILOG:
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000639 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000640 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
641 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
642 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +0000643 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000644 return;
645 case XML_PARSER_ENTITY_VALUE:
646 /*
647 * NOTE: in the case of entity values, we don't do the
Daniel Veillard51e3b151999-11-12 17:02:31 +0000648 * substitution here since we need the literal
Daniel Veillardb05deb71999-08-10 19:04:08 +0000649 * entity value to be able to save the internal
650 * subset of the document.
Daniel Veillarde0854c32000-08-27 21:12:29 +0000651 * This will be handled by xmlStringDecodeEntities
Daniel Veillardb05deb71999-08-10 19:04:08 +0000652 */
653 return;
654 case XML_PARSER_DTD:
655 /*
656 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
657 * In the internal DTD subset, parameter-entity references
658 * can occur only where markup declarations can occur, not
659 * within markup declarations.
660 * In that case this is handled in xmlParseMarkupDecl
661 */
662 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
663 return;
Daniel Veillard41e06512000-11-13 11:47:47 +0000664 break;
665 case XML_PARSER_IGNORE:
666 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000667 }
668
669 NEXT;
670 name = xmlParseName(ctxt);
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000671 if (xmlParserDebugEntities)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000672 xmlGenericError(xmlGenericErrorContext,
673 "PE Reference: %s\n", name);
Daniel Veillardb05deb71999-08-10 19:04:08 +0000674 if (name == NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000675 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000676 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
677 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
678 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +0000679 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000680 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +0000681 if (RAW == ';') {
Daniel Veillardb05deb71999-08-10 19:04:08 +0000682 NEXT;
683 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
684 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
685 if (entity == NULL) {
686
687 /*
688 * [ WFC: Entity Declared ]
689 * In a document without any DTD, a document with only an
690 * internal DTD subset which contains no parameter entity
691 * references, or a document with "standalone='yes'", ...
692 * ... The declaration of a parameter entity must precede
693 * any reference to it...
694 */
695 if ((ctxt->standalone == 1) ||
696 ((ctxt->hasExternalSubset == 0) &&
697 (ctxt->hasPErefs == 0))) {
698 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
699 ctxt->sax->error(ctxt->userData,
700 "PEReference: %%%s; not found\n", name);
701 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +0000702 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000703 } else {
704 /*
705 * [ VC: Entity Declared ]
706 * In a document with an external subset or external
707 * parameter entities with "standalone='no'", ...
708 * ... The declaration of a parameter entity must precede
709 * any reference to it...
710 */
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000711 if ((!ctxt->disableSAX) &&
712 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
Daniel Veillard87b95392000-08-12 21:12:04 +0000713 ctxt->vctxt.error(ctxt->vctxt.userData,
714 "PEReference: %%%s; not found\n", name);
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000715 } else if ((!ctxt->disableSAX) &&
716 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +0000717 ctxt->sax->warning(ctxt->userData,
718 "PEReference: %%%s; not found\n", name);
719 ctxt->valid = 0;
720 }
721 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +0000722 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
723 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillardb05deb71999-08-10 19:04:08 +0000724 /*
Daniel Veillardb1059e22000-09-16 14:02:43 +0000725 * handle the extra spaces added before and after
Daniel Veillardb05deb71999-08-10 19:04:08 +0000726 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardb1059e22000-09-16 14:02:43 +0000727 * this is done independantly.
Daniel Veillardb05deb71999-08-10 19:04:08 +0000728 */
729 input = xmlNewEntityInputStream(ctxt, entity);
730 xmlPushInput(ctxt, input);
Daniel Veillardcf461992000-03-14 18:30:20 +0000731 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
732 (RAW == '<') && (NXT(1) == '?') &&
733 (NXT(2) == 'x') && (NXT(3) == 'm') &&
734 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
735 xmlParseTextDecl(ctxt);
736 }
737 if (ctxt->token == 0)
738 ctxt->token = ' ';
Daniel Veillardb05deb71999-08-10 19:04:08 +0000739 } else {
740 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
741 ctxt->sax->error(ctxt->userData,
742 "xmlHandlePEReference: %s is not a parameter entity\n",
743 name);
744 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +0000745 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000746 }
747 }
748 } else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000749 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000750 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
751 ctxt->sax->error(ctxt->userData,
752 "xmlHandlePEReference: expecting ';'\n");
753 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +0000754 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000755 }
Daniel Veillard6454aec1999-09-02 22:04:43 +0000756 xmlFree(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +0000757 }
758}
759
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates `buffer' to the new size.
 * The macro expands to a braced block containing a `return(NULL)', so it
 * can only be used inside a function returning a pointer, on a variable
 * `buffer' that has a companion integer variable named `buffer_size'.
 *
 * The result of xmlRealloc() is first stored in a temporary: when the
 * reallocation fails the previous block is still owned by the caller, so
 * it is released here before returning instead of being leaked.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *buffer##_tmp;                                              \
    buffer##_size *= 2;                                                 \
    buffer##_tmp = (xmlChar *)                                          \
            xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));        \
    if (buffer##_tmp == NULL) {                                         \
        perror("realloc failed");                                       \
        xmlFree(buffer);                                                \
        buffer = NULL;                                                  \
        return(NULL);                                                   \
    }                                                                   \
    buffer = buffer##_tmp;                                              \
}
772
Daniel Veillard011b63c1999-06-02 17:44:04 +0000773/**
Daniel Veillard10a2c651999-12-12 13:03:50 +0000774 * xmlStringDecodeEntities:
775 * @ctxt: the parser context
776 * @str: the input string
777 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
778 * @end: an end marker xmlChar, 0 if none
779 * @end2: an end marker xmlChar, 0 if none
780 * @end3: an end marker xmlChar, 0 if none
781 *
Daniel Veillarde0854c32000-08-27 21:12:29 +0000782 * Takes a entity string content and process to do the adequate subtitutions.
783 *
Daniel Veillard10a2c651999-12-12 13:03:50 +0000784 * [67] Reference ::= EntityRef | CharRef
785 *
786 * [69] PEReference ::= '%' Name ';'
787 *
788 * Returns A newly allocated string with the substitution done. The caller
789 * must deallocate it !
790 */
791xmlChar *
792xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
793 xmlChar end, xmlChar end2, xmlChar end3) {
794 xmlChar *buffer = NULL;
795 int buffer_size = 0;
Daniel Veillard10a2c651999-12-12 13:03:50 +0000796
797 xmlChar *current = NULL;
798 xmlEntityPtr ent;
Daniel Veillardcf461992000-03-14 18:30:20 +0000799 int c,l;
800 int nbchars = 0;
801
Daniel Veillard87b95392000-08-12 21:12:04 +0000802 if (str == NULL)
803 return(NULL);
804
Daniel Veillardcf461992000-03-14 18:30:20 +0000805 if (ctxt->depth > 40) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +0000806 ctxt->errNo = XML_ERR_ENTITY_LOOP;
Daniel Veillardcf461992000-03-14 18:30:20 +0000807 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
808 ctxt->sax->error(ctxt->userData,
809 "Detected entity reference loop\n");
810 ctxt->wellFormed = 0;
811 ctxt->disableSAX = 1;
Daniel Veillardcf461992000-03-14 18:30:20 +0000812 return(NULL);
813 }
Daniel Veillard10a2c651999-12-12 13:03:50 +0000814
815 /*
816 * allocate a translation buffer.
817 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000818 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard10a2c651999-12-12 13:03:50 +0000819 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
820 if (buffer == NULL) {
821 perror("xmlDecodeEntities: malloc failed");
822 return(NULL);
823 }
Daniel Veillard10a2c651999-12-12 13:03:50 +0000824
825 /*
826 * Ok loop until we reach one of the ending char or a size limit.
Daniel Veillarde0854c32000-08-27 21:12:29 +0000827 * we are operating on already parsed values.
Daniel Veillard10a2c651999-12-12 13:03:50 +0000828 */
Daniel Veillardcf461992000-03-14 18:30:20 +0000829 c = CUR_SCHAR(str, l);
Daniel Veillarde0854c32000-08-27 21:12:29 +0000830 while ((c != 0) && (c != end) && /* non input consuming loop */
831 (c != end2) && (c != end3)) {
Daniel Veillard10a2c651999-12-12 13:03:50 +0000832
Daniel Veillardcf461992000-03-14 18:30:20 +0000833 if (c == 0) break;
834 if ((c == '&') && (str[1] == '#')) {
Daniel Veillard10a2c651999-12-12 13:03:50 +0000835 int val = xmlParseStringCharRef(ctxt, &str);
Daniel Veillardcf461992000-03-14 18:30:20 +0000836 if (val != 0) {
837 COPY_BUF(0,buffer,nbchars,val);
838 }
839 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000840 if (xmlParserDebugEntities)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000841 xmlGenericError(xmlGenericErrorContext,
842 "String decoding Entity Reference: %.30s\n",
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000843 str);
Daniel Veillard10a2c651999-12-12 13:03:50 +0000844 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillarde0854c32000-08-27 21:12:29 +0000845 if ((ent != NULL) &&
846 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard52402ce2000-08-22 23:36:12 +0000847 if (ent->content != NULL) {
848 COPY_BUF(0,buffer,nbchars,ent->content[0]);
849 } else {
850 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
851 ctxt->sax->error(ctxt->userData,
852 "internal error entity has no content\n");
853 }
854 } else if ((ent != NULL) && (ent->content != NULL)) {
Daniel Veillardcf461992000-03-14 18:30:20 +0000855 xmlChar *rep;
Daniel Veillard10a2c651999-12-12 13:03:50 +0000856
Daniel Veillardcf461992000-03-14 18:30:20 +0000857 ctxt->depth++;
858 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
859 0, 0, 0);
860 ctxt->depth--;
861 if (rep != NULL) {
862 current = rep;
Daniel Veillarde0854c32000-08-27 21:12:29 +0000863 while (*current != 0) { /* non input consuming loop */
Daniel Veillardcf461992000-03-14 18:30:20 +0000864 buffer[nbchars++] = *current++;
865 if (nbchars >
866 buffer_size - XML_PARSER_BUFFER_SIZE) {
867 growBuffer(buffer);
868 }
Daniel Veillard10a2c651999-12-12 13:03:50 +0000869 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000870 xmlFree(rep);
Daniel Veillard10a2c651999-12-12 13:03:50 +0000871 }
872 } else if (ent != NULL) {
873 int i = xmlStrlen(ent->name);
874 const xmlChar *cur = ent->name;
875
Daniel Veillardcf461992000-03-14 18:30:20 +0000876 buffer[nbchars++] = '&';
877 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard10a2c651999-12-12 13:03:50 +0000878 growBuffer(buffer);
Daniel Veillard10a2c651999-12-12 13:03:50 +0000879 }
880 for (;i > 0;i--)
Daniel Veillardcf461992000-03-14 18:30:20 +0000881 buffer[nbchars++] = *cur++;
882 buffer[nbchars++] = ';';
Daniel Veillard10a2c651999-12-12 13:03:50 +0000883 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000884 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000885 if (xmlParserDebugEntities)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000886 xmlGenericError(xmlGenericErrorContext,
887 "String decoding PE Reference: %.30s\n", str);
Daniel Veillard10a2c651999-12-12 13:03:50 +0000888 ent = xmlParseStringPEReference(ctxt, &str);
889 if (ent != NULL) {
Daniel Veillardcf461992000-03-14 18:30:20 +0000890 xmlChar *rep;
Daniel Veillard10a2c651999-12-12 13:03:50 +0000891
Daniel Veillardcf461992000-03-14 18:30:20 +0000892 ctxt->depth++;
893 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
894 0, 0, 0);
895 ctxt->depth--;
896 if (rep != NULL) {
897 current = rep;
Daniel Veillarde0854c32000-08-27 21:12:29 +0000898 while (*current != 0) { /* non input consuming loop */
Daniel Veillardcf461992000-03-14 18:30:20 +0000899 buffer[nbchars++] = *current++;
900 if (nbchars >
901 buffer_size - XML_PARSER_BUFFER_SIZE) {
902 growBuffer(buffer);
903 }
Daniel Veillard10a2c651999-12-12 13:03:50 +0000904 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000905 xmlFree(rep);
Daniel Veillard10a2c651999-12-12 13:03:50 +0000906 }
907 }
908 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +0000909 COPY_BUF(l,buffer,nbchars,c);
910 str += l;
911 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard10a2c651999-12-12 13:03:50 +0000912 growBuffer(buffer);
Daniel Veillard10a2c651999-12-12 13:03:50 +0000913 }
Daniel Veillard10a2c651999-12-12 13:03:50 +0000914 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000915 c = CUR_SCHAR(str, l);
Daniel Veillard10a2c651999-12-12 13:03:50 +0000916 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000917 buffer[nbchars++] = 0;
Daniel Veillard10a2c651999-12-12 13:03:50 +0000918 return(buffer);
919}
920
Daniel Veillard260a68f1998-08-13 03:39:55 +0000921
922/************************************************************************
923 * *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000924 * Commodity functions to handle xmlChars *
Daniel Veillard260a68f1998-08-13 03:39:55 +0000925 * *
926 ************************************************************************/
927
Daniel Veillard11e00581998-10-24 18:27:49 +0000928/**
929 * xmlStrndup:
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000930 * @cur: the input xmlChar *
Daniel Veillard11e00581998-10-24 18:27:49 +0000931 * @len: the len of @cur
932 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000933 * a strndup for array of xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +0000934 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000935 * Returns a new xmlChar * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +0000936 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000937xmlChar *
938xmlStrndup(const xmlChar *cur, int len) {
Daniel Veillard10a2c651999-12-12 13:03:50 +0000939 xmlChar *ret;
940
941 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000942 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +0000943 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000944 xmlGenericError(xmlGenericErrorContext,
945 "malloc of %ld byte failed\n",
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000946 (len + 1) * (long)sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +0000947 return(NULL);
948 }
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000949 memcpy(ret, cur, len * sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +0000950 ret[len] = 0;
951 return(ret);
952}
953
Daniel Veillard11e00581998-10-24 18:27:49 +0000954/**
955 * xmlStrdup:
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000956 * @cur: the input xmlChar *
Daniel Veillard11e00581998-10-24 18:27:49 +0000957 *
Daniel Veillardcf461992000-03-14 18:30:20 +0000958 * a strdup for array of xmlChar's. Since they are supposed to be
959 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
960 * a termination mark of '0'.
Daniel Veillard1e346af1999-02-22 10:33:01 +0000961 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000962 * Returns a new xmlChar * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +0000963 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000964xmlChar *
965xmlStrdup(const xmlChar *cur) {
966 const xmlChar *p = cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +0000967
Daniel Veillard10a2c651999-12-12 13:03:50 +0000968 if (cur == NULL) return(NULL);
Daniel Veillarde0854c32000-08-27 21:12:29 +0000969 while (*p != 0) p++; /* non input consuming */
Daniel Veillard260a68f1998-08-13 03:39:55 +0000970 return(xmlStrndup(cur, p - cur));
971}
972
Daniel Veillard11e00581998-10-24 18:27:49 +0000973/**
974 * xmlCharStrndup:
975 * @cur: the input char *
976 * @len: the len of @cur
977 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000978 * a strndup for char's to xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +0000979 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000980 * Returns a new xmlChar * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +0000981 */
982
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000983xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000984xmlCharStrndup(const char *cur, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000985 int i;
Daniel Veillard10a2c651999-12-12 13:03:50 +0000986 xmlChar *ret;
987
988 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000989 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +0000990 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000991 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000992 (len + 1) * (long)sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +0000993 return(NULL);
994 }
995 for (i = 0;i < len;i++)
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000996 ret[i] = (xmlChar) cur[i];
Daniel Veillard260a68f1998-08-13 03:39:55 +0000997 ret[len] = 0;
998 return(ret);
999}
1000
Daniel Veillard11e00581998-10-24 18:27:49 +00001001/**
1002 * xmlCharStrdup:
1003 * @cur: the input char *
1004 * @len: the len of @cur
1005 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001006 * a strdup for char's to xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001007 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001008 * Returns a new xmlChar * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001009 */
1010
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001011xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001012xmlCharStrdup(const char *cur) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001013 const char *p = cur;
1014
Daniel Veillard10a2c651999-12-12 13:03:50 +00001015 if (cur == NULL) return(NULL);
Daniel Veillarde0854c32000-08-27 21:12:29 +00001016 while (*p != '\0') p++; /* non input consuming */
Daniel Veillard260a68f1998-08-13 03:39:55 +00001017 return(xmlCharStrndup(cur, p - cur));
1018}
1019
Daniel Veillard11e00581998-10-24 18:27:49 +00001020/**
1021 * xmlStrcmp:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001022 * @str1: the first xmlChar *
1023 * @str2: the second xmlChar *
Daniel Veillard11e00581998-10-24 18:27:49 +00001024 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001025 * a strcmp for xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001026 *
1027 * Returns the integer result of the comparison
Daniel Veillard260a68f1998-08-13 03:39:55 +00001028 */
1029
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001030int
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001031xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001032 register int tmp;
1033
Daniel Veillardb1059e22000-09-16 14:02:43 +00001034 if (str1 == str2) return(0);
Daniel Veillard10a2c651999-12-12 13:03:50 +00001035 if (str1 == NULL) return(-1);
1036 if (str2 == NULL) return(1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001037 do {
Daniel Veillardb656ebe2000-09-22 13:51:48 +00001038 tmp = *str1++ - *str2;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001039 if (tmp != 0) return(tmp);
Daniel Veillardb656ebe2000-09-22 13:51:48 +00001040 } while (*str2++ != 0);
1041 return 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001042}
1043
Daniel Veillard11e00581998-10-24 18:27:49 +00001044/**
Daniel Veillard8b5dd832000-10-01 20:28:44 +00001045 * xmlStrEqual:
1046 * @str1: the first xmlChar *
1047 * @str2: the second xmlChar *
1048 *
1049 * Check if both string are equal of have same content
1050 * Should be a bit more readable and faster than xmlStrEqual()
1051 *
1052 * Returns 1 if they are equal, 0 if they are different
1053 */
1054
1055int
1056xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1057 if (str1 == str2) return(1);
1058 if (str1 == NULL) return(0);
1059 if (str2 == NULL) return(0);
1060 do {
1061 if (*str1++ != *str2) return(0);
1062 } while (*str2++);
1063 return(1);
1064}
1065
1066/**
Daniel Veillard11e00581998-10-24 18:27:49 +00001067 * xmlStrncmp:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001068 * @str1: the first xmlChar *
1069 * @str2: the second xmlChar *
Daniel Veillard11e00581998-10-24 18:27:49 +00001070 * @len: the max comparison length
1071 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001072 * a strncmp for xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001073 *
1074 * Returns the integer result of the comparison
Daniel Veillard260a68f1998-08-13 03:39:55 +00001075 */
1076
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001077int
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001078xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001079 register int tmp;
1080
1081 if (len <= 0) return(0);
Daniel Veillardb656ebe2000-09-22 13:51:48 +00001082 if (str1 == str2) return(0);
Daniel Veillard10a2c651999-12-12 13:03:50 +00001083 if (str1 == NULL) return(-1);
1084 if (str2 == NULL) return(1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001085 do {
Daniel Veillardb656ebe2000-09-22 13:51:48 +00001086 tmp = *str1++ - *str2;
1087 if (tmp != 0 || --len == 0) return(tmp);
1088 } while (*str2++ != 0);
1089 return 0;
1090}
1091
1092static xmlChar casemap[256] = {
1093 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1094 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1095 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1096 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1097 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1098 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1099 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1100 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1101 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1102 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1103 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1104 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1105 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1106 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1107 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1108 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1109 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1110 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1111 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1112 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1113 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1114 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1115 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1116 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1117 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1118 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1119 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1120 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1121 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1122 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1123 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1124 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1125};
1126
1127/**
1128 * xmlStrcasecmp:
1129 * @str1: the first xmlChar *
1130 * @str2: the second xmlChar *
1131 *
1132 * a strcasecmp for xmlChar's
1133 *
1134 * Returns the integer result of the comparison
1135 */
1136
1137int
1138xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1139 register int tmp;
1140
1141 if (str1 == str2) return(0);
1142 if (str1 == NULL) return(-1);
1143 if (str2 == NULL) return(1);
1144 do {
1145 tmp = casemap[*str1++] - casemap[*str2];
Daniel Veillard260a68f1998-08-13 03:39:55 +00001146 if (tmp != 0) return(tmp);
Daniel Veillardb656ebe2000-09-22 13:51:48 +00001147 } while (*str2++ != 0);
1148 return 0;
1149}
1150
1151/**
1152 * xmlStrncasecmp:
1153 * @str1: the first xmlChar *
1154 * @str2: the second xmlChar *
1155 * @len: the max comparison length
1156 *
1157 * a strncasecmp for xmlChar's
1158 *
1159 * Returns the integer result of the comparison
1160 */
1161
1162int
1163xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1164 register int tmp;
1165
1166 if (len <= 0) return(0);
1167 if (str1 == str2) return(0);
1168 if (str1 == NULL) return(-1);
1169 if (str2 == NULL) return(1);
1170 do {
1171 tmp = casemap[*str1++] - casemap[*str2];
1172 if (tmp != 0 || --len == 0) return(tmp);
1173 } while (*str2++ != 0);
1174 return 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001175}
1176
Daniel Veillard11e00581998-10-24 18:27:49 +00001177/**
1178 * xmlStrchr:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001179 * @str: the xmlChar * array
1180 * @val: the xmlChar to search
Daniel Veillard11e00581998-10-24 18:27:49 +00001181 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001182 * a strchr for xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001183 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001184 * Returns the xmlChar * for the first occurence or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001185 */
1186
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001187const xmlChar *
1188xmlStrchr(const xmlChar *str, xmlChar val) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00001189 if (str == NULL) return(NULL);
Daniel Veillarde0854c32000-08-27 21:12:29 +00001190 while (*str != 0) { /* non input consuming */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001191 if (*str == val) return((xmlChar *) str);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001192 str++;
1193 }
1194 return(NULL);
1195}
1196
Daniel Veillard11e00581998-10-24 18:27:49 +00001197/**
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001198 * xmlStrstr:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001199 * @str: the xmlChar * array (haystack)
1200 * @val: the xmlChar to search (needle)
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001201 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001202 * a strstr for xmlChar's
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001203 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001204 * Returns the xmlChar * for the first occurence or NULL.
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001205 */
1206
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001207const xmlChar *
1208xmlStrstr(const xmlChar *str, xmlChar *val) {
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001209 int n;
1210
1211 if (str == NULL) return(NULL);
1212 if (val == NULL) return(NULL);
1213 n = xmlStrlen(val);
1214
1215 if (n == 0) return(str);
Daniel Veillarde0854c32000-08-27 21:12:29 +00001216 while (*str != 0) { /* non input consuming */
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001217 if (*str == *val) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001218 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001219 }
1220 str++;
1221 }
1222 return(NULL);
1223}
1224
1225/**
Daniel Veillardb656ebe2000-09-22 13:51:48 +00001226 * xmlStrcasestr:
1227 * @str: the xmlChar * array (haystack)
1228 * @val: the xmlChar to search (needle)
1229 *
1230 * a case-ignoring strstr for xmlChar's
1231 *
1232 * Returns the xmlChar * for the first occurence or NULL.
1233 */
1234
1235const xmlChar *
1236xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1237 int n;
1238
1239 if (str == NULL) return(NULL);
1240 if (val == NULL) return(NULL);
1241 n = xmlStrlen(val);
1242
1243 if (n == 0) return(str);
1244 while (*str != 0) { /* non input consuming */
1245 if (casemap[*str] == casemap[*val])
1246 if (!xmlStrncasecmp(str, val, n)) return(str);
1247 str++;
1248 }
1249 return(NULL);
1250}
1251
1252/**
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001253 * xmlStrsub:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001254 * @str: the xmlChar * array (haystack)
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001255 * @start: the index of the first char (zero based)
1256 * @len: the length of the substring
1257 *
1258 * Extract a substring of a given string
1259 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001260 * Returns the xmlChar * for the first occurence or NULL.
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001261 */
1262
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001263xmlChar *
1264xmlStrsub(const xmlChar *str, int start, int len) {
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001265 int i;
1266
1267 if (str == NULL) return(NULL);
1268 if (start < 0) return(NULL);
1269 if (len < 0) return(NULL);
1270
1271 for (i = 0;i < start;i++) {
1272 if (*str == 0) return(NULL);
1273 str++;
1274 }
1275 if (*str == 0) return(NULL);
1276 return(xmlStrndup(str, len));
1277}
1278
1279/**
Daniel Veillard11e00581998-10-24 18:27:49 +00001280 * xmlStrlen:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001281 * @str: the xmlChar * array
Daniel Veillard11e00581998-10-24 18:27:49 +00001282 *
Daniel Veillard51e3b151999-11-12 17:02:31 +00001283 * length of a xmlChar's string
Daniel Veillard1e346af1999-02-22 10:33:01 +00001284 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001285 * Returns the number of xmlChar contained in the ARRAY.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001286 */
1287
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001288int
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001289xmlStrlen(const xmlChar *str) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001290 int len = 0;
1291
1292 if (str == NULL) return(0);
Daniel Veillarde0854c32000-08-27 21:12:29 +00001293 while (*str != 0) { /* non input consuming */
Daniel Veillard260a68f1998-08-13 03:39:55 +00001294 str++;
1295 len++;
1296 }
1297 return(len);
1298}
1299
Daniel Veillard11e00581998-10-24 18:27:49 +00001300/**
1301 * xmlStrncat:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001302 * @cur: the original xmlChar * array
1303 * @add: the xmlChar * array added
Daniel Veillard11e00581998-10-24 18:27:49 +00001304 * @len: the length of @add
1305 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001306 * a strncat for array of xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001307 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001308 * Returns a new xmlChar * containing the concatenated string.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001309 */
1310
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001311xmlChar *
1312xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001313 int size;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001314 xmlChar *ret;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001315
1316 if ((add == NULL) || (len == 0))
1317 return(cur);
1318 if (cur == NULL)
1319 return(xmlStrndup(add, len));
1320
1321 size = xmlStrlen(cur);
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001322 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00001323 if (ret == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00001324 xmlGenericError(xmlGenericErrorContext,
1325 "xmlStrncat: realloc of %ld byte failed\n",
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001326 (size + len + 1) * (long)sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00001327 return(cur);
1328 }
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001329 memcpy(&ret[size], add, len * sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00001330 ret[size + len] = 0;
1331 return(ret);
1332}
1333
Daniel Veillard11e00581998-10-24 18:27:49 +00001334/**
1335 * xmlStrcat:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001336 * @cur: the original xmlChar * array
1337 * @add: the xmlChar * array added
Daniel Veillard11e00581998-10-24 18:27:49 +00001338 *
Daniel Veillardcf461992000-03-14 18:30:20 +00001339 * a strcat for array of xmlChar's. Since they are supposed to be
1340 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1341 * a termination mark of '0'.
Daniel Veillard1e346af1999-02-22 10:33:01 +00001342 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001343 * Returns a new xmlChar * containing the concatenated string.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001344 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001345xmlChar *
1346xmlStrcat(xmlChar *cur, const xmlChar *add) {
1347 const xmlChar *p = add;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001348
1349 if (add == NULL) return(cur);
1350 if (cur == NULL)
1351 return(xmlStrdup(add));
1352
Daniel Veillarde0854c32000-08-27 21:12:29 +00001353 while (*p != 0) p++; /* non input consuming */
Daniel Veillard260a68f1998-08-13 03:39:55 +00001354 return(xmlStrncat(cur, add, p - add));
1355}
1356
1357/************************************************************************
1358 * *
1359 * Commodity functions, cleanup needed ? *
1360 * *
1361 ************************************************************************/
1362
Daniel Veillard11e00581998-10-24 18:27:49 +00001363/**
1364 * areBlanks:
1365 * @ctxt: an XML parser context
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001366 * @str: a xmlChar *
Daniel Veillard11e00581998-10-24 18:27:49 +00001367 * @len: the size of @str
1368 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00001369 * Is this a sequence of blank chars that one can ignore ?
Daniel Veillard11e00581998-10-24 18:27:49 +00001370 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00001371 * Returns 1 if ignorable 0 otherwise.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001372 */
1373
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001374static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00001375 int i, ret;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001376 xmlNodePtr lastChild;
1377
Daniel Veillard83a30e72000-03-02 03:33:32 +00001378 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00001379 * Check for xml:space value.
1380 */
1381 if (*(ctxt->space) == 1)
1382 return(0);
1383
1384 /*
Daniel Veillard83a30e72000-03-02 03:33:32 +00001385 * Check that the string is made of blanks
1386 */
Daniel Veillard260a68f1998-08-13 03:39:55 +00001387 for (i = 0;i < len;i++)
1388 if (!(IS_BLANK(str[i]))) return(0);
1389
Daniel Veillard83a30e72000-03-02 03:33:32 +00001390 /*
1391 * Look if the element is mixed content in the Dtd if available
1392 */
Daniel Veillardb05deb71999-08-10 19:04:08 +00001393 if (ctxt->myDoc != NULL) {
1394 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1395 if (ret == 0) return(1);
1396 if (ret == 1) return(0);
1397 }
Daniel Veillard83a30e72000-03-02 03:33:32 +00001398
Daniel Veillardb05deb71999-08-10 19:04:08 +00001399 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00001400 * Otherwise, heuristic :-\
Daniel Veillardb05deb71999-08-10 19:04:08 +00001401 */
Daniel Veillard83a30e72000-03-02 03:33:32 +00001402 if (ctxt->keepBlanks)
1403 return(0);
Daniel Veillardcf461992000-03-14 18:30:20 +00001404 if (RAW != '<') return(0);
Daniel Veillard83a30e72000-03-02 03:33:32 +00001405 if (ctxt->node == NULL) return(0);
Daniel Veillardcf461992000-03-14 18:30:20 +00001406 if ((ctxt->node->children == NULL) &&
1407 (RAW == '<') && (NXT(1) == '/')) return(0);
Daniel Veillard83a30e72000-03-02 03:33:32 +00001408
Daniel Veillard260a68f1998-08-13 03:39:55 +00001409 lastChild = xmlGetLastChild(ctxt->node);
1410 if (lastChild == NULL) {
Daniel Veillard90fb02c2000-03-02 03:46:43 +00001411 if (ctxt->node->content != NULL) return(0);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001412 } else if (xmlNodeIsText(lastChild))
Daniel Veillard90fb02c2000-03-02 03:46:43 +00001413 return(0);
Daniel Veillardcf461992000-03-14 18:30:20 +00001414 else if ((ctxt->node->children != NULL) &&
1415 (xmlNodeIsText(ctxt->node->children)))
Daniel Veillard90fb02c2000-03-02 03:46:43 +00001416 return(0);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001417 return(1);
1418}
1419
Daniel Veillard260a68f1998-08-13 03:39:55 +00001420/*
1421 * Forward definition for recursive behaviour.
1422 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00001423void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1424void xmlParseReference(xmlParserCtxtPtr ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001425
1426/************************************************************************
1427 * *
1428 * Extra stuff for namespace support *
1429 * Relates to http://www.w3.org/TR/WD-xml-names *
1430 * *
1431 ************************************************************************/
1432
Daniel Veillard11e00581998-10-24 18:27:49 +00001433/**
Daniel Veillarde0854c32000-08-27 21:12:29 +00001434 * xmlSplitQName:
1435 * @ctxt: an XML parser context
1436 * @name: an XML parser context
1437 * @prefix: a xmlChar **
1438 *
1439 * parse an UTF8 encoded XML qualified name string
1440 *
1441 * [NS 5] QName ::= (Prefix ':')? LocalPart
1442 *
1443 * [NS 6] Prefix ::= NCName
1444 *
1445 * [NS 7] LocalPart ::= NCName
1446 *
1447 * Returns the local part, and prefix is updated
1448 * to get the Prefix if any.
1449 */
1450
1451xmlChar *
1452xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1453 xmlChar buf[XML_MAX_NAMELEN + 5];
1454 xmlChar *buffer = NULL;
1455 int len = 0;
1456 int max = XML_MAX_NAMELEN;
1457 xmlChar *ret = NULL;
1458 const xmlChar *cur = name;
1459 int c;
1460
1461 *prefix = NULL;
1462
1463 /* xml: prefix is not really a namespace */
1464 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1465 (cur[2] == 'l') && (cur[3] == ':'))
1466 return(xmlStrdup(name));
1467
1468 /* nasty but valid */
1469 if (cur[0] == ':')
1470 return(xmlStrdup(name));
1471
1472 c = *cur++;
1473 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1474 buf[len++] = c;
1475 c = *cur++;
1476 }
1477 if (len >= max) {
1478 /*
1479 * Okay someone managed to make a huge name, so he's ready to pay
1480 * for the processing speed.
1481 */
1482 max = len * 2;
1483
1484 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1485 if (buffer == NULL) {
1486 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1487 ctxt->sax->error(ctxt->userData,
1488 "xmlSplitQName: out of memory\n");
1489 return(NULL);
1490 }
1491 memcpy(buffer, buf, len);
1492 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1493 if (len + 10 > max) {
1494 max *= 2;
1495 buffer = (xmlChar *) xmlRealloc(buffer,
1496 max * sizeof(xmlChar));
1497 if (buffer == NULL) {
1498 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1499 ctxt->sax->error(ctxt->userData,
1500 "xmlSplitQName: out of memory\n");
1501 return(NULL);
1502 }
1503 }
1504 buffer[len++] = c;
1505 c = *cur++;
1506 }
1507 buffer[len] = 0;
1508 }
1509
1510 if (buffer == NULL)
1511 ret = xmlStrndup(buf, len);
1512 else {
1513 ret = buffer;
1514 buffer = NULL;
1515 max = XML_MAX_NAMELEN;
1516 }
1517
1518
1519 if (c == ':') {
1520 c = *cur++;
1521 if (c == 0) return(ret);
1522 *prefix = ret;
1523 len = 0;
1524
1525 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1526 buf[len++] = c;
1527 c = *cur++;
1528 }
1529 if (len >= max) {
1530 /*
1531 * Okay someone managed to make a huge name, so he's ready to pay
1532 * for the processing speed.
1533 */
1534 max = len * 2;
1535
1536 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1537 if (buffer == NULL) {
1538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1539 ctxt->sax->error(ctxt->userData,
1540 "xmlSplitQName: out of memory\n");
1541 return(NULL);
1542 }
1543 memcpy(buffer, buf, len);
1544 while (c != 0) { /* tested bigname2.xml */
1545 if (len + 10 > max) {
1546 max *= 2;
1547 buffer = (xmlChar *) xmlRealloc(buffer,
1548 max * sizeof(xmlChar));
1549 if (buffer == NULL) {
1550 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1551 ctxt->sax->error(ctxt->userData,
1552 "xmlSplitQName: out of memory\n");
1553 return(NULL);
1554 }
1555 }
1556 buffer[len++] = c;
1557 c = *cur++;
1558 }
1559 buffer[len] = 0;
1560 }
1561
1562 if (buffer == NULL)
1563 ret = xmlStrndup(buf, len);
1564 else {
1565 ret = buffer;
1566 }
1567 }
1568
1569 return(ret);
1570}
1571
Daniel Veillard260a68f1998-08-13 03:39:55 +00001572/************************************************************************
1573 * *
1574 * The parser itself *
1575 * Relates to http://www.w3.org/TR/REC-xml *
1576 * *
1577 ************************************************************************/
1578
Daniel Veillard11e00581998-10-24 18:27:49 +00001579/**
1580 * xmlParseName:
1581 * @ctxt: an XML parser context
1582 *
1583 * parse an XML name.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001584 *
1585 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1586 * CombiningChar | Extender
1587 *
1588 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1589 *
1590 * [6] Names ::= Name (S Name)*
Daniel Veillard1e346af1999-02-22 10:33:01 +00001591 *
1592 * Returns the Name parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001593 */
1594
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001595xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001596xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillardcf461992000-03-14 18:30:20 +00001597 xmlChar buf[XML_MAX_NAMELEN + 5];
1598 int len = 0, l;
1599 int c;
Daniel Veillarde0854c32000-08-27 21:12:29 +00001600 int count = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001601
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001602 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00001603 c = CUR_CHAR(l);
Daniel Veillard5e873c42000-04-12 13:27:38 +00001604 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1605 (!IS_LETTER(c) && (c != '_') &&
1606 (c != ':'))) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001607 return(NULL);
1608 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001609
Daniel Veillarde0854c32000-08-27 21:12:29 +00001610 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
Daniel Veillard5e873c42000-04-12 13:27:38 +00001611 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1612 (c == '.') || (c == '-') ||
1613 (c == '_') || (c == ':') ||
1614 (IS_COMBINING(c)) ||
1615 (IS_EXTENDER(c)))) {
Daniel Veillarde0854c32000-08-27 21:12:29 +00001616 if (count++ > 100) {
1617 count = 0;
1618 GROW;
1619 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001620 COPY_BUF(l,buf,len,c);
1621 NEXTL(l);
1622 c = CUR_CHAR(l);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001623 if (len >= XML_MAX_NAMELEN) {
Daniel Veillarde0854c32000-08-27 21:12:29 +00001624 /*
1625 * Okay someone managed to make a huge name, so he's ready to pay
1626 * for the processing speed.
1627 */
1628 xmlChar *buffer;
1629 int max = len * 2;
1630
1631 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1632 if (buffer == NULL) {
1633 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1634 ctxt->sax->error(ctxt->userData,
1635 "xmlParseName: out of memory\n");
1636 return(NULL);
1637 }
1638 memcpy(buffer, buf, len);
1639 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
Daniel Veillardcf461992000-03-14 18:30:20 +00001640 (c == '.') || (c == '-') ||
1641 (c == '_') || (c == ':') ||
1642 (IS_COMBINING(c)) ||
1643 (IS_EXTENDER(c))) {
Daniel Veillarde0854c32000-08-27 21:12:29 +00001644 if (count++ > 100) {
1645 count = 0;
1646 GROW;
1647 }
1648 if (len + 10 > max) {
1649 max *= 2;
1650 buffer = (xmlChar *) xmlRealloc(buffer,
1651 max * sizeof(xmlChar));
1652 if (buffer == NULL) {
1653 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1654 ctxt->sax->error(ctxt->userData,
1655 "xmlParseName: out of memory\n");
1656 return(NULL);
1657 }
1658 }
1659 COPY_BUF(l,buffer,len,c);
Daniel Veillardcf461992000-03-14 18:30:20 +00001660 NEXTL(l);
1661 c = CUR_CHAR(l);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001662 }
Daniel Veillarde0854c32000-08-27 21:12:29 +00001663 buffer[len] = 0;
1664 return(buffer);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001665 }
1666 }
1667 return(xmlStrndup(buf, len));
Daniel Veillard260a68f1998-08-13 03:39:55 +00001668}
1669
Daniel Veillard11e00581998-10-24 18:27:49 +00001670/**
Daniel Veillard10a2c651999-12-12 13:03:50 +00001671 * xmlParseStringName:
1672 * @ctxt: an XML parser context
Daniel Veillardb1059e22000-09-16 14:02:43 +00001673 * @str: a pointer to the string pointer (IN/OUT)
Daniel Veillard10a2c651999-12-12 13:03:50 +00001674 *
1675 * parse an XML name.
1676 *
1677 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1678 * CombiningChar | Extender
1679 *
1680 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1681 *
1682 * [6] Names ::= Name (S Name)*
1683 *
1684 * Returns the Name parsed or NULL. The str pointer
1685 * is updated to the current location in the string.
1686 */
1687
1688xmlChar *
1689xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
Daniel Veillardcf461992000-03-14 18:30:20 +00001690 xmlChar buf[XML_MAX_NAMELEN + 5];
1691 const xmlChar *cur = *str;
1692 int len = 0, l;
1693 int c;
Daniel Veillard10a2c651999-12-12 13:03:50 +00001694
Daniel Veillardcf461992000-03-14 18:30:20 +00001695 c = CUR_SCHAR(cur, l);
1696 if (!IS_LETTER(c) && (c != '_') &&
1697 (c != ':')) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00001698 return(NULL);
1699 }
1700
Daniel Veillarde0854c32000-08-27 21:12:29 +00001701 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Daniel Veillardcf461992000-03-14 18:30:20 +00001702 (c == '.') || (c == '-') ||
1703 (c == '_') || (c == ':') ||
1704 (IS_COMBINING(c)) ||
1705 (IS_EXTENDER(c))) {
1706 COPY_BUF(l,buf,len,c);
1707 cur += l;
1708 c = CUR_SCHAR(cur, l);
Daniel Veillarde0854c32000-08-27 21:12:29 +00001709 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1710 /*
1711 * Okay someone managed to make a huge name, so he's ready to pay
1712 * for the processing speed.
1713 */
1714 xmlChar *buffer;
1715 int max = len * 2;
1716
1717 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1718 if (buffer == NULL) {
1719 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1720 ctxt->sax->error(ctxt->userData,
1721 "xmlParseStringName: out of memory\n");
1722 return(NULL);
1723 }
1724 memcpy(buffer, buf, len);
1725 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Daniel Veillardcf461992000-03-14 18:30:20 +00001726 (c == '.') || (c == '-') ||
1727 (c == '_') || (c == ':') ||
1728 (IS_COMBINING(c)) ||
1729 (IS_EXTENDER(c))) {
Daniel Veillarde0854c32000-08-27 21:12:29 +00001730 if (len + 10 > max) {
1731 max *= 2;
1732 buffer = (xmlChar *) xmlRealloc(buffer,
1733 max * sizeof(xmlChar));
1734 if (buffer == NULL) {
1735 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1736 ctxt->sax->error(ctxt->userData,
1737 "xmlParseStringName: out of memory\n");
1738 return(NULL);
1739 }
1740 }
1741 COPY_BUF(l,buffer,len,c);
Daniel Veillardcf461992000-03-14 18:30:20 +00001742 cur += l;
1743 c = CUR_SCHAR(cur, l);
1744 }
Daniel Veillarde0854c32000-08-27 21:12:29 +00001745 buffer[len] = 0;
1746 *str = cur;
1747 return(buffer);
Daniel Veillardcf461992000-03-14 18:30:20 +00001748 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00001749 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001750 *str = cur;
1751 return(xmlStrndup(buf, len));
Daniel Veillard10a2c651999-12-12 13:03:50 +00001752}
1753
1754/**
Daniel Veillard11e00581998-10-24 18:27:49 +00001755 * xmlParseNmtoken:
1756 * @ctxt: an XML parser context
1757 *
1758 * parse an XML Nmtoken.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001759 *
1760 * [7] Nmtoken ::= (NameChar)+
1761 *
1762 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
Daniel Veillard1e346af1999-02-22 10:33:01 +00001763 *
1764 * Returns the Nmtoken parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001765 */
1766
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001767xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001768xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
Daniel Veillarde0854c32000-08-27 21:12:29 +00001769 xmlChar buf[XML_MAX_NAMELEN + 5];
1770 int len = 0, l;
1771 int c;
1772 int count = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001773
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001774 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00001775 c = CUR_CHAR(l);
Daniel Veillarde0854c32000-08-27 21:12:29 +00001776
1777 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Daniel Veillardcf461992000-03-14 18:30:20 +00001778 (c == '.') || (c == '-') ||
1779 (c == '_') || (c == ':') ||
1780 (IS_COMBINING(c)) ||
1781 (IS_EXTENDER(c))) {
Daniel Veillarde0854c32000-08-27 21:12:29 +00001782 if (count++ > 100) {
1783 count = 0;
1784 GROW;
1785 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001786 COPY_BUF(l,buf,len,c);
1787 NEXTL(l);
1788 c = CUR_CHAR(l);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001789 if (len >= XML_MAX_NAMELEN) {
Daniel Veillarde0854c32000-08-27 21:12:29 +00001790 /*
1791 * Okay someone managed to make a huge token, so he's ready to pay
1792 * for the processing speed.
1793 */
1794 xmlChar *buffer;
1795 int max = len * 2;
1796
1797 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1798 if (buffer == NULL) {
1799 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1800 ctxt->sax->error(ctxt->userData,
1801 "xmlParseNmtoken: out of memory\n");
1802 return(NULL);
1803 }
1804 memcpy(buffer, buf, len);
1805 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Daniel Veillardcf461992000-03-14 18:30:20 +00001806 (c == '.') || (c == '-') ||
1807 (c == '_') || (c == ':') ||
1808 (IS_COMBINING(c)) ||
1809 (IS_EXTENDER(c))) {
Daniel Veillarde0854c32000-08-27 21:12:29 +00001810 if (count++ > 100) {
1811 count = 0;
1812 GROW;
1813 }
1814 if (len + 10 > max) {
1815 max *= 2;
1816 buffer = (xmlChar *) xmlRealloc(buffer,
1817 max * sizeof(xmlChar));
1818 if (buffer == NULL) {
1819 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1820 ctxt->sax->error(ctxt->userData,
1821 "xmlParseName: out of memory\n");
1822 return(NULL);
1823 }
1824 }
1825 COPY_BUF(l,buffer,len,c);
Daniel Veillardcf461992000-03-14 18:30:20 +00001826 NEXTL(l);
1827 c = CUR_CHAR(l);
1828 }
Daniel Veillarde0854c32000-08-27 21:12:29 +00001829 buffer[len] = 0;
1830 return(buffer);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001831 }
1832 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001833 if (len == 0)
1834 return(NULL);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001835 return(xmlStrndup(buf, len));
Daniel Veillard260a68f1998-08-13 03:39:55 +00001836}
1837
Daniel Veillard11e00581998-10-24 18:27:49 +00001838/**
1839 * xmlParseEntityValue:
1840 * @ctxt: an XML parser context
Daniel Veillard011b63c1999-06-02 17:44:04 +00001841 * @orig: if non-NULL store a copy of the original entity value
Daniel Veillard11e00581998-10-24 18:27:49 +00001842 *
Daniel Veillardb1059e22000-09-16 14:02:43 +00001843 * parse a value for ENTITY declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00001844 *
1845 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1846 * "'" ([^%&'] | PEReference | Reference)* "'"
Daniel Veillard1e346af1999-02-22 10:33:01 +00001847 *
Daniel Veillard011b63c1999-06-02 17:44:04 +00001848 * Returns the EntityValue parsed with reference substitued or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001849 */
1850
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001851xmlChar *
1852xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00001853 xmlChar *buf = NULL;
1854 int len = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001855 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillardcf461992000-03-14 18:30:20 +00001856 int c, l;
Daniel Veillard10a2c651999-12-12 13:03:50 +00001857 xmlChar stop;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001858 xmlChar *ret = NULL;
Daniel Veillardcf461992000-03-14 18:30:20 +00001859 const xmlChar *cur = NULL;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001860 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001861
Daniel Veillardcf461992000-03-14 18:30:20 +00001862 if (RAW == '"') stop = '"';
1863 else if (RAW == '\'') stop = '\'';
Daniel Veillard10a2c651999-12-12 13:03:50 +00001864 else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001865 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001866 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard011b63c1999-06-02 17:44:04 +00001867 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001868 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001869 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00001870 return(NULL);
1871 }
1872 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
1873 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00001874 xmlGenericError(xmlGenericErrorContext,
1875 "malloc of %d byte failed\n", size);
Daniel Veillard10a2c651999-12-12 13:03:50 +00001876 return(NULL);
1877 }
1878
1879 /*
1880 * The content of the entity definition is copied in a buffer.
1881 */
1882
1883 ctxt->instate = XML_PARSER_ENTITY_VALUE;
1884 input = ctxt->input;
1885 GROW;
1886 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00001887 c = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00001888 /*
1889 * NOTE: 4.4.5 Included in Literal
1890 * When a parameter entity reference appears in a literal entity
1891 * value, ... a single or double quote character in the replacement
1892 * text is always treated as a normal data character and will not
1893 * terminate the literal.
1894 * In practice it means we stop the loop only when back at parsing
1895 * the initial entity and the quote is found
1896 */
Daniel Veillarde0854c32000-08-27 21:12:29 +00001897 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
1898 (ctxt->input != input))) {
Daniel Veillardcf461992000-03-14 18:30:20 +00001899 if (len + 5 >= size) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00001900 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00001901 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00001902 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00001903 xmlGenericError(xmlGenericErrorContext,
1904 "realloc of %d byte failed\n", size);
Daniel Veillard10a2c651999-12-12 13:03:50 +00001905 return(NULL);
1906 }
1907 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001908 COPY_BUF(l,buf,len,c);
1909 NEXTL(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00001910 /*
1911 * Pop-up of finished entities.
1912 */
Daniel Veillarde0854c32000-08-27 21:12:29 +00001913 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
Daniel Veillard10a2c651999-12-12 13:03:50 +00001914 xmlPopInput(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00001915
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00001916 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00001917 c = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00001918 if (c == 0) {
1919 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00001920 c = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00001921 }
1922 }
1923 buf[len] = 0;
1924
1925 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00001926 * Raise problem w.r.t. '&' and '%' being used in non-entities
1927 * reference constructs. Note Charref will be handled in
1928 * xmlStringDecodeEntities()
1929 */
1930 cur = buf;
Daniel Veillard4948eb42000-08-29 09:41:15 +00001931 while (*cur != 0) { /* non input consuming */
Daniel Veillardcf461992000-03-14 18:30:20 +00001932 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
1933 xmlChar *name;
1934 xmlChar tmp = *cur;
1935
1936 cur++;
1937 name = xmlParseStringName(ctxt, &cur);
1938 if ((name == NULL) || (*cur != ';')) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00001939 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
Daniel Veillardcf461992000-03-14 18:30:20 +00001940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1941 ctxt->sax->error(ctxt->userData,
1942 "EntityValue: '%c' forbidden except for entities references\n",
1943 tmp);
1944 ctxt->wellFormed = 0;
1945 ctxt->disableSAX = 1;
Daniel Veillardcf461992000-03-14 18:30:20 +00001946 }
1947 if ((ctxt->inSubset == 1) && (tmp == '%')) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00001948 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardcf461992000-03-14 18:30:20 +00001949 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1950 ctxt->sax->error(ctxt->userData,
1951 "EntityValue: PEReferences forbidden in internal subset\n",
1952 tmp);
1953 ctxt->wellFormed = 0;
1954 ctxt->disableSAX = 1;
Daniel Veillardcf461992000-03-14 18:30:20 +00001955 }
1956 if (name != NULL)
1957 xmlFree(name);
1958 }
1959 cur++;
1960 }
1961
1962 /*
Daniel Veillard10a2c651999-12-12 13:03:50 +00001963 * Then PEReference entities are substituted.
1964 */
1965 if (c != stop) {
1966 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1967 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1968 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1969 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001970 ctxt->disableSAX = 1;
1971 xmlFree(buf);
Daniel Veillard10a2c651999-12-12 13:03:50 +00001972 } else {
1973 NEXT;
1974 /*
1975 * NOTE: 4.4.7 Bypassed
1976 * When a general entity reference appears in the EntityValue in
1977 * an entity declaration, it is bypassed and left as is.
Daniel Veillardcf461992000-03-14 18:30:20 +00001978 * so XML_SUBSTITUTE_REF is not set here.
Daniel Veillard10a2c651999-12-12 13:03:50 +00001979 */
1980 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
1981 0, 0, 0);
1982 if (orig != NULL)
1983 *orig = buf;
1984 else
1985 xmlFree(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001986 }
1987
1988 return(ret);
1989}
1990
Daniel Veillard11e00581998-10-24 18:27:49 +00001991/**
1992 * xmlParseAttValue:
1993 * @ctxt: an XML parser context
1994 *
1995 * parse a value for an attribute
Daniel Veillard011b63c1999-06-02 17:44:04 +00001996 * Note: the parser won't do substitution of entities here, this
Daniel Veillardb96e6431999-08-29 21:02:19 +00001997 * will be handled later in xmlStringGetNodeList
Daniel Veillard260a68f1998-08-13 03:39:55 +00001998 *
1999 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2000 * "'" ([^<&'] | Reference)* "'"
Daniel Veillard1e346af1999-02-22 10:33:01 +00002001 *
Daniel Veillard7f858501999-11-17 17:32:38 +00002002 * 3.3.3 Attribute-Value Normalization:
2003 * Before the value of an attribute is passed to the application or
2004 * checked for validity, the XML processor must normalize it as follows:
2005 * - a character reference is processed by appending the referenced
2006 * character to the attribute value
2007 * - an entity reference is processed by recursively processing the
2008 * replacement text of the entity
2009 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2010 * appending #x20 to the normalized value, except that only a single
2011 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2012 * parsed entity or the literal entity value of an internal parsed entity
2013 * - other characters are processed by appending them to the normalized value
Daniel Veillard07136651999-11-18 14:02:05 +00002014 * If the declared value is not CDATA, then the XML processor must further
2015 * process the normalized attribute value by discarding any leading and
2016 * trailing space (#x20) characters, and by replacing sequences of space
2017 * (#x20) characters by a single space (#x20) character.
2018 * All attributes for which no declaration has been read should be treated
2019 * by a non-validating parser as if declared CDATA.
Daniel Veillard7f858501999-11-17 17:32:38 +00002020 *
2021 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002022 */
2023
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002024xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002025xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard7f858501999-11-17 17:32:38 +00002026 xmlChar limit = 0;
Daniel Veillardbe803962000-06-28 23:40:59 +00002027 xmlChar *buf = NULL;
2028 int len = 0;
2029 int buf_size = 0;
2030 int c, l;
Daniel Veillard7f858501999-11-17 17:32:38 +00002031 xmlChar *current = NULL;
2032 xmlEntityPtr ent;
Daniel Veillard7f858501999-11-17 17:32:38 +00002033
Daniel Veillard260a68f1998-08-13 03:39:55 +00002034
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002035 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00002036 if (NXT(0) == '"') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002037 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillard7f858501999-11-17 17:32:38 +00002038 limit = '"';
Daniel Veillard260a68f1998-08-13 03:39:55 +00002039 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00002040 } else if (NXT(0) == '\'') {
Daniel Veillard7f858501999-11-17 17:32:38 +00002041 limit = '\'';
Daniel Veillardb05deb71999-08-10 19:04:08 +00002042 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002043 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002044 } else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002045 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002047 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002048 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002049 ctxt->disableSAX = 1;
Daniel Veillard7f858501999-11-17 17:32:38 +00002050 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002051 }
2052
Daniel Veillard7f858501999-11-17 17:32:38 +00002053 /*
2054 * allocate a translation buffer.
2055 */
Daniel Veillardbe803962000-06-28 23:40:59 +00002056 buf_size = XML_PARSER_BUFFER_SIZE;
2057 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2058 if (buf == NULL) {
Daniel Veillard7f858501999-11-17 17:32:38 +00002059 perror("xmlParseAttValue: malloc failed");
2060 return(NULL);
2061 }
Daniel Veillard7f858501999-11-17 17:32:38 +00002062
2063 /*
2064 * Ok loop until we reach one of the ending char or a size limit.
2065 */
Daniel Veillardbe803962000-06-28 23:40:59 +00002066 c = CUR_CHAR(l);
Daniel Veillard4948eb42000-08-29 09:41:15 +00002067 while (((NXT(0) != limit) && /* checked */
2068 (c != '<')) || (ctxt->token != 0)) {
Daniel Veillardbe803962000-06-28 23:40:59 +00002069 if (c == 0) break;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00002070 if (ctxt->token == '&') {
Daniel Veillardb1059e22000-09-16 14:02:43 +00002071 /*
2072 * The reparsing will be done in xmlStringGetNodeList()
2073 * called by the attribute() function in SAX.c
2074 */
Daniel Veillard32bc74e2000-07-14 14:49:25 +00002075 static xmlChar buffer[6] = "&#38;";
2076
2077 if (len > buf_size - 10) {
2078 growBuffer(buf);
2079 }
2080 current = &buffer[0];
Daniel Veillard4948eb42000-08-29 09:41:15 +00002081 while (*current != 0) { /* non input consuming */
Daniel Veillard32bc74e2000-07-14 14:49:25 +00002082 buf[len++] = *current++;
2083 }
2084 ctxt->token = 0;
2085 } else if ((c == '&') && (NXT(1) == '#')) {
Daniel Veillard7f858501999-11-17 17:32:38 +00002086 int val = xmlParseCharRef(ctxt);
Daniel Veillardb1059e22000-09-16 14:02:43 +00002087 if (val == '&') {
2088 /*
2089 * The reparsing will be done in xmlStringGetNodeList()
2090 * called by the attribute() function in SAX.c
2091 */
2092 static xmlChar buffer[6] = "&#38;";
2093
2094 if (len > buf_size - 10) {
2095 growBuffer(buf);
2096 }
2097 current = &buffer[0];
2098 while (*current != 0) { /* non input consuming */
2099 buf[len++] = *current++;
2100 }
2101 } else {
Daniel Veillard1e851392000-10-15 10:02:56 +00002102 len += xmlCopyChar(0, &buf[len], val);
Daniel Veillardb1059e22000-09-16 14:02:43 +00002103 }
Daniel Veillardbe803962000-06-28 23:40:59 +00002104 } else if (c == '&') {
Daniel Veillard7f858501999-11-17 17:32:38 +00002105 ent = xmlParseEntityRef(ctxt);
2106 if ((ent != NULL) &&
2107 (ctxt->replaceEntities != 0)) {
Daniel Veillardcf461992000-03-14 18:30:20 +00002108 xmlChar *rep;
Daniel Veillard7f858501999-11-17 17:32:38 +00002109
Daniel Veillardcf461992000-03-14 18:30:20 +00002110 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2111 rep = xmlStringDecodeEntities(ctxt, ent->content,
2112 XML_SUBSTITUTE_REF, 0, 0, 0);
2113 if (rep != NULL) {
2114 current = rep;
Daniel Veillard4948eb42000-08-29 09:41:15 +00002115 while (*current != 0) { /* non input consuming */
Daniel Veillardbe803962000-06-28 23:40:59 +00002116 buf[len++] = *current++;
2117 if (len > buf_size - 10) {
2118 growBuffer(buf);
Daniel Veillardcf461992000-03-14 18:30:20 +00002119 }
2120 }
2121 xmlFree(rep);
Daniel Veillard7f858501999-11-17 17:32:38 +00002122 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002123 } else {
2124 if (ent->content != NULL)
Daniel Veillardbe803962000-06-28 23:40:59 +00002125 buf[len++] = ent->content[0];
Daniel Veillard7f858501999-11-17 17:32:38 +00002126 }
2127 } else if (ent != NULL) {
2128 int i = xmlStrlen(ent->name);
2129 const xmlChar *cur = ent->name;
2130
Daniel Veillardcf461992000-03-14 18:30:20 +00002131 /*
2132 * This may look absurd but is needed to detect
2133 * entities problems
2134 */
Daniel Veillard87b95392000-08-12 21:12:04 +00002135 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2136 (ent->content != NULL)) {
Daniel Veillardcf461992000-03-14 18:30:20 +00002137 xmlChar *rep;
2138 rep = xmlStringDecodeEntities(ctxt, ent->content,
2139 XML_SUBSTITUTE_REF, 0, 0, 0);
2140 if (rep != NULL)
2141 xmlFree(rep);
2142 }
2143
2144 /*
2145 * Just output the reference
2146 */
Daniel Veillardbe803962000-06-28 23:40:59 +00002147 buf[len++] = '&';
2148 if (len > buf_size - i - 10) {
2149 growBuffer(buf);
Daniel Veillard7f858501999-11-17 17:32:38 +00002150 }
2151 for (;i > 0;i--)
Daniel Veillardbe803962000-06-28 23:40:59 +00002152 buf[len++] = *cur++;
2153 buf[len++] = ';';
Daniel Veillard7f858501999-11-17 17:32:38 +00002154 }
Daniel Veillard7f858501999-11-17 17:32:38 +00002155 } else {
Daniel Veillardbe803962000-06-28 23:40:59 +00002156 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2157 COPY_BUF(l,buf,len,0x20);
2158 if (len > buf_size - 10) {
2159 growBuffer(buf);
Daniel Veillard7f858501999-11-17 17:32:38 +00002160 }
Daniel Veillard7f858501999-11-17 17:32:38 +00002161 } else {
Daniel Veillardbe803962000-06-28 23:40:59 +00002162 COPY_BUF(l,buf,len,c);
2163 if (len > buf_size - 10) {
2164 growBuffer(buf);
Daniel Veillard7f858501999-11-17 17:32:38 +00002165 }
Daniel Veillard7f858501999-11-17 17:32:38 +00002166 }
Daniel Veillardbe803962000-06-28 23:40:59 +00002167 NEXTL(l);
Daniel Veillard7f858501999-11-17 17:32:38 +00002168 }
Daniel Veillardbe803962000-06-28 23:40:59 +00002169 GROW;
2170 c = CUR_CHAR(l);
Daniel Veillard7f858501999-11-17 17:32:38 +00002171 }
Daniel Veillardbe803962000-06-28 23:40:59 +00002172 buf[len++] = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002173 if (RAW == '<') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002174 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
Daniel Veillard7f858501999-11-17 17:32:38 +00002175 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2176 ctxt->sax->error(ctxt->userData,
2177 "Unescaped '<' not allowed in attributes values\n");
Daniel Veillard7f858501999-11-17 17:32:38 +00002178 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002179 ctxt->disableSAX = 1;
2180 } else if (RAW != limit) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002181 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
Daniel Veillard7f858501999-11-17 17:32:38 +00002182 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2183 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
Daniel Veillard7f858501999-11-17 17:32:38 +00002184 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002185 ctxt->disableSAX = 1;
Daniel Veillard7f858501999-11-17 17:32:38 +00002186 } else
2187 NEXT;
Daniel Veillardbe803962000-06-28 23:40:59 +00002188 return(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002189}
2190
Daniel Veillard11e00581998-10-24 18:27:49 +00002191/**
2192 * xmlParseSystemLiteral:
2193 * @ctxt: an XML parser context
2194 *
2195 * parse an XML Literal
Daniel Veillard260a68f1998-08-13 03:39:55 +00002196 *
2197 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
Daniel Veillard1e346af1999-02-22 10:33:01 +00002198 *
2199 * Returns the SystemLiteral parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00002200 */
2201
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002202xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002203xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002204 xmlChar *buf = NULL;
2205 int len = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002206 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillardcf461992000-03-14 18:30:20 +00002207 int cur, l;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002208 xmlChar stop;
Daniel Veillardcf461992000-03-14 18:30:20 +00002209 int state = ctxt->instate;
Daniel Veillard4948eb42000-08-29 09:41:15 +00002210 int count = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002211
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002212 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00002213 if (RAW == '"') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002214 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002215 stop = '"';
Daniel Veillardcf461992000-03-14 18:30:20 +00002216 } else if (RAW == '\'') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002217 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002218 stop = '\'';
Daniel Veillard260a68f1998-08-13 03:39:55 +00002219 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002220 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002221 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00002222 ctxt->sax->error(ctxt->userData,
2223 "SystemLiteral \" or ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002224 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002225 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002226 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002227 }
2228
Daniel Veillard10a2c651999-12-12 13:03:50 +00002229 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2230 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002231 xmlGenericError(xmlGenericErrorContext,
2232 "malloc of %d byte failed\n", size);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002233 return(NULL);
2234 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002235 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2236 cur = CUR_CHAR(l);
Daniel Veillard4948eb42000-08-29 09:41:15 +00002237 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Daniel Veillardcf461992000-03-14 18:30:20 +00002238 if (len + 5 >= size) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002239 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00002240 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00002241 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002242 xmlGenericError(xmlGenericErrorContext,
2243 "realloc of %d byte failed\n", size);
Daniel Veillard32bc74e2000-07-14 14:49:25 +00002244 ctxt->instate = (xmlParserInputState) state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002245 return(NULL);
2246 }
2247 }
Daniel Veillard4948eb42000-08-29 09:41:15 +00002248 count++;
2249 if (count > 50) {
2250 GROW;
2251 count = 0;
2252 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002253 COPY_BUF(l,buf,len,cur);
2254 NEXTL(l);
2255 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002256 if (cur == 0) {
2257 GROW;
2258 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00002259 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002260 }
2261 }
2262 buf[len] = 0;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00002263 ctxt->instate = (xmlParserInputState) state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002264 if (!IS_CHAR(cur)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002265 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002266 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2267 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
Daniel Veillard10a2c651999-12-12 13:03:50 +00002268 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002269 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002270 } else {
2271 NEXT;
2272 }
2273 return(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002274}
2275
Daniel Veillard11e00581998-10-24 18:27:49 +00002276/**
2277 * xmlParsePubidLiteral:
2278 * @ctxt: an XML parser context
Daniel Veillard260a68f1998-08-13 03:39:55 +00002279 *
Daniel Veillard11e00581998-10-24 18:27:49 +00002280 * parse an XML public literal
Daniel Veillard1e346af1999-02-22 10:33:01 +00002281 *
2282 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2283 *
2284 * Returns the PubidLiteral parsed or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002285 */
2286
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002287xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002288xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002289 xmlChar *buf = NULL;
2290 int len = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002291 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002292 xmlChar cur;
2293 xmlChar stop;
Daniel Veillard4948eb42000-08-29 09:41:15 +00002294 int count = 0;
Daniel Veillard6077d031999-10-09 09:11:45 +00002295
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002296 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00002297 if (RAW == '"') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002298 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002299 stop = '"';
Daniel Veillardcf461992000-03-14 18:30:20 +00002300 } else if (RAW == '\'') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002301 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002302 stop = '\'';
Daniel Veillard260a68f1998-08-13 03:39:55 +00002303 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002304 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002305 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00002306 ctxt->sax->error(ctxt->userData,
2307 "SystemLiteral \" or ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002308 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002309 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002310 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002311 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00002312 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2313 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002314 xmlGenericError(xmlGenericErrorContext,
2315 "malloc of %d byte failed\n", size);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002316 return(NULL);
2317 }
2318 cur = CUR;
Daniel Veillard4948eb42000-08-29 09:41:15 +00002319 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
Daniel Veillard10a2c651999-12-12 13:03:50 +00002320 if (len + 1 >= size) {
2321 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00002322 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00002323 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002324 xmlGenericError(xmlGenericErrorContext,
2325 "realloc of %d byte failed\n", size);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002326 return(NULL);
2327 }
2328 }
2329 buf[len++] = cur;
Daniel Veillard4948eb42000-08-29 09:41:15 +00002330 count++;
2331 if (count > 50) {
2332 GROW;
2333 count = 0;
2334 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00002335 NEXT;
2336 cur = CUR;
2337 if (cur == 0) {
2338 GROW;
2339 SHRINK;
2340 cur = CUR;
2341 }
2342 }
2343 buf[len] = 0;
2344 if (cur != stop) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002345 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002346 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2347 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
Daniel Veillard10a2c651999-12-12 13:03:50 +00002348 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002349 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002350 } else {
2351 NEXT;
2352 }
2353 return(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002354}
2355
Daniel Veillard11e00581998-10-24 18:27:49 +00002356/**
2357 * xmlParseCharData:
2358 * @ctxt: an XML parser context
2359 * @cdata: int indicating whether we are within a CDATA section
2360 *
2361 * parse a CharData section.
2362 * if we are within a CDATA section ']]>' marks an end of section.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002363 *
Daniel Veillardcf461992000-03-14 18:30:20 +00002364 * The right angle bracket (>) may be represented using the string "&gt;",
2365 * and must, for compatibility, be escaped using "&gt;" or a character
2366 * reference when it appears in the string "]]>" in content, when that
2367 * string is not marking the end of a CDATA section.
2368 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00002369 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2370 */
2371
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002372void
2373xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillardcf461992000-03-14 18:30:20 +00002374 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002375 int nbchar = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002376 int cur, l;
Daniel Veillard4948eb42000-08-29 09:41:15 +00002377 int count = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002378
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002379 SHRINK;
Daniel Veillard4948eb42000-08-29 09:41:15 +00002380 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00002381 cur = CUR_CHAR(l);
Daniel Veillard4948eb42000-08-29 09:41:15 +00002382 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
Daniel Veillard5e873c42000-04-12 13:27:38 +00002383 ((cur != '&') || (ctxt->token == '&')) &&
Daniel Veillardb1059e22000-09-16 14:02:43 +00002384 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002385 if ((cur == ']') && (NXT(1) == ']') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002386 (NXT(2) == '>')) {
2387 if (cdata) break;
2388 else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002389 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002390 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcf461992000-03-14 18:30:20 +00002391 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002392 "Sequence ']]>' not allowed in content\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00002393 /* Should this be relaxed ??? I see a "must here */
2394 ctxt->wellFormed = 0;
2395 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002396 }
2397 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002398 COPY_BUF(l,buf,nbchar,cur);
2399 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002400 /*
2401 * Ok the segment is to be consumed as chars.
2402 */
Daniel Veillardcf461992000-03-14 18:30:20 +00002403 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002404 if (areBlanks(ctxt, buf, nbchar)) {
2405 if (ctxt->sax->ignorableWhitespace != NULL)
2406 ctxt->sax->ignorableWhitespace(ctxt->userData,
2407 buf, nbchar);
2408 } else {
2409 if (ctxt->sax->characters != NULL)
2410 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2411 }
2412 }
2413 nbchar = 0;
2414 }
Daniel Veillard4948eb42000-08-29 09:41:15 +00002415 count++;
2416 if (count > 50) {
2417 GROW;
2418 count = 0;
2419 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002420 NEXTL(l);
2421 cur = CUR_CHAR(l);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002422 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002423 if (nbchar != 0) {
2424 /*
2425 * Ok the segment is to be consumed as chars.
2426 */
Daniel Veillardcf461992000-03-14 18:30:20 +00002427 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002428 if (areBlanks(ctxt, buf, nbchar)) {
2429 if (ctxt->sax->ignorableWhitespace != NULL)
2430 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2431 } else {
2432 if (ctxt->sax->characters != NULL)
2433 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2434 }
2435 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002436 }
2437}
2438
Daniel Veillard11e00581998-10-24 18:27:49 +00002439/**
2440 * xmlParseExternalID:
2441 * @ctxt: an XML parser context
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002442 * @publicID: a xmlChar** receiving PubidLiteral
Daniel Veillard1e346af1999-02-22 10:33:01 +00002443 * @strict: indicate whether we should restrict parsing to only
2444 * production [75], see NOTE below
Daniel Veillard11e00581998-10-24 18:27:49 +00002445 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00002446 * Parse an External ID or a Public ID
2447 *
2448 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2449 * 'PUBLIC' S PubidLiteral S SystemLiteral
Daniel Veillard260a68f1998-08-13 03:39:55 +00002450 *
2451 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2452 * | 'PUBLIC' S PubidLiteral S SystemLiteral
Daniel Veillard1e346af1999-02-22 10:33:01 +00002453 *
2454 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2455 *
2456 * Returns the function returns SystemLiteral and in the second
2457 * case publicID receives PubidLiteral, is strict is off
2458 * it is possible to return NULL and have publicID set.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002459 */
2460
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002461xmlChar *
2462xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2463 xmlChar *URI = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002464
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002465 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00002466 if ((RAW == 'S') && (NXT(1) == 'Y') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00002467 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2468 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2469 SKIP(6);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002470 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002471 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002472 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002473 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002474 "Space required after 'SYSTEM'\n");
2475 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002476 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002477 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002478 SKIP_BLANKS;
2479 URI = xmlParseSystemLiteral(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002480 if (URI == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002481 ctxt->errNo = XML_ERR_URI_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002482 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002483 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00002484 "xmlParseExternalID: SYSTEM, no URI\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002485 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002486 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002487 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002488 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00002489 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2490 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2491 SKIP(6);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002492 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002493 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002494 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002495 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002496 "Space required after 'PUBLIC'\n");
2497 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002498 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002499 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002500 SKIP_BLANKS;
2501 *publicID = xmlParsePubidLiteral(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002502 if (*publicID == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002503 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002504 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002505 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00002506 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002507 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002508 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002509 }
Daniel Veillard1e346af1999-02-22 10:33:01 +00002510 if (strict) {
2511 /*
2512 * We don't handle [83] so "S SystemLiteral" is required.
2513 */
2514 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002515 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00002516 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002517 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00002518 "Space required after the Public Identifier\n");
2519 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002520 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00002521 }
2522 } else {
2523 /*
2524 * We handle [83] so we return immediately, if
2525 * "S SystemLiteral" is not detected. From a purely parsing
2526 * point of view that's a nice mess.
2527 */
Daniel Veillard10a2c651999-12-12 13:03:50 +00002528 const xmlChar *ptr;
2529 GROW;
2530
2531 ptr = CUR_PTR;
Daniel Veillard1e346af1999-02-22 10:33:01 +00002532 if (!IS_BLANK(*ptr)) return(NULL);
2533
Daniel Veillard4948eb42000-08-29 09:41:15 +00002534 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Daniel Veillardcf461992000-03-14 18:30:20 +00002535 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002536 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002537 SKIP_BLANKS;
2538 URI = xmlParseSystemLiteral(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002539 if (URI == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002540 ctxt->errNo = XML_ERR_URI_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002541 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002542 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00002543 "xmlParseExternalID: PUBLIC, no URI\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002544 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002545 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002546 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002547 }
2548 return(URI);
2549}
2550
Daniel Veillard11e00581998-10-24 18:27:49 +00002551/**
2552 * xmlParseComment:
Daniel Veillard1e346af1999-02-22 10:33:01 +00002553 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00002554 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00002555 * Skip an XML (SGML) comment <!-- .... -->
Daniel Veillard260a68f1998-08-13 03:39:55 +00002556 * The spec says that "For compatibility, the string "--" (double-hyphen)
2557 * must not occur within comments. "
2558 *
2559 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2560 */
Daniel Veillard517752b1999-04-05 12:20:10 +00002561void
Daniel Veillardb96e6431999-08-29 21:02:19 +00002562xmlParseComment(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002563 xmlChar *buf = NULL;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002564 int len;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002565 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillardcf461992000-03-14 18:30:20 +00002566 int q, ql;
2567 int r, rl;
2568 int cur, l;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002569 xmlParserInputState state;
Daniel Veillardcf461992000-03-14 18:30:20 +00002570 xmlParserInputPtr input = ctxt->input;
Daniel Veillard4948eb42000-08-29 09:41:15 +00002571 int count = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002572
2573 /*
2574 * Check that there is a comment right here.
2575 */
Daniel Veillardcf461992000-03-14 18:30:20 +00002576 if ((RAW != '<') || (NXT(1) != '!') ||
Daniel Veillard517752b1999-04-05 12:20:10 +00002577 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002578
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002579 state = ctxt->instate;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002580 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002581 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002582 SKIP(4);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002583 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2584 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002585 xmlGenericError(xmlGenericErrorContext,
2586 "malloc of %d byte failed\n", size);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002587 ctxt->instate = state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002588 return;
2589 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002590 q = CUR_CHAR(ql);
2591 NEXTL(ql);
2592 r = CUR_CHAR(rl);
2593 NEXTL(rl);
2594 cur = CUR_CHAR(l);
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002595 len = 0;
Daniel Veillard4948eb42000-08-29 09:41:15 +00002596 while (IS_CHAR(cur) && /* checked */
Daniel Veillard10a2c651999-12-12 13:03:50 +00002597 ((cur != '>') ||
2598 (r != '-') || (q != '-'))) {
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002599 if ((r == '-') && (q == '-') && (len > 1)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002600 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002602 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00002603 "Comment must not contain '--' (double-hyphen)`\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002604 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002605 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002606 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002607 if (len + 5 >= size) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002608 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00002609 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00002610 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002611 xmlGenericError(xmlGenericErrorContext,
2612 "realloc of %d byte failed\n", size);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002613 ctxt->instate = state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002614 return;
2615 }
2616 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002617 COPY_BUF(ql,buf,len,q);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002618 q = r;
Daniel Veillardcf461992000-03-14 18:30:20 +00002619 ql = rl;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002620 r = cur;
Daniel Veillardcf461992000-03-14 18:30:20 +00002621 rl = l;
Daniel Veillard4948eb42000-08-29 09:41:15 +00002622
2623 count++;
2624 if (count > 50) {
2625 GROW;
2626 count = 0;
2627 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002628 NEXTL(l);
2629 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002630 if (cur == 0) {
2631 SHRINK;
2632 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00002633 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002634 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002635 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00002636 buf[len] = 0;
2637 if (!IS_CHAR(cur)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002638 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002639 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00002640 ctxt->sax->error(ctxt->userData,
Daniel Veillard10a2c651999-12-12 13:03:50 +00002641 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002642 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002643 ctxt->disableSAX = 1;
2644 xmlFree(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002645 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00002646 if (input != ctxt->input) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002647 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
Daniel Veillardcf461992000-03-14 18:30:20 +00002648 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2649 ctxt->sax->error(ctxt->userData,
2650"Comment doesn't start and stop in the same entity\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00002651 ctxt->wellFormed = 0;
2652 ctxt->disableSAX = 1;
2653 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002654 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00002655 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2656 (!ctxt->disableSAX))
Daniel Veillard10a2c651999-12-12 13:03:50 +00002657 ctxt->sax->comment(ctxt->userData, buf);
2658 xmlFree(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002659 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002660 ctxt->instate = state;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002661}
2662
Daniel Veillard11e00581998-10-24 18:27:49 +00002663/**
2664 * xmlParsePITarget:
2665 * @ctxt: an XML parser context
2666 *
2667 * parse the name of a PI
Daniel Veillard260a68f1998-08-13 03:39:55 +00002668 *
2669 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
Daniel Veillard1e346af1999-02-22 10:33:01 +00002670 *
2671 * Returns the PITarget name or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00002672 */
2673
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002674xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002675xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002676 xmlChar *name;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002677
2678 name = xmlParseName(ctxt);
Daniel Veillard3c558c31999-12-22 11:30:41 +00002679 if ((name != NULL) &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00002680 ((name[0] == 'x') || (name[0] == 'X')) &&
2681 ((name[1] == 'm') || (name[1] == 'M')) &&
2682 ((name[2] == 'l') || (name[2] == 'L'))) {
Daniel Veillard3c558c31999-12-22 11:30:41 +00002683 int i;
Daniel Veillardcf461992000-03-14 18:30:20 +00002684 if ((name[0] == 'x') && (name[1] == 'm') &&
2685 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002686 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
Daniel Veillardcf461992000-03-14 18:30:20 +00002687 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2688 ctxt->sax->error(ctxt->userData,
2689 "XML declaration allowed only at the start of the document\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00002690 ctxt->wellFormed = 0;
2691 ctxt->disableSAX = 1;
2692 return(name);
2693 } else if (name[3] == 0) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002694 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
Daniel Veillardcf461992000-03-14 18:30:20 +00002695 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2696 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00002697 ctxt->wellFormed = 0;
2698 ctxt->disableSAX = 1;
2699 return(name);
2700 }
Daniel Veillard3c558c31999-12-22 11:30:41 +00002701 for (i = 0;;i++) {
2702 if (xmlW3CPIs[i] == NULL) break;
Daniel Veillard8b5dd832000-10-01 20:28:44 +00002703 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
Daniel Veillard3c558c31999-12-22 11:30:41 +00002704 return(name);
2705 }
2706 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002707 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
Daniel Veillard3c558c31999-12-22 11:30:41 +00002708 ctxt->sax->warning(ctxt->userData,
Daniel Veillard7f7d1111999-09-22 09:46:25 +00002709 "xmlParsePItarget: invalid name prefix 'xml'\n");
Daniel Veillard7f7d1111999-09-22 09:46:25 +00002710 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002711 }
2712 return(name);
2713}
2714
Daniel Veillard11e00581998-10-24 18:27:49 +00002715/**
2716 * xmlParsePI:
2717 * @ctxt: an XML parser context
2718 *
2719 * parse an XML Processing Instruction.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002720 *
2721 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
Daniel Veillard1e346af1999-02-22 10:33:01 +00002722 *
 * The processing is transferred to SAX once parsed.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002724 */
2725
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002726void
2727xmlParsePI(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002728 xmlChar *buf = NULL;
2729 int len = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002730 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillardcf461992000-03-14 18:30:20 +00002731 int cur, l;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002732 xmlChar *target;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002733 xmlParserInputState state;
Daniel Veillard4948eb42000-08-29 09:41:15 +00002734 int count = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002735
Daniel Veillardcf461992000-03-14 18:30:20 +00002736 if ((RAW == '<') && (NXT(1) == '?')) {
2737 xmlParserInputPtr input = ctxt->input;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002738 state = ctxt->instate;
2739 ctxt->instate = XML_PARSER_PI;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002740 /*
2741 * this is a Processing Instruction.
2742 */
2743 SKIP(2);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002744 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002745
2746 /*
2747 * Parse the target name and check for special support like
2748 * namespace.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002749 */
2750 target = xmlParsePITarget(ctxt);
2751 if (target != NULL) {
Daniel Veillardcf461992000-03-14 18:30:20 +00002752 if ((RAW == '?') && (NXT(1) == '>')) {
2753 if (input != ctxt->input) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002754 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
Daniel Veillardcf461992000-03-14 18:30:20 +00002755 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2756 ctxt->sax->error(ctxt->userData,
2757 "PI declaration doesn't start and stop in the same entity\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00002758 ctxt->wellFormed = 0;
2759 ctxt->disableSAX = 1;
2760 }
2761 SKIP(2);
2762
2763 /*
2764 * SAX: PI detected.
2765 */
2766 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2767 (ctxt->sax->processingInstruction != NULL))
2768 ctxt->sax->processingInstruction(ctxt->userData,
2769 target, NULL);
2770 ctxt->instate = state;
2771 xmlFree(target);
2772 return;
2773 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00002774 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2775 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002776 xmlGenericError(xmlGenericErrorContext,
2777 "malloc of %d byte failed\n", size);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002778 ctxt->instate = state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002779 return;
2780 }
2781 cur = CUR;
2782 if (!IS_BLANK(cur)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002783 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillardb96e6431999-08-29 21:02:19 +00002784 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2785 ctxt->sax->error(ctxt->userData,
2786 "xmlParsePI: PI %s space expected\n", target);
2787 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002788 ctxt->disableSAX = 1;
Daniel Veillardb96e6431999-08-29 21:02:19 +00002789 }
2790 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00002791 cur = CUR_CHAR(l);
Daniel Veillard4948eb42000-08-29 09:41:15 +00002792 while (IS_CHAR(cur) && /* checked */
Daniel Veillard10a2c651999-12-12 13:03:50 +00002793 ((cur != '?') || (NXT(1) != '>'))) {
Daniel Veillardcf461992000-03-14 18:30:20 +00002794 if (len + 5 >= size) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002795 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00002796 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00002797 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00002798 xmlGenericError(xmlGenericErrorContext,
2799 "realloc of %d byte failed\n", size);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002800 ctxt->instate = state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002801 return;
2802 }
2803 }
Daniel Veillard4948eb42000-08-29 09:41:15 +00002804 count++;
2805 if (count > 50) {
2806 GROW;
2807 count = 0;
2808 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002809 COPY_BUF(l,buf,len,cur);
2810 NEXTL(l);
2811 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002812 if (cur == 0) {
2813 SHRINK;
2814 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00002815 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002816 }
2817 }
2818 buf[len] = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002819 if (cur != '?') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002820 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
Daniel Veillard517752b1999-04-05 12:20:10 +00002821 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002822 ctxt->sax->error(ctxt->userData,
Daniel Veillard517752b1999-04-05 12:20:10 +00002823 "xmlParsePI: PI %s never end ...\n", target);
2824 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002825 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002826 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00002827 if (input != ctxt->input) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002828 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
Daniel Veillardcf461992000-03-14 18:30:20 +00002829 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2830 ctxt->sax->error(ctxt->userData,
2831 "PI declaration doesn't start and stop in the same entity\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00002832 ctxt->wellFormed = 0;
2833 ctxt->disableSAX = 1;
2834 }
Daniel Veillard517752b1999-04-05 12:20:10 +00002835 SKIP(2);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002836
Daniel Veillard517752b1999-04-05 12:20:10 +00002837 /*
2838 * SAX: PI detected.
2839 */
Daniel Veillardcf461992000-03-14 18:30:20 +00002840 if ((ctxt->sax) && (!ctxt->disableSAX) &&
Daniel Veillard517752b1999-04-05 12:20:10 +00002841 (ctxt->sax->processingInstruction != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00002842 ctxt->sax->processingInstruction(ctxt->userData,
Daniel Veillard10a2c651999-12-12 13:03:50 +00002843 target, buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002844 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00002845 xmlFree(buf);
Daniel Veillard6454aec1999-09-02 22:04:43 +00002846 xmlFree(target);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002847 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002848 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002849 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00002850 ctxt->sax->error(ctxt->userData,
2851 "xmlParsePI : no target name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002852 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002853 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002854 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002855 ctxt->instate = state;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002856 }
2857}
2858
Daniel Veillard11e00581998-10-24 18:27:49 +00002859/**
2860 * xmlParseNotationDecl:
2861 * @ctxt: an XML parser context
2862 *
2863 * parse a notation declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00002864 *
2865 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
2866 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00002867 * Hence there is actually 3 choices:
2868 * 'PUBLIC' S PubidLiteral
2869 * 'PUBLIC' S PubidLiteral S SystemLiteral
2870 * and 'SYSTEM' S SystemLiteral
Daniel Veillard11e00581998-10-24 18:27:49 +00002871 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00002872 * See the NOTE on xmlParseExternalID().
Daniel Veillard260a68f1998-08-13 03:39:55 +00002873 */
2874
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002875void
2876xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002877 xmlChar *name;
2878 xmlChar *Pubid;
2879 xmlChar *Systemid;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002880
Daniel Veillardcf461992000-03-14 18:30:20 +00002881 if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00002882 (NXT(2) == 'N') && (NXT(3) == 'O') &&
2883 (NXT(4) == 'T') && (NXT(5) == 'A') &&
2884 (NXT(6) == 'T') && (NXT(7) == 'I') &&
Daniel Veillard1e346af1999-02-22 10:33:01 +00002885 (NXT(8) == 'O') && (NXT(9) == 'N')) {
Daniel Veillardcf461992000-03-14 18:30:20 +00002886 xmlParserInputPtr input = ctxt->input;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002887 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002888 SKIP(10);
Daniel Veillard1e346af1999-02-22 10:33:01 +00002889 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002890 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002891 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00002892 ctxt->sax->error(ctxt->userData,
2893 "Space required after '<!NOTATION'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002894 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002895 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002896 return;
2897 }
2898 SKIP_BLANKS;
Daniel Veillard1e346af1999-02-22 10:33:01 +00002899
2900 name = xmlParseName(ctxt);
2901 if (name == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002902 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00002903 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00002904 ctxt->sax->error(ctxt->userData,
2905 "NOTATION: Name expected here\n");
Daniel Veillard1e346af1999-02-22 10:33:01 +00002906 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002907 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00002908 return;
2909 }
2910 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002911 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00002912 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002913 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00002914 "Space required after the NOTATION name'\n");
2915 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002916 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00002917 return;
2918 }
2919 SKIP_BLANKS;
2920
Daniel Veillard260a68f1998-08-13 03:39:55 +00002921 /*
Daniel Veillard1e346af1999-02-22 10:33:01 +00002922 * Parse the IDs.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002923 */
Daniel Veillardcf461992000-03-14 18:30:20 +00002924 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
Daniel Veillard1e346af1999-02-22 10:33:01 +00002925 SKIP_BLANKS;
2926
Daniel Veillardcf461992000-03-14 18:30:20 +00002927 if (RAW == '>') {
2928 if (input != ctxt->input) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002929 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
Daniel Veillardcf461992000-03-14 18:30:20 +00002930 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2931 ctxt->sax->error(ctxt->userData,
2932"Notation declaration doesn't start and stop in the same entity\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00002933 ctxt->wellFormed = 0;
2934 ctxt->disableSAX = 1;
2935 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002936 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00002937 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
2938 (ctxt->sax->notationDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002939 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
Daniel Veillard1e346af1999-02-22 10:33:01 +00002940 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002941 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00002942 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002943 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00002944 "'>' required to close NOTATION declaration\n");
2945 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002946 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00002947 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00002948 xmlFree(name);
2949 if (Systemid != NULL) xmlFree(Systemid);
2950 if (Pubid != NULL) xmlFree(Pubid);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002951 }
2952}
2953
Daniel Veillard11e00581998-10-24 18:27:49 +00002954/**
2955 * xmlParseEntityDecl:
2956 * @ctxt: an XML parser context
2957 *
2958 * parse <!ENTITY declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00002959 *
2960 * [70] EntityDecl ::= GEDecl | PEDecl
2961 *
2962 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
2963 *
2964 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
2965 *
2966 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
2967 *
2968 * [74] PEDef ::= EntityValue | ExternalID
2969 *
2970 * [76] NDataDecl ::= S 'NDATA' S Name
Daniel Veillardb05deb71999-08-10 19:04:08 +00002971 *
2972 * [ VC: Notation Declared ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00002973 * The Name must match the declared name of a notation.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002974 */
2975
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002976void
2977xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002978 xmlChar *name = NULL;
2979 xmlChar *value = NULL;
2980 xmlChar *URI = NULL, *literal = NULL;
2981 xmlChar *ndata = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002982 int isParameter = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002983 xmlChar *orig = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002984
Daniel Veillardb05deb71999-08-10 19:04:08 +00002985 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00002986 if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00002987 (NXT(2) == 'E') && (NXT(3) == 'N') &&
2988 (NXT(4) == 'T') && (NXT(5) == 'I') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002989 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
Daniel Veillardcf461992000-03-14 18:30:20 +00002990 xmlParserInputPtr input = ctxt->input;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002991 ctxt->instate = XML_PARSER_ENTITY_DECL;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002992 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002993 SKIP(8);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002994 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00002995 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00002997 ctxt->sax->error(ctxt->userData,
2998 "Space required after '<!ENTITY'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002999 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003000 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003001 }
3002 SKIP_BLANKS;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003003
Daniel Veillardcf461992000-03-14 18:30:20 +00003004 if (RAW == '%') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003005 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003006 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003007 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003008 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003009 ctxt->sax->error(ctxt->userData,
3010 "Space required after '%'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003011 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003012 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003013 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003014 SKIP_BLANKS;
3015 isParameter = 1;
3016 }
3017
3018 name = xmlParseName(ctxt);
3019 if (name == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003020 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003022 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003023 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003024 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003025 return;
3026 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003027 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003028 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003029 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003030 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003031 "Space required after the entity name\n");
3032 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003033 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003034 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003035 SKIP_BLANKS;
3036
3037 /*
Daniel Veillard1e346af1999-02-22 10:33:01 +00003038 * handle the various case of definitions...
Daniel Veillard260a68f1998-08-13 03:39:55 +00003039 */
3040 if (isParameter) {
Daniel Veillard39c7d712000-09-10 16:14:55 +00003041 if ((RAW == '"') || (RAW == '\'')) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00003042 value = xmlParseEntityValue(ctxt, &orig);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003043 if (value) {
Daniel Veillardcf461992000-03-14 18:30:20 +00003044 if ((ctxt->sax != NULL) &&
3045 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003046 ctxt->sax->entityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003047 XML_INTERNAL_PARAMETER_ENTITY,
3048 NULL, NULL, value);
3049 }
Daniel Veillard39c7d712000-09-10 16:14:55 +00003050 } else {
Daniel Veillard1e346af1999-02-22 10:33:01 +00003051 URI = xmlParseExternalID(ctxt, &literal, 1);
Daniel Veillardcf461992000-03-14 18:30:20 +00003052 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003053 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
Daniel Veillardcf461992000-03-14 18:30:20 +00003054 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3055 ctxt->sax->error(ctxt->userData,
3056 "Entity value required\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00003057 ctxt->wellFormed = 0;
3058 ctxt->disableSAX = 1;
3059 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003060 if (URI) {
Daniel Veillard496a1cf2000-05-03 14:20:55 +00003061 xmlURIPtr uri;
3062
3063 uri = xmlParseURI((const char *) URI);
3064 if (uri == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003065 ctxt->errNo = XML_ERR_INVALID_URI;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00003066 if ((ctxt->sax != NULL) &&
3067 (!ctxt->disableSAX) &&
3068 (ctxt->sax->error != NULL))
3069 ctxt->sax->error(ctxt->userData,
3070 "Invalid URI: %s\n", URI);
3071 ctxt->wellFormed = 0;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00003072 } else {
3073 if (uri->fragment != NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003074 ctxt->errNo = XML_ERR_URI_FRAGMENT;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00003075 if ((ctxt->sax != NULL) &&
3076 (!ctxt->disableSAX) &&
3077 (ctxt->sax->error != NULL))
3078 ctxt->sax->error(ctxt->userData,
3079 "Fragment not allowed: %s\n", URI);
3080 ctxt->wellFormed = 0;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00003081 } else {
3082 if ((ctxt->sax != NULL) &&
3083 (!ctxt->disableSAX) &&
3084 (ctxt->sax->entityDecl != NULL))
3085 ctxt->sax->entityDecl(ctxt->userData, name,
3086 XML_EXTERNAL_PARAMETER_ENTITY,
3087 literal, URI, NULL);
3088 }
3089 xmlFreeURI(uri);
3090 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003091 }
3092 }
3093 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00003094 if ((RAW == '"') || (RAW == '\'')) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00003095 value = xmlParseEntityValue(ctxt, &orig);
Daniel Veillardcf461992000-03-14 18:30:20 +00003096 if ((ctxt->sax != NULL) &&
3097 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003098 ctxt->sax->entityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003099 XML_INTERNAL_GENERAL_ENTITY,
3100 NULL, NULL, value);
3101 } else {
Daniel Veillard1e346af1999-02-22 10:33:01 +00003102 URI = xmlParseExternalID(ctxt, &literal, 1);
Daniel Veillardcf461992000-03-14 18:30:20 +00003103 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003104 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
Daniel Veillardcf461992000-03-14 18:30:20 +00003105 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3106 ctxt->sax->error(ctxt->userData,
3107 "Entity value required\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00003108 ctxt->wellFormed = 0;
3109 ctxt->disableSAX = 1;
3110 }
Daniel Veillard496a1cf2000-05-03 14:20:55 +00003111 if (URI) {
3112 xmlURIPtr uri;
3113
3114 uri = xmlParseURI((const char *)URI);
3115 if (uri == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003116 ctxt->errNo = XML_ERR_INVALID_URI;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00003117 if ((ctxt->sax != NULL) &&
3118 (!ctxt->disableSAX) &&
3119 (ctxt->sax->error != NULL))
3120 ctxt->sax->error(ctxt->userData,
3121 "Invalid URI: %s\n", URI);
3122 ctxt->wellFormed = 0;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00003123 } else {
3124 if (uri->fragment != NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003125 ctxt->errNo = XML_ERR_URI_FRAGMENT;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00003126 if ((ctxt->sax != NULL) &&
3127 (!ctxt->disableSAX) &&
3128 (ctxt->sax->error != NULL))
3129 ctxt->sax->error(ctxt->userData,
3130 "Fragment not allowed: %s\n", URI);
3131 ctxt->wellFormed = 0;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00003132 }
3133 xmlFreeURI(uri);
3134 }
3135 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003136 if ((RAW != '>') && (!IS_BLANK(CUR))) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003137 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003138 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003139 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003140 "Space required before 'NDATA'\n");
3141 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003142 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003143 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003144 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00003145 if ((RAW == 'N') && (NXT(1) == 'D') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00003146 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3147 (NXT(4) == 'A')) {
3148 SKIP(5);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003149 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003150 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003151 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003152 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003153 "Space required after 'NDATA'\n");
3154 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003155 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003156 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003157 SKIP_BLANKS;
3158 ndata = xmlParseName(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00003159 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardb96e6431999-08-29 21:02:19 +00003160 (ctxt->sax->unparsedEntityDecl != NULL))
3161 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003162 literal, URI, ndata);
3163 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00003164 if ((ctxt->sax != NULL) &&
3165 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003166 ctxt->sax->entityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003167 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3168 literal, URI, NULL);
3169 }
3170 }
3171 }
3172 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00003173 if (RAW != '>') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003174 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003175 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003176 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003177 "xmlParseEntityDecl: entity %s not terminated\n", name);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003178 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003179 ctxt->disableSAX = 1;
3180 } else {
3181 if (input != ctxt->input) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003182 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
Daniel Veillardcf461992000-03-14 18:30:20 +00003183 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3184 ctxt->sax->error(ctxt->userData,
3185"Entity declaration doesn't start and stop in the same entity\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00003186 ctxt->wellFormed = 0;
3187 ctxt->disableSAX = 1;
3188 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003189 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00003190 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00003191 if (orig != NULL) {
3192 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00003193 * Ugly mechanism to save the raw entity value.
Daniel Veillard011b63c1999-06-02 17:44:04 +00003194 */
3195 xmlEntityPtr cur = NULL;
3196
Daniel Veillardb05deb71999-08-10 19:04:08 +00003197 if (isParameter) {
3198 if ((ctxt->sax != NULL) &&
3199 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillardc08a2c61999-09-08 21:35:25 +00003200 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003201 } else {
3202 if ((ctxt->sax != NULL) &&
3203 (ctxt->sax->getEntity != NULL))
Daniel Veillardc08a2c61999-09-08 21:35:25 +00003204 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003205 }
3206 if (cur != NULL) {
3207 if (cur->orig != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00003208 xmlFree(orig);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003209 else
3210 cur->orig = orig;
3211 } else
Daniel Veillard6454aec1999-09-02 22:04:43 +00003212 xmlFree(orig);
Daniel Veillard011b63c1999-06-02 17:44:04 +00003213 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00003214 if (name != NULL) xmlFree(name);
3215 if (value != NULL) xmlFree(value);
3216 if (URI != NULL) xmlFree(URI);
3217 if (literal != NULL) xmlFree(literal);
3218 if (ndata != NULL) xmlFree(ndata);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003219 }
3220}
3221
Daniel Veillard11e00581998-10-24 18:27:49 +00003222/**
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003223 * xmlParseDefaultDecl:
3224 * @ctxt: an XML parser context
3225 * @value: Receive a possible fixed default value for the attribute
3226 *
3227 * Parse an attribute default declaration
3228 *
3229 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3230 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003231 * [ VC: Required Attribute ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00003232 * if the default declaration is the keyword #REQUIRED, then the
3233 * attribute must be specified for all elements of the type in the
3234 * attribute-list declaration.
Daniel Veillardb05deb71999-08-10 19:04:08 +00003235 *
3236 * [ VC: Attribute Default Legal ]
3237 * The declared default value must meet the lexical constraints of
3238 * the declared attribute type c.f. xmlValidateAttributeDecl()
3239 *
3240 * [ VC: Fixed Attribute Default ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00003241 * if an attribute has a default value declared with the #FIXED
3242 * keyword, instances of that attribute must match the default value.
Daniel Veillardb05deb71999-08-10 19:04:08 +00003243 *
3244 * [ WFC: No < in Attribute Values ]
3245 * handled in xmlParseAttValue()
3246 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003247 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3248 * or XML_ATTRIBUTE_FIXED.
3249 */
3250
3251int
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003252xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003253 int val;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003254 xmlChar *ret;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003255
3256 *value = NULL;
Daniel Veillardcf461992000-03-14 18:30:20 +00003257 if ((RAW == '#') && (NXT(1) == 'R') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003258 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3259 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3260 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3261 (NXT(8) == 'D')) {
3262 SKIP(9);
3263 return(XML_ATTRIBUTE_REQUIRED);
3264 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003265 if ((RAW == '#') && (NXT(1) == 'I') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003266 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3267 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3268 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3269 SKIP(8);
3270 return(XML_ATTRIBUTE_IMPLIED);
3271 }
3272 val = XML_ATTRIBUTE_NONE;
Daniel Veillardcf461992000-03-14 18:30:20 +00003273 if ((RAW == '#') && (NXT(1) == 'F') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003274 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3275 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3276 SKIP(6);
3277 val = XML_ATTRIBUTE_FIXED;
3278 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003279 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003280 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003281 ctxt->sax->error(ctxt->userData,
3282 "Space required after '#FIXED'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003283 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003284 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003285 }
3286 SKIP_BLANKS;
3287 }
3288 ret = xmlParseAttValue(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003289 ctxt->instate = XML_PARSER_DTD;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003290 if (ret == NULL) {
3291 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003292 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003293 "Attribute default value declaration error\n");
3294 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003295 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003296 } else
3297 *value = ret;
3298 return(val);
3299}
3300
3301/**
Daniel Veillard1e346af1999-02-22 10:33:01 +00003302 * xmlParseNotationType:
3303 * @ctxt: an XML parser context
3304 *
3305 * parse an Notation attribute type.
3306 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003307 * Note: the leading 'NOTATION' S part has already being parsed...
3308 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003309 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3310 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003311 * [ VC: Notation Attributes ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00003312 * Values of this type must match one of the notation names included
Daniel Veillardb05deb71999-08-10 19:04:08 +00003313 * in the declaration; all notation names in the declaration must be declared.
Daniel Veillard1e346af1999-02-22 10:33:01 +00003314 *
3315 * Returns: the notation attribute tree built while parsing
3316 */
3317
3318xmlEnumerationPtr
3319xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003320 xmlChar *name;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003321 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3322
Daniel Veillardcf461992000-03-14 18:30:20 +00003323 if (RAW != '(') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003324 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003325 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003326 ctxt->sax->error(ctxt->userData,
3327 "'(' required to start 'NOTATION'\n");
Daniel Veillard1e346af1999-02-22 10:33:01 +00003328 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003329 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003330 return(NULL);
3331 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003332 SHRINK;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003333 do {
3334 NEXT;
3335 SKIP_BLANKS;
3336 name = xmlParseName(ctxt);
3337 if (name == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003338 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003339 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003340 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00003341 "Name expected in NOTATION declaration\n");
3342 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003343 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003344 return(ret);
3345 }
3346 cur = xmlCreateEnumeration(name);
Daniel Veillard6454aec1999-09-02 22:04:43 +00003347 xmlFree(name);
Daniel Veillard1e346af1999-02-22 10:33:01 +00003348 if (cur == NULL) return(ret);
3349 if (last == NULL) ret = last = cur;
3350 else {
3351 last->next = cur;
3352 last = cur;
3353 }
3354 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00003355 } while (RAW == '|');
3356 if (RAW != ')') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003357 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003358 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003359 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00003360 "')' required to finish NOTATION declaration\n");
3361 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003362 ctxt->disableSAX = 1;
3363 if ((last != NULL) && (last != ret))
3364 xmlFreeEnumeration(last);
Daniel Veillard1e346af1999-02-22 10:33:01 +00003365 return(ret);
3366 }
3367 NEXT;
3368 return(ret);
3369}
3370
3371/**
3372 * xmlParseEnumerationType:
3373 * @ctxt: an XML parser context
3374 *
3375 * parse an Enumeration attribute type.
3376 *
3377 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3378 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003379 * [ VC: Enumeration ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00003380 * Values of this type must match one of the Nmtoken tokens in
Daniel Veillardb05deb71999-08-10 19:04:08 +00003381 * the declaration
3382 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003383 * Returns: the enumeration attribute tree built while parsing
3384 */
3385
3386xmlEnumerationPtr
3387xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003388 xmlChar *name;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003389 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3390
Daniel Veillardcf461992000-03-14 18:30:20 +00003391 if (RAW != '(') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003392 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003393 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003394 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00003395 "'(' required to start ATTLIST enumeration\n");
3396 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003397 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003398 return(NULL);
3399 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003400 SHRINK;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003401 do {
3402 NEXT;
3403 SKIP_BLANKS;
3404 name = xmlParseNmtoken(ctxt);
3405 if (name == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003406 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003407 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003408 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00003409 "NmToken expected in ATTLIST enumeration\n");
3410 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003411 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003412 return(ret);
3413 }
3414 cur = xmlCreateEnumeration(name);
Daniel Veillard6454aec1999-09-02 22:04:43 +00003415 xmlFree(name);
Daniel Veillard1e346af1999-02-22 10:33:01 +00003416 if (cur == NULL) return(ret);
3417 if (last == NULL) ret = last = cur;
3418 else {
3419 last->next = cur;
3420 last = cur;
3421 }
3422 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00003423 } while (RAW == '|');
3424 if (RAW != ')') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003425 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003426 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003427 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00003428 "')' required to finish ATTLIST enumeration\n");
3429 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003430 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003431 return(ret);
3432 }
3433 NEXT;
3434 return(ret);
3435}
3436
3437/**
Daniel Veillard11e00581998-10-24 18:27:49 +00003438 * xmlParseEnumeratedType:
3439 * @ctxt: an XML parser context
Daniel Veillard1e346af1999-02-22 10:33:01 +00003440 * @tree: the enumeration tree built while parsing
Daniel Veillard11e00581998-10-24 18:27:49 +00003441 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003442 * parse an Enumerated attribute type.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003443 *
3444 * [57] EnumeratedType ::= NotationType | Enumeration
3445 *
3446 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3447 *
Daniel Veillard11e00581998-10-24 18:27:49 +00003448 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003449 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
Daniel Veillard260a68f1998-08-13 03:39:55 +00003450 */
3451
Daniel Veillard1e346af1999-02-22 10:33:01 +00003452int
3453xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillardcf461992000-03-14 18:30:20 +00003454 if ((RAW == 'N') && (NXT(1) == 'O') &&
Daniel Veillard1e346af1999-02-22 10:33:01 +00003455 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3456 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3457 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3458 SKIP(8);
3459 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003460 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003461 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003462 ctxt->sax->error(ctxt->userData,
3463 "Space required after 'NOTATION'\n");
Daniel Veillard1e346af1999-02-22 10:33:01 +00003464 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003465 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003466 return(0);
3467 }
3468 SKIP_BLANKS;
3469 *tree = xmlParseNotationType(ctxt);
3470 if (*tree == NULL) return(0);
3471 return(XML_ATTRIBUTE_NOTATION);
3472 }
3473 *tree = xmlParseEnumerationType(ctxt);
3474 if (*tree == NULL) return(0);
3475 return(XML_ATTRIBUTE_ENUMERATION);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003476}
3477
Daniel Veillard11e00581998-10-24 18:27:49 +00003478/**
3479 * xmlParseAttributeType:
3480 * @ctxt: an XML parser context
Daniel Veillard1e346af1999-02-22 10:33:01 +00003481 * @tree: the enumeration tree built while parsing
Daniel Veillard11e00581998-10-24 18:27:49 +00003482 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003483 * parse the Attribute list def for an element
Daniel Veillard260a68f1998-08-13 03:39:55 +00003484 *
3485 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3486 *
3487 * [55] StringType ::= 'CDATA'
3488 *
3489 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3490 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
Daniel Veillard11e00581998-10-24 18:27:49 +00003491 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003492 * Validity constraints for attribute values syntax are checked in
3493 * xmlValidateAttributeValue()
3494 *
3495 * [ VC: ID ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00003496 * Values of type ID must match the Name production. A name must not
Daniel Veillardb05deb71999-08-10 19:04:08 +00003497 * appear more than once in an XML document as a value of this type;
3498 * i.e., ID values must uniquely identify the elements which bear them.
3499 *
3500 * [ VC: One ID per Element Type ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00003501 * No element type may have more than one ID attribute specified.
Daniel Veillardb05deb71999-08-10 19:04:08 +00003502 *
3503 * [ VC: ID Attribute Default ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00003504 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
Daniel Veillardb05deb71999-08-10 19:04:08 +00003505 *
3506 * [ VC: IDREF ]
3507 * Values of type IDREF must match the Name production, and values
Daniel Veillarddbfd6411999-12-28 16:35:14 +00003508 * of type IDREFS must match Names; each IDREF Name must match the value
Daniel Veillardb96e6431999-08-29 21:02:19 +00003509 * of an ID attribute on some element in the XML document; i.e. IDREF
Daniel Veillardb05deb71999-08-10 19:04:08 +00003510 * values must match the value of some ID attribute.
3511 *
3512 * [ VC: Entity Name ]
3513 * Values of type ENTITY must match the Name production, values
Daniel Veillarddbfd6411999-12-28 16:35:14 +00003514 * of type ENTITIES must match Names; each Entity Name must match the
Daniel Veillardb96e6431999-08-29 21:02:19 +00003515 * name of an unparsed entity declared in the DTD.
Daniel Veillardb05deb71999-08-10 19:04:08 +00003516 *
3517 * [ VC: Name Token ]
3518 * Values of type NMTOKEN must match the Nmtoken production; values
3519 * of type NMTOKENS must match Nmtokens.
3520 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003521 * Returns the attribute type
Daniel Veillard260a68f1998-08-13 03:39:55 +00003522 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003523int
Daniel Veillard1e346af1999-02-22 10:33:01 +00003524xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003525 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00003526 if ((RAW == 'C') && (NXT(1) == 'D') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00003527 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3528 (NXT(4) == 'A')) {
3529 SKIP(5);
Daniel Veillard1e346af1999-02-22 10:33:01 +00003530 return(XML_ATTRIBUTE_CDATA);
Daniel Veillardcf461992000-03-14 18:30:20 +00003531 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00003532 (NXT(2) == 'R') && (NXT(3) == 'E') &&
Daniel Veillardb05deb71999-08-10 19:04:08 +00003533 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3534 SKIP(6);
3535 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillardcf461992000-03-14 18:30:20 +00003536 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
Daniel Veillardb05deb71999-08-10 19:04:08 +00003537 (NXT(2) == 'R') && (NXT(3) == 'E') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00003538 (NXT(4) == 'F')) {
3539 SKIP(5);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003540 return(XML_ATTRIBUTE_IDREF);
Daniel Veillardcf461992000-03-14 18:30:20 +00003541 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
Daniel Veillard1e346af1999-02-22 10:33:01 +00003542 SKIP(2);
3543 return(XML_ATTRIBUTE_ID);
Daniel Veillardcf461992000-03-14 18:30:20 +00003544 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00003545 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3546 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3547 SKIP(6);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003548 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillardcf461992000-03-14 18:30:20 +00003549 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00003550 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3551 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3552 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3553 SKIP(8);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003554 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillardcf461992000-03-14 18:30:20 +00003555 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00003556 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3557 (NXT(4) == 'K') && (NXT(5) == 'E') &&
Daniel Veillard1e346af1999-02-22 10:33:01 +00003558 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3559 SKIP(8);
3560 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillardcf461992000-03-14 18:30:20 +00003561 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00003562 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3563 (NXT(4) == 'K') && (NXT(5) == 'E') &&
Daniel Veillard1e346af1999-02-22 10:33:01 +00003564 (NXT(6) == 'N')) {
3565 SKIP(7);
3566 return(XML_ATTRIBUTE_NMTOKEN);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003567 }
Daniel Veillard1e346af1999-02-22 10:33:01 +00003568 return(xmlParseEnumeratedType(ctxt, tree));
Daniel Veillard260a68f1998-08-13 03:39:55 +00003569}
3570
Daniel Veillard11e00581998-10-24 18:27:49 +00003571/**
3572 * xmlParseAttributeListDecl:
3573 * @ctxt: an XML parser context
3574 *
3575 * : parse the Attribute list def for an element
Daniel Veillard260a68f1998-08-13 03:39:55 +00003576 *
3577 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3578 *
3579 * [53] AttDef ::= S Name S AttType S DefaultDecl
Daniel Veillard11e00581998-10-24 18:27:49 +00003580 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00003581 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003582void
3583xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003584 xmlChar *elemName;
3585 xmlChar *attrName;
Daniel Veillardb05deb71999-08-10 19:04:08 +00003586 xmlEnumerationPtr tree;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003587
Daniel Veillardcf461992000-03-14 18:30:20 +00003588 if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00003589 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3590 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3591 (NXT(6) == 'I') && (NXT(7) == 'S') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003592 (NXT(8) == 'T')) {
Daniel Veillardcf461992000-03-14 18:30:20 +00003593 xmlParserInputPtr input = ctxt->input;
3594
Daniel Veillard260a68f1998-08-13 03:39:55 +00003595 SKIP(9);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003596 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003597 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003598 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003599 ctxt->sax->error(ctxt->userData,
3600 "Space required after '<!ATTLIST'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003601 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003602 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003603 }
3604 SKIP_BLANKS;
3605 elemName = xmlParseName(ctxt);
3606 if (elemName == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003607 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003608 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003609 ctxt->sax->error(ctxt->userData,
3610 "ATTLIST: no name for Element\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003611 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003612 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003613 return;
3614 }
3615 SKIP_BLANKS;
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00003616 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00003617 while (RAW != '>') {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003618 const xmlChar *check = CUR_PTR;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003619 int type;
3620 int def;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003621 xmlChar *defaultValue = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003622
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00003623 GROW;
Daniel Veillardb05deb71999-08-10 19:04:08 +00003624 tree = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003625 attrName = xmlParseName(ctxt);
3626 if (attrName == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003627 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003628 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003629 ctxt->sax->error(ctxt->userData,
3630 "ATTLIST: no name for Attribute\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003631 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003632 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003633 break;
3634 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00003635 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003636 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003637 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003638 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003639 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003640 "Space required after the attribute name\n");
3641 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003642 ctxt->disableSAX = 1;
3643 if (attrName != NULL)
3644 xmlFree(attrName);
3645 if (defaultValue != NULL)
3646 xmlFree(defaultValue);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003647 break;
3648 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003649 SKIP_BLANKS;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003650
Daniel Veillard1e346af1999-02-22 10:33:01 +00003651 type = xmlParseAttributeType(ctxt, &tree);
Daniel Veillardcf461992000-03-14 18:30:20 +00003652 if (type <= 0) {
3653 if (attrName != NULL)
3654 xmlFree(attrName);
3655 if (defaultValue != NULL)
3656 xmlFree(defaultValue);
3657 break;
3658 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003659
Daniel Veillardb05deb71999-08-10 19:04:08 +00003660 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003661 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003662 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003663 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003664 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003665 "Space required after the attribute type\n");
3666 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003667 ctxt->disableSAX = 1;
3668 if (attrName != NULL)
3669 xmlFree(attrName);
3670 if (defaultValue != NULL)
3671 xmlFree(defaultValue);
3672 if (tree != NULL)
3673 xmlFreeEnumeration(tree);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003674 break;
3675 }
3676 SKIP_BLANKS;
3677
3678 def = xmlParseDefaultDecl(ctxt, &defaultValue);
Daniel Veillardcf461992000-03-14 18:30:20 +00003679 if (def <= 0) {
3680 if (attrName != NULL)
3681 xmlFree(attrName);
3682 if (defaultValue != NULL)
3683 xmlFree(defaultValue);
3684 if (tree != NULL)
3685 xmlFreeEnumeration(tree);
3686 break;
3687 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003688
Daniel Veillardb05deb71999-08-10 19:04:08 +00003689 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00003690 if (RAW != '>') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003691 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003692 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003693 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003694 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003695 "Space required after the attribute default value\n");
3696 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003697 ctxt->disableSAX = 1;
3698 if (attrName != NULL)
3699 xmlFree(attrName);
3700 if (defaultValue != NULL)
3701 xmlFree(defaultValue);
3702 if (tree != NULL)
3703 xmlFreeEnumeration(tree);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003704 break;
3705 }
3706 SKIP_BLANKS;
3707 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003708 if (check == CUR_PTR) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003709 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003710 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003711 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003712 "xmlParseAttributeListDecl: detected internal error\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00003713 if (attrName != NULL)
3714 xmlFree(attrName);
3715 if (defaultValue != NULL)
3716 xmlFree(defaultValue);
3717 if (tree != NULL)
3718 xmlFreeEnumeration(tree);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003719 break;
3720 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003721 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3722 (ctxt->sax->attributeDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003723 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
Daniel Veillard1e346af1999-02-22 10:33:01 +00003724 type, def, defaultValue, tree);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003725 if (attrName != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00003726 xmlFree(attrName);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003727 if (defaultValue != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00003728 xmlFree(defaultValue);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003729 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003730 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003731 if (RAW == '>') {
3732 if (input != ctxt->input) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003733 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
Daniel Veillardcf461992000-03-14 18:30:20 +00003734 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3735 ctxt->sax->error(ctxt->userData,
3736"Attribute list declaration doesn't start and stop in the same entity\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00003737 ctxt->wellFormed = 0;
3738 ctxt->disableSAX = 1;
3739 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003740 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00003741 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003742
Daniel Veillard6454aec1999-09-02 22:04:43 +00003743 xmlFree(elemName);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003744 }
3745}
3746
Daniel Veillard11e00581998-10-24 18:27:49 +00003747/**
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003748 * xmlParseElementMixedContentDecl:
Daniel Veillard11e00581998-10-24 18:27:49 +00003749 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00003750 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003751 * parse the declaration for a Mixed Element content
3752 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillard260a68f1998-08-13 03:39:55 +00003753 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003754 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3755 * '(' S? '#PCDATA' S? ')'
3756 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003757 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3758 *
3759 * [ VC: No Duplicate Types ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00003760 * The same name must not appear more than once in a single
3761 * mixed-content declaration.
Daniel Veillardb05deb71999-08-10 19:04:08 +00003762 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003763 * returns: the list of the xmlElementContentPtr describing the element choices
3764 */
3765xmlElementContentPtr
3766xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard1899e851999-02-01 12:18:54 +00003767 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003768 xmlChar *elem = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003769
Daniel Veillardb05deb71999-08-10 19:04:08 +00003770 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00003771 if ((RAW == '#') && (NXT(1) == 'P') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003772 (NXT(2) == 'C') && (NXT(3) == 'D') &&
3773 (NXT(4) == 'A') && (NXT(5) == 'T') &&
3774 (NXT(6) == 'A')) {
3775 SKIP(7);
3776 SKIP_BLANKS;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003777 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00003778 if (RAW == ')') {
3779 ctxt->entity = ctxt->input;
Daniel Veillard3b9def11999-01-31 22:15:06 +00003780 NEXT;
3781 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillardcf461992000-03-14 18:30:20 +00003782 if (RAW == '*') {
Daniel Veillardf600e251999-12-18 15:32:46 +00003783 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3784 NEXT;
3785 }
Daniel Veillard3b9def11999-01-31 22:15:06 +00003786 return(ret);
3787 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003788 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003789 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3790 if (ret == NULL) return(NULL);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003791 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003792 while (RAW == '|') {
Daniel Veillard1899e851999-02-01 12:18:54 +00003793 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003794 if (elem == NULL) {
3795 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3796 if (ret == NULL) return(NULL);
3797 ret->c1 = cur;
Daniel Veillard1899e851999-02-01 12:18:54 +00003798 cur = ret;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003799 } else {
Daniel Veillard1899e851999-02-01 12:18:54 +00003800 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3801 if (n == NULL) return(NULL);
3802 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
3803 cur->c2 = n;
3804 cur = n;
Daniel Veillard6454aec1999-09-02 22:04:43 +00003805 xmlFree(elem);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003806 }
3807 SKIP_BLANKS;
3808 elem = xmlParseName(ctxt);
3809 if (elem == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003810 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003811 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003812 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003813 "xmlParseElementMixedContentDecl : Name expected\n");
3814 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003815 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003816 xmlFreeElementContent(cur);
3817 return(NULL);
3818 }
3819 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00003820 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003821 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003822 if ((RAW == ')') && (NXT(1) == '*')) {
Daniel Veillard1e346af1999-02-22 10:33:01 +00003823 if (elem != NULL) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003824 cur->c2 = xmlNewElementContent(elem,
3825 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard6454aec1999-09-02 22:04:43 +00003826 xmlFree(elem);
Daniel Veillard1e346af1999-02-22 10:33:01 +00003827 }
Daniel Veillard1899e851999-02-01 12:18:54 +00003828 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardcf461992000-03-14 18:30:20 +00003829 ctxt->entity = ctxt->input;
Daniel Veillard1899e851999-02-01 12:18:54 +00003830 SKIP(2);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003831 } else {
Daniel Veillard6454aec1999-09-02 22:04:43 +00003832 if (elem != NULL) xmlFree(elem);
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003833 xmlFreeElementContent(ret);
3834 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003835 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003836 ctxt->sax->error(ctxt->userData,
Daniel Veillard3b9def11999-01-31 22:15:06 +00003837 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003838 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003839 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003840 return(NULL);
3841 }
3842
3843 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003844 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003845 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003846 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003847 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
3848 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003849 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003850 }
3851 return(ret);
3852}
3853
3854/**
3855 * xmlParseElementChildrenContentDecl:
3856 * @ctxt: an XML parser context
3857 *
3858 * parse the declaration for a Mixed Element content
3859 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3860 *
3861 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00003862 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
3863 *
3864 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
3865 *
3866 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
3867 *
3868 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
3869 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003870 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
3871 * TODO Parameter-entity replacement text must be properly nested
3872 * with parenthetized groups. That is to say, if either of the
3873 * opening or closing parentheses in a choice, seq, or Mixed
3874 * construct is contained in the replacement text for a parameter
3875 * entity, both must be contained in the same replacement text. For
3876 * interoperability, if a parameter-entity reference appears in a
3877 * choice, seq, or Mixed construct, its replacement text should not
3878 * be empty, and neither the first nor last non-blank character of
3879 * the replacement text should be a connector (| or ,).
3880 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003881 * returns: the tree of xmlElementContentPtr describing the element
3882 * hierarchy.
3883 */
3884xmlElementContentPtr
3885xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
3886 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003887 xmlChar *elem;
3888 xmlChar type = 0;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003889
3890 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00003891 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00003892 if (RAW == '(') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003893 /* Recurse on first child */
3894 NEXT;
3895 SKIP_BLANKS;
3896 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
3897 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00003898 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003899 } else {
3900 elem = xmlParseName(ctxt);
3901 if (elem == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003902 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003903 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003904 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003905 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
3906 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003907 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003908 return(NULL);
3909 }
3910 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003911 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00003912 if (RAW == '?') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00003913 cur->ocur = XML_ELEMENT_CONTENT_OPT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003914 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00003915 } else if (RAW == '*') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00003916 cur->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003917 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00003918 } else if (RAW == '+') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00003919 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003920 NEXT;
3921 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00003922 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003923 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00003924 xmlFree(elem);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003925 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003926 }
3927 SKIP_BLANKS;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003928 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00003929 while (RAW != ')') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003930 /*
3931 * Each loop we parse one separator and one element.
3932 */
Daniel Veillardcf461992000-03-14 18:30:20 +00003933 if (RAW == ',') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003934 if (type == 0) type = CUR;
3935
3936 /*
3937 * Detect "Name | Name , Name" error
3938 */
3939 else if (type != CUR) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003940 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003941 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003942 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003943 "xmlParseElementChildrenContentDecl : '%c' expected\n",
3944 type);
3945 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003946 ctxt->disableSAX = 1;
3947 if ((op != NULL) && (op != ret))
3948 xmlFreeElementContent(op);
Daniel Veillard87b95392000-08-12 21:12:04 +00003949 if ((last != NULL) && (last != ret) &&
3950 (last != ret->c1) && (last != ret->c2))
Daniel Veillardcf461992000-03-14 18:30:20 +00003951 xmlFreeElementContent(last);
3952 if (ret != NULL)
3953 xmlFreeElementContent(ret);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003954 return(NULL);
3955 }
Daniel Veillard1899e851999-02-01 12:18:54 +00003956 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003957
3958 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
3959 if (op == NULL) {
3960 xmlFreeElementContent(ret);
3961 return(NULL);
3962 }
3963 if (last == NULL) {
3964 op->c1 = ret;
3965 ret = cur = op;
3966 } else {
3967 cur->c2 = op;
3968 op->c1 = last;
3969 cur =op;
Daniel Veillard1899e851999-02-01 12:18:54 +00003970 last = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003971 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003972 } else if (RAW == '|') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003973 if (type == 0) type = CUR;
3974
3975 /*
3976 * Detect "Name , Name | Name" error
3977 */
3978 else if (type != CUR) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00003979 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003980 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003981 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003982 "xmlParseElementChildrenContentDecl : '%c' expected\n",
3983 type);
3984 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003985 ctxt->disableSAX = 1;
Daniel Veillard87b95392000-08-12 21:12:04 +00003986 if ((op != NULL) && (op != ret) && (op != last))
Daniel Veillardcf461992000-03-14 18:30:20 +00003987 xmlFreeElementContent(op);
Daniel Veillard87b95392000-08-12 21:12:04 +00003988 if ((last != NULL) && (last != ret) &&
3989 (last != ret->c1) && (last != ret->c2))
Daniel Veillardcf461992000-03-14 18:30:20 +00003990 xmlFreeElementContent(last);
3991 if (ret != NULL)
3992 xmlFreeElementContent(ret);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003993 return(NULL);
3994 }
Daniel Veillard1899e851999-02-01 12:18:54 +00003995 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003996
3997 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3998 if (op == NULL) {
Daniel Veillardcf461992000-03-14 18:30:20 +00003999 if ((op != NULL) && (op != ret))
4000 xmlFreeElementContent(op);
Daniel Veillard87b95392000-08-12 21:12:04 +00004001 if ((last != NULL) && (last != ret) &&
4002 (last != ret->c1) && (last != ret->c2))
Daniel Veillardcf461992000-03-14 18:30:20 +00004003 xmlFreeElementContent(last);
4004 if (ret != NULL)
4005 xmlFreeElementContent(ret);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004006 return(NULL);
4007 }
4008 if (last == NULL) {
4009 op->c1 = ret;
4010 ret = cur = op;
4011 } else {
4012 cur->c2 = op;
4013 op->c1 = last;
4014 cur =op;
Daniel Veillard1899e851999-02-01 12:18:54 +00004015 last = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004016 }
4017 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004018 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004019 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004020 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004021 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4022 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004023 ctxt->disableSAX = 1;
Daniel Veillardcf461992000-03-14 18:30:20 +00004024 if ((op != NULL) && (op != ret))
4025 xmlFreeElementContent(op);
Daniel Veillard87b95392000-08-12 21:12:04 +00004026 if ((last != NULL) && (last != ret) &&
4027 (last != ret->c1) && (last != ret->c2))
Daniel Veillardcf461992000-03-14 18:30:20 +00004028 xmlFreeElementContent(last);
4029 if (ret != NULL)
4030 xmlFreeElementContent(ret);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004031 return(NULL);
4032 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00004033 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004034 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004035 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00004036 if (RAW == '(') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004037 /* Recurse on second child */
4038 NEXT;
4039 SKIP_BLANKS;
Daniel Veillard1899e851999-02-01 12:18:54 +00004040 last = xmlParseElementChildrenContentDecl(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004041 SKIP_BLANKS;
4042 } else {
4043 elem = xmlParseName(ctxt);
4044 if (elem == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004045 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004047 ctxt->sax->error(ctxt->userData,
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004048 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004049 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004050 ctxt->disableSAX = 1;
4051 if ((op != NULL) && (op != ret))
4052 xmlFreeElementContent(op);
Daniel Veillard87b95392000-08-12 21:12:04 +00004053 if ((last != NULL) && (last != ret) &&
4054 (last != ret->c1) && (last != ret->c2))
Daniel Veillardcf461992000-03-14 18:30:20 +00004055 xmlFreeElementContent(last);
4056 if (ret != NULL)
4057 xmlFreeElementContent(ret);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004058 return(NULL);
4059 }
Daniel Veillard1899e851999-02-01 12:18:54 +00004060 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard6454aec1999-09-02 22:04:43 +00004061 xmlFree(elem);
Daniel Veillardcf461992000-03-14 18:30:20 +00004062 if (RAW == '?') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004063 last->ocur = XML_ELEMENT_CONTENT_OPT;
4064 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00004065 } else if (RAW == '*') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004066 last->ocur = XML_ELEMENT_CONTENT_MULT;
4067 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00004068 } else if (RAW == '+') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004069 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4070 NEXT;
4071 } else {
4072 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4073 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004074 }
4075 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004076 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004077 }
Daniel Veillard1899e851999-02-01 12:18:54 +00004078 if ((cur != NULL) && (last != NULL)) {
4079 cur->c2 = last;
4080 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004081 ctxt->entity = ctxt->input;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004082 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00004083 if (RAW == '?') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004084 ret->ocur = XML_ELEMENT_CONTENT_OPT;
4085 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00004086 } else if (RAW == '*') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004087 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4088 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00004089 } else if (RAW == '+') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004090 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
4091 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004092 }
4093 return(ret);
4094}
4095
4096/**
4097 * xmlParseElementContentDecl:
4098 * @ctxt: an XML parser context
4099 * @name: the name of the element being defined.
4100 * @result: the Element Content pointer will be stored here if any
Daniel Veillard260a68f1998-08-13 03:39:55 +00004101 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004102 * parse the declaration for an Element content either Mixed or Children,
4103 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4104 *
4105 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
Daniel Veillard11e00581998-10-24 18:27:49 +00004106 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004107 * returns: the type of element content XML_ELEMENT_TYPE_xxx
Daniel Veillard260a68f1998-08-13 03:39:55 +00004108 */
4109
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004110int
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004111xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004112 xmlElementContentPtr *result) {
4113
4114 xmlElementContentPtr tree = NULL;
Daniel Veillardcf461992000-03-14 18:30:20 +00004115 xmlParserInputPtr input = ctxt->input;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004116 int res;
4117
4118 *result = NULL;
4119
Daniel Veillardcf461992000-03-14 18:30:20 +00004120 if (RAW != '(') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004121 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004122 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004123 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004124 "xmlParseElementContentDecl : '(' expected\n");
4125 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004126 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004127 return(-1);
4128 }
4129 NEXT;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004130 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004131 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00004132 if ((RAW == '#') && (NXT(1) == 'P') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004133 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4134 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4135 (NXT(6) == 'A')) {
4136 tree = xmlParseElementMixedContentDecl(ctxt);
4137 res = XML_ELEMENT_TYPE_MIXED;
4138 } else {
4139 tree = xmlParseElementChildrenContentDecl(ctxt);
4140 res = XML_ELEMENT_TYPE_ELEMENT;
4141 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004142 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004143 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
Daniel Veillardcf461992000-03-14 18:30:20 +00004144 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4145 ctxt->sax->error(ctxt->userData,
4146"Element content declaration doesn't start and stop in the same entity\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00004147 ctxt->wellFormed = 0;
4148 ctxt->disableSAX = 1;
4149 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004150 SKIP_BLANKS;
Daniel Veillard3b9def11999-01-31 22:15:06 +00004151 *result = tree;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004152 return(res);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004153}
4154
Daniel Veillard11e00581998-10-24 18:27:49 +00004155/**
4156 * xmlParseElementDecl:
4157 * @ctxt: an XML parser context
4158 *
4159 * parse an Element declaration.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004160 *
4161 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4162 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00004163 * [ VC: Unique Element Type Declaration ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00004164 * No element type may be declared more than once
Daniel Veillard1e346af1999-02-22 10:33:01 +00004165 *
4166 * Returns the type of the element, or -1 in case of error
Daniel Veillard260a68f1998-08-13 03:39:55 +00004167 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004168int
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004169xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004170 xmlChar *name;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004171 int ret = -1;
4172 xmlElementContentPtr content = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004173
Daniel Veillardb05deb71999-08-10 19:04:08 +00004174 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00004175 if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00004176 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4177 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4178 (NXT(6) == 'E') && (NXT(7) == 'N') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004179 (NXT(8) == 'T')) {
Daniel Veillardcf461992000-03-14 18:30:20 +00004180 xmlParserInputPtr input = ctxt->input;
4181
Daniel Veillard260a68f1998-08-13 03:39:55 +00004182 SKIP(9);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004183 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004184 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004185 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004186 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004187 "Space required after 'ELEMENT'\n");
4188 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004189 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004190 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004191 SKIP_BLANKS;
4192 name = xmlParseName(ctxt);
4193 if (name == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004194 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004195 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004196 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004197 "xmlParseElementDecl: no name for Element\n");
4198 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004199 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004200 return(-1);
4201 }
4202 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004203 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004205 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004206 "Space required after the element name\n");
4207 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004208 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004209 }
4210 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00004211 if ((RAW == 'E') && (NXT(1) == 'M') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00004212 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4213 (NXT(4) == 'Y')) {
4214 SKIP(5);
4215 /*
4216 * Element must always be empty.
4217 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004218 ret = XML_ELEMENT_TYPE_EMPTY;
Daniel Veillardcf461992000-03-14 18:30:20 +00004219 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00004220 (NXT(2) == 'Y')) {
4221 SKIP(3);
4222 /*
4223 * Element is a generic container.
4224 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004225 ret = XML_ELEMENT_TYPE_ANY;
Daniel Veillardcf461992000-03-14 18:30:20 +00004226 } else if (RAW == '(') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004227 ret = xmlParseElementContentDecl(ctxt, name, &content);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004228 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004229 /*
4230 * [ WFC: PEs in Internal Subset ] error handling.
4231 */
Daniel Veillardcf461992000-03-14 18:30:20 +00004232 if ((RAW == '%') && (ctxt->external == 0) &&
Daniel Veillardb05deb71999-08-10 19:04:08 +00004233 (ctxt->inputNr == 1)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004234 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004235 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4236 ctxt->sax->error(ctxt->userData,
4237 "PEReference: forbidden within markup decl in internal subset\n");
4238 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004239 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004240 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4241 ctxt->sax->error(ctxt->userData,
4242 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4243 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004244 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004245 ctxt->disableSAX = 1;
Daniel Veillard6454aec1999-09-02 22:04:43 +00004246 if (name != NULL) xmlFree(name);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004247 return(-1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004248 }
Daniel Veillard686d6b62000-01-03 11:08:02 +00004249
Daniel Veillard260a68f1998-08-13 03:39:55 +00004250 SKIP_BLANKS;
Daniel Veillard686d6b62000-01-03 11:08:02 +00004251 /*
4252 * Pop-up of finished entities.
4253 */
Daniel Veillardcf461992000-03-14 18:30:20 +00004254 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillard686d6b62000-01-03 11:08:02 +00004255 xmlPopInput(ctxt);
4256 SKIP_BLANKS;
4257
Daniel Veillardcf461992000-03-14 18:30:20 +00004258 if (RAW != '>') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004259 ctxt->errNo = XML_ERR_GT_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004260 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004261 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00004262 "xmlParseElementDecl: expected '>' at the end\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004263 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004264 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004265 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00004266 if (input != ctxt->input) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004267 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
Daniel Veillardcf461992000-03-14 18:30:20 +00004268 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4269 ctxt->sax->error(ctxt->userData,
4270"Element declaration doesn't start and stop in the same entity\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00004271 ctxt->wellFormed = 0;
4272 ctxt->disableSAX = 1;
4273 }
4274
Daniel Veillard260a68f1998-08-13 03:39:55 +00004275 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00004276 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4277 (ctxt->sax->elementDecl != NULL))
Daniel Veillard011b63c1999-06-02 17:44:04 +00004278 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4279 content);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004280 }
Daniel Veillard14fff061999-06-22 21:49:07 +00004281 if (content != NULL) {
4282 xmlFreeElementContent(content);
4283 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004284 if (name != NULL) {
Daniel Veillard6454aec1999-09-02 22:04:43 +00004285 xmlFree(name);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004286 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004287 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004288 return(ret);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004289}
4290
Daniel Veillard11e00581998-10-24 18:27:49 +00004291/**
4292 * xmlParseMarkupDecl:
4293 * @ctxt: an XML parser context
4294 *
4295 * parse Markup declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00004296 *
4297 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4298 * NotationDecl | PI | Comment
4299 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00004300 * [ VC: Proper Declaration/PE Nesting ]
Daniel Veillardb1059e22000-09-16 14:02:43 +00004301 * Parameter-entity replacement text must be properly nested with
Daniel Veillardb05deb71999-08-10 19:04:08 +00004302 * markup declarations. That is to say, if either the first character
4303 * or the last character of a markup declaration (markupdecl above) is
4304 * contained in the replacement text for a parameter-entity reference,
4305 * both must be contained in the same replacement text.
4306 *
4307 * [ WFC: PEs in Internal Subset ]
4308 * In the internal DTD subset, parameter-entity references can occur
4309 * only where markup declarations can occur, not within markup declarations.
4310 * (This does not apply to references that occur in external parameter
4311 * entities or to the external subset.)
Daniel Veillard260a68f1998-08-13 03:39:55 +00004312 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004313void
4314xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004315 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004316 xmlParseElementDecl(ctxt);
4317 xmlParseAttributeListDecl(ctxt);
4318 xmlParseEntityDecl(ctxt);
4319 xmlParseNotationDecl(ctxt);
4320 xmlParsePI(ctxt);
Daniel Veillardb96e6431999-08-29 21:02:19 +00004321 xmlParseComment(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00004322 /*
4323 * This is only for internal subset. On external entities,
4324 * the replacement is done before parsing stage
4325 */
4326 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4327 xmlParsePEReference(ctxt);
4328 ctxt->instate = XML_PARSER_DTD;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004329}
4330
Daniel Veillard11e00581998-10-24 18:27:49 +00004331/**
Daniel Veillard011b63c1999-06-02 17:44:04 +00004332 * xmlParseTextDecl:
4333 * @ctxt: an XML parser context
4334 *
4335 * parse an XML declaration header for external entities
4336 *
4337 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4338 *
Daniel Veillardcf461992000-03-14 18:30:20 +00004339 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
Daniel Veillard011b63c1999-06-02 17:44:04 +00004340 */
4341
Daniel Veillardcf461992000-03-14 18:30:20 +00004342void
Daniel Veillard011b63c1999-06-02 17:44:04 +00004343xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004344 xmlChar *version;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004345
4346 /*
4347 * We know that '<?xml' is here.
4348 */
Daniel Veillard496a1cf2000-05-03 14:20:55 +00004349 if ((RAW == '<') && (NXT(1) == '?') &&
4350 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4351 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4352 SKIP(5);
4353 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004354 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00004355 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4356 ctxt->sax->error(ctxt->userData,
4357 "Text declaration '<?xml' required\n");
Daniel Veillard496a1cf2000-05-03 14:20:55 +00004358 ctxt->wellFormed = 0;
4359 ctxt->disableSAX = 1;
4360
4361 return;
4362 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00004363
4364 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004365 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004366 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004367 ctxt->sax->error(ctxt->userData,
4368 "Space needed after '<?xml'\n");
Daniel Veillard011b63c1999-06-02 17:44:04 +00004369 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004370 ctxt->disableSAX = 1;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004371 }
4372 SKIP_BLANKS;
4373
4374 /*
4375 * We may have the VersionInfo here.
4376 */
4377 version = xmlParseVersionInfo(ctxt);
Daniel Veillard011b63c1999-06-02 17:44:04 +00004378 if (version == NULL)
4379 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillardcf461992000-03-14 18:30:20 +00004380 ctxt->input->version = version;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004381
4382 /*
4383 * We must have the encoding declaration
4384 */
4385 if (!IS_BLANK(CUR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004386 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004387 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004388 ctxt->sax->error(ctxt->userData, "Space needed here\n");
Daniel Veillard011b63c1999-06-02 17:44:04 +00004389 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004390 ctxt->disableSAX = 1;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004391 }
Daniel Veillard496a1cf2000-05-03 14:20:55 +00004392 xmlParseEncodingDecl(ctxt);
4393 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4394 /*
4395 * The XML REC instructs us to stop parsing right here
4396 */
4397 return;
4398 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00004399
4400 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00004401 if ((RAW == '?') && (NXT(1) == '>')) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00004402 SKIP(2);
Daniel Veillardcf461992000-03-14 18:30:20 +00004403 } else if (RAW == '>') {
Daniel Veillard011b63c1999-06-02 17:44:04 +00004404 /* Deprecated old WD ... */
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004405 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004406 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004407 ctxt->sax->error(ctxt->userData,
4408 "XML declaration must end-up with '?>'\n");
Daniel Veillard011b63c1999-06-02 17:44:04 +00004409 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004410 ctxt->disableSAX = 1;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004411 NEXT;
4412 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004413 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004414 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004415 ctxt->sax->error(ctxt->userData,
4416 "parsing XML declaration: '?>' expected\n");
Daniel Veillard011b63c1999-06-02 17:44:04 +00004417 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004418 ctxt->disableSAX = 1;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004419 MOVETO_ENDTAG(CUR_PTR);
4420 NEXT;
4421 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00004422}
4423
4424/*
4425 * xmlParseConditionalSections
4426 * @ctxt: an XML parser context
4427 *
Daniel Veillard011b63c1999-06-02 17:44:04 +00004428 * [61] conditionalSect ::= includeSect | ignoreSect
4429 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4430 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4431 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4432 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4433 */
4434
4435void
4436xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillardcf461992000-03-14 18:30:20 +00004437 SKIP(3);
4438 SKIP_BLANKS;
4439 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4440 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4441 (NXT(6) == 'E')) {
4442 SKIP(7);
4443 SKIP_BLANKS;
4444 if (RAW != '[') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004445 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
Daniel Veillardcf461992000-03-14 18:30:20 +00004446 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4447 ctxt->sax->error(ctxt->userData,
4448 "XML conditional section '[' expected\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00004449 ctxt->wellFormed = 0;
4450 ctxt->disableSAX = 1;
4451 } else {
4452 NEXT;
4453 }
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00004454 if (xmlParserDebugEntities) {
4455 if ((ctxt->input != NULL) && (ctxt->input->filename))
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00004456 xmlGenericError(xmlGenericErrorContext,
4457 "%s(%d): ", ctxt->input->filename,
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00004458 ctxt->input->line);
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00004459 xmlGenericError(xmlGenericErrorContext,
4460 "Entering INCLUDE Conditional Section\n");
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00004461 }
4462
Daniel Veillardcf461992000-03-14 18:30:20 +00004463 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4464 (NXT(2) != '>'))) {
4465 const xmlChar *check = CUR_PTR;
4466 int cons = ctxt->input->consumed;
4467 int tok = ctxt->token;
Daniel Veillard71b656e2000-01-05 14:46:17 +00004468
Daniel Veillardcf461992000-03-14 18:30:20 +00004469 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4470 xmlParseConditionalSections(ctxt);
4471 } else if (IS_BLANK(CUR)) {
4472 NEXT;
4473 } else if (RAW == '%') {
4474 xmlParsePEReference(ctxt);
4475 } else
4476 xmlParseMarkupDecl(ctxt);
4477
4478 /*
4479 * Pop-up of finished entities.
4480 */
4481 while ((RAW == 0) && (ctxt->inputNr > 1))
4482 xmlPopInput(ctxt);
4483
4484 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4485 (tok == ctxt->token)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004486 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
Daniel Veillardcf461992000-03-14 18:30:20 +00004487 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4488 ctxt->sax->error(ctxt->userData,
4489 "Content error in the external subset\n");
4490 ctxt->wellFormed = 0;
4491 ctxt->disableSAX = 1;
Daniel Veillardcf461992000-03-14 18:30:20 +00004492 break;
4493 }
4494 }
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00004495 if (xmlParserDebugEntities) {
4496 if ((ctxt->input != NULL) && (ctxt->input->filename))
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00004497 xmlGenericError(xmlGenericErrorContext,
4498 "%s(%d): ", ctxt->input->filename,
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00004499 ctxt->input->line);
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00004500 xmlGenericError(xmlGenericErrorContext,
4501 "Leaving INCLUDE Conditional Section\n");
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00004502 }
4503
Daniel Veillardcf461992000-03-14 18:30:20 +00004504 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4505 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4506 int state;
Daniel Veillard41e06512000-11-13 11:47:47 +00004507 int instate;
4508 int depth = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004509
4510 SKIP(6);
4511 SKIP_BLANKS;
4512 if (RAW != '[') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004513 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
Daniel Veillardcf461992000-03-14 18:30:20 +00004514 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4515 ctxt->sax->error(ctxt->userData,
4516 "XML conditional section '[' expected\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00004517 ctxt->wellFormed = 0;
4518 ctxt->disableSAX = 1;
4519 } else {
4520 NEXT;
4521 }
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00004522 if (xmlParserDebugEntities) {
4523 if ((ctxt->input != NULL) && (ctxt->input->filename))
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00004524 xmlGenericError(xmlGenericErrorContext,
4525 "%s(%d): ", ctxt->input->filename,
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00004526 ctxt->input->line);
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00004527 xmlGenericError(xmlGenericErrorContext,
4528 "Entering IGNORE Conditional Section\n");
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00004529 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004530
4531 /*
4532 * Parse up to the end of the conditionnal section
4533 * But disable SAX event generating DTD building in the meantime
4534 */
4535 state = ctxt->disableSAX;
Daniel Veillard41e06512000-11-13 11:47:47 +00004536 instate = ctxt->instate;
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00004537 ctxt->disableSAX = 1;
Daniel Veillard41e06512000-11-13 11:47:47 +00004538 ctxt->instate = XML_PARSER_IGNORE;
Daniel Veillardcf461992000-03-14 18:30:20 +00004539
Daniel Veillard41e06512000-11-13 11:47:47 +00004540 while (depth >= 0) {
4541 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4542 depth++;
4543 SKIP(3);
4544 continue;
4545 }
4546 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4547 if (--depth >= 0) SKIP(3);
4548 continue;
4549 }
4550 NEXT;
4551 continue;
Daniel Veillardcf461992000-03-14 18:30:20 +00004552 }
Daniel Veillard41e06512000-11-13 11:47:47 +00004553
Daniel Veillardcf461992000-03-14 18:30:20 +00004554 ctxt->disableSAX = state;
Daniel Veillard41e06512000-11-13 11:47:47 +00004555 ctxt->instate = instate;
4556
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00004557 if (xmlParserDebugEntities) {
4558 if ((ctxt->input != NULL) && (ctxt->input->filename))
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00004559 xmlGenericError(xmlGenericErrorContext,
4560 "%s(%d): ", ctxt->input->filename,
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00004561 ctxt->input->line);
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00004562 xmlGenericError(xmlGenericErrorContext,
4563 "Leaving IGNORE Conditional Section\n");
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00004564 }
4565
Daniel Veillardcf461992000-03-14 18:30:20 +00004566 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004567 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
Daniel Veillardcf461992000-03-14 18:30:20 +00004568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4569 ctxt->sax->error(ctxt->userData,
4570 "XML conditional section INCLUDE or IGNORE keyword expected\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00004571 ctxt->wellFormed = 0;
4572 ctxt->disableSAX = 1;
Daniel Veillard71b656e2000-01-05 14:46:17 +00004573 }
4574
Daniel Veillardcf461992000-03-14 18:30:20 +00004575 if (RAW == 0)
Daniel Veillard71b656e2000-01-05 14:46:17 +00004576 SHRINK;
4577
Daniel Veillardcf461992000-03-14 18:30:20 +00004578 if (RAW == 0) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004579 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004580 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4581 ctxt->sax->error(ctxt->userData,
4582 "XML conditional section not closed\n");
4583 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004584 ctxt->disableSAX = 1;
Daniel Veillard71b656e2000-01-05 14:46:17 +00004585 } else {
4586 SKIP(3);
Daniel Veillard011b63c1999-06-02 17:44:04 +00004587 }
4588}
4589
4590/**
Daniel Veillard00fdf371999-10-08 09:40:39 +00004591 * xmlParseExternalSubset:
Daniel Veillard011b63c1999-06-02 17:44:04 +00004592 * @ctxt: an XML parser context
Daniel Veillard00fdf371999-10-08 09:40:39 +00004593 * @ExternalID: the external identifier
4594 * @SystemID: the system identifier (or URL)
Daniel Veillard011b63c1999-06-02 17:44:04 +00004595 *
4596 * parse Markup declarations from an external subset
4597 *
4598 * [30] extSubset ::= textDecl? extSubsetDecl
4599 *
4600 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
Daniel Veillard011b63c1999-06-02 17:44:04 +00004601 */
4602void
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004603xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4604 const xmlChar *SystemID) {
Daniel Veillarda819dac1999-11-24 18:04:22 +00004605 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00004606 if ((RAW == '<') && (NXT(1) == '?') &&
Daniel Veillard011b63c1999-06-02 17:44:04 +00004607 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4608 (NXT(4) == 'l')) {
Daniel Veillardcf461992000-03-14 18:30:20 +00004609 xmlParseTextDecl(ctxt);
Daniel Veillard496a1cf2000-05-03 14:20:55 +00004610 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4611 /*
4612 * The XML REC instructs us to stop parsing right here
4613 */
4614 ctxt->instate = XML_PARSER_EOF;
4615 return;
4616 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00004617 }
4618 if (ctxt->myDoc == NULL) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00004619 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillard011b63c1999-06-02 17:44:04 +00004620 }
4621 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4622 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4623
Daniel Veillardb05deb71999-08-10 19:04:08 +00004624 ctxt->instate = XML_PARSER_DTD;
4625 ctxt->external = 1;
Daniel Veillardcf461992000-03-14 18:30:20 +00004626 while (((RAW == '<') && (NXT(1) == '?')) ||
4627 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard011b63c1999-06-02 17:44:04 +00004628 IS_BLANK(CUR)) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004629 const xmlChar *check = CUR_PTR;
Daniel Veillardb96e6431999-08-29 21:02:19 +00004630 int cons = ctxt->input->consumed;
Daniel Veillardcf461992000-03-14 18:30:20 +00004631 int tok = ctxt->token;
Daniel Veillardb96e6431999-08-29 21:02:19 +00004632
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00004633 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00004634 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00004635 xmlParseConditionalSections(ctxt);
4636 } else if (IS_BLANK(CUR)) {
4637 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00004638 } else if (RAW == '%') {
Daniel Veillard011b63c1999-06-02 17:44:04 +00004639 xmlParsePEReference(ctxt);
4640 } else
4641 xmlParseMarkupDecl(ctxt);
4642
4643 /*
4644 * Pop-up of finished entities.
4645 */
Daniel Veillardcf461992000-03-14 18:30:20 +00004646 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillard011b63c1999-06-02 17:44:04 +00004647 xmlPopInput(ctxt);
4648
Daniel Veillardcf461992000-03-14 18:30:20 +00004649 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4650 (tok == ctxt->token)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004651 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
Daniel Veillardb96e6431999-08-29 21:02:19 +00004652 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4653 ctxt->sax->error(ctxt->userData,
4654 "Content error in the external subset\n");
4655 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004656 ctxt->disableSAX = 1;
Daniel Veillardb96e6431999-08-29 21:02:19 +00004657 break;
4658 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00004659 }
4660
Daniel Veillardcf461992000-03-14 18:30:20 +00004661 if (RAW != 0) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004662 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004663 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4664 ctxt->sax->error(ctxt->userData,
4665 "Extra content at the end of the document\n");
4666 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004667 ctxt->disableSAX = 1;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004668 }
4669
4670}
4671
4672/**
Daniel Veillard011b63c1999-06-02 17:44:04 +00004673 * xmlParseReference:
4674 * @ctxt: an XML parser context
4675 *
4676 * parse and handle entity references in content, depending on the SAX
4677 * interface, this may end-up in a call to character() if this is a
4678 * CharRef, a predefined entity, if there is no reference() callback.
4679 * or if the parser was asked to switch to that mode.
4680 *
4681 * [67] Reference ::= EntityRef | CharRef
4682 */
4683void
4684xmlParseReference(xmlParserCtxtPtr ctxt) {
4685 xmlEntityPtr ent;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004686 xmlChar *val;
Daniel Veillardcf461992000-03-14 18:30:20 +00004687 if (RAW != '&') return;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004688
4689 if (NXT(1) == '#') {
Daniel Veillardcf461992000-03-14 18:30:20 +00004690 int i = 0;
4691 xmlChar out[10];
4692 int hex = NXT(2);
Daniel Veillard011b63c1999-06-02 17:44:04 +00004693 int val = xmlParseCharRef(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00004694
Daniel Veillardbe803962000-06-28 23:40:59 +00004695 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
Daniel Veillardcf461992000-03-14 18:30:20 +00004696 /*
4697 * So we are using non-UTF-8 buffers
4698 * Check that the char fit on 8bits, if not
4699 * generate a CharRef.
4700 */
4701 if (val <= 0xFF) {
4702 out[0] = val;
4703 out[1] = 0;
4704 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4705 (!ctxt->disableSAX))
4706 ctxt->sax->characters(ctxt->userData, out, 1);
4707 } else {
4708 if ((hex == 'x') || (hex == 'X'))
4709 sprintf((char *)out, "#x%X", val);
4710 else
4711 sprintf((char *)out, "#%d", val);
4712 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4713 (!ctxt->disableSAX))
4714 ctxt->sax->reference(ctxt->userData, out);
4715 }
4716 } else {
4717 /*
4718 * Just encode the value in UTF-8
4719 */
4720 COPY_BUF(0 ,out, i, val);
4721 out[i] = 0;
4722 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4723 (!ctxt->disableSAX))
4724 ctxt->sax->characters(ctxt->userData, out, i);
4725 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00004726 } else {
4727 ent = xmlParseEntityRef(ctxt);
4728 if (ent == NULL) return;
4729 if ((ent->name != NULL) &&
Daniel Veillardcf461992000-03-14 18:30:20 +00004730 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4731 xmlNodePtr list = NULL;
4732 int ret;
4733
4734
4735 /*
4736 * The first reference to the entity trigger a parsing phase
4737 * where the ent->children is filled with the result from
4738 * the parsing.
4739 */
4740 if (ent->children == NULL) {
4741 xmlChar *value;
4742 value = ent->content;
4743
4744 /*
4745 * Check that this entity is well formed
4746 */
4747 if ((value != NULL) &&
4748 (value[1] == 0) && (value[0] == '<') &&
Daniel Veillard8b5dd832000-10-01 20:28:44 +00004749 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
Daniel Veillardcf461992000-03-14 18:30:20 +00004750 /*
Daniel Veillarde0854c32000-08-27 21:12:29 +00004751 * DONE: get definite answer on this !!!
Daniel Veillardcf461992000-03-14 18:30:20 +00004752 * Lots of entity decls are used to declare a single
4753 * char
4754 * <!ENTITY lt "<">
4755 * Which seems to be valid since
4756 * 2.4: The ampersand character (&) and the left angle
4757 * bracket (<) may appear in their literal form only
4758 * when used ... They are also legal within the literal
4759 * entity value of an internal entity declaration;i
4760 * see "4.3.2 Well-Formed Parsed Entities".
4761 * IMHO 2.4 and 4.3.2 are directly in contradiction.
4762 * Looking at the OASIS test suite and James Clark
4763 * tests, this is broken. However the XML REC uses
4764 * it. Is the XML REC not well-formed ????
4765 * This is a hack to avoid this problem
Daniel Veillarde0854c32000-08-27 21:12:29 +00004766 *
4767 * ANSWER: since lt gt amp .. are already defined,
4768 * this is a redefinition and hence the fact that the
4769 * contentis not well balanced is not a Wf error, this
4770 * is lousy but acceptable.
Daniel Veillardcf461992000-03-14 18:30:20 +00004771 */
4772 list = xmlNewDocText(ctxt->myDoc, value);
4773 if (list != NULL) {
4774 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4775 (ent->children == NULL)) {
4776 ent->children = list;
4777 ent->last = list;
4778 list->parent = (xmlNodePtr) ent;
4779 } else {
4780 xmlFreeNodeList(list);
4781 }
4782 } else if (list != NULL) {
4783 xmlFreeNodeList(list);
4784 }
4785 } else {
4786 /*
4787 * 4.3.2: An internal general parsed entity is well-formed
4788 * if its replacement text matches the production labeled
4789 * content.
4790 */
4791 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
4792 ctxt->depth++;
4793 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
4794 ctxt->sax, NULL, ctxt->depth,
4795 value, &list);
4796 ctxt->depth--;
4797 } else if (ent->etype ==
4798 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
4799 ctxt->depth++;
4800 ret = xmlParseExternalEntity(ctxt->myDoc,
4801 ctxt->sax, NULL, ctxt->depth,
Daniel Veillard39c7d712000-09-10 16:14:55 +00004802 ent->URI, ent->ExternalID, &list);
Daniel Veillardcf461992000-03-14 18:30:20 +00004803 ctxt->depth--;
4804 } else {
4805 ret = -1;
4806 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4807 ctxt->sax->error(ctxt->userData,
4808 "Internal: invalid entity type\n");
4809 }
4810 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004811 ctxt->errNo = XML_ERR_ENTITY_LOOP;
Daniel Veillardcf461992000-03-14 18:30:20 +00004812 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4813 ctxt->sax->error(ctxt->userData,
4814 "Detected entity reference loop\n");
4815 ctxt->wellFormed = 0;
4816 ctxt->disableSAX = 1;
Daniel Veillardcf461992000-03-14 18:30:20 +00004817 } else if ((ret == 0) && (list != NULL)) {
4818 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4819 (ent->children == NULL)) {
4820 ent->children = list;
4821 while (list != NULL) {
4822 list->parent = (xmlNodePtr) ent;
4823 if (list->next == NULL)
4824 ent->last = list;
4825 list = list->next;
4826 }
4827 } else {
4828 xmlFreeNodeList(list);
4829 }
4830 } else if (ret > 0) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004831 ctxt->errNo = ret;
Daniel Veillardcf461992000-03-14 18:30:20 +00004832 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4833 ctxt->sax->error(ctxt->userData,
4834 "Entity value required\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00004835 ctxt->wellFormed = 0;
4836 ctxt->disableSAX = 1;
4837 } else if (list != NULL) {
4838 xmlFreeNodeList(list);
4839 }
4840 }
4841 }
Daniel Veillardb96e6431999-08-29 21:02:19 +00004842 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
Daniel Veillardcf461992000-03-14 18:30:20 +00004843 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00004844 /*
4845 * Create a node.
4846 */
4847 ctxt->sax->reference(ctxt->userData, ent->name);
4848 return;
4849 } else if (ctxt->replaceEntities) {
Daniel Veillarde0854c32000-08-27 21:12:29 +00004850 if ((ctxt->node != NULL) && (ent->children != NULL)) {
4851 /*
4852 * Seems we are generating the DOM content, do
4853 * a simple tree copy
4854 */
4855 xmlNodePtr new;
4856 new = xmlCopyNodeList(ent->children);
4857
4858 xmlAddChildList(ctxt->node, new);
4859 /*
4860 * This is to avoid a nasty side effect, see
4861 * characters() in SAX.c
4862 */
4863 ctxt->nodemem = 0;
4864 ctxt->nodelen = 0;
4865 return;
4866 } else {
4867 /*
4868 * Probably running in SAX mode
4869 */
4870 xmlParserInputPtr input;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004871
Daniel Veillarde0854c32000-08-27 21:12:29 +00004872 input = xmlNewEntityInputStream(ctxt, ent);
4873 xmlPushInput(ctxt, input);
4874 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
4875 (RAW == '<') && (NXT(1) == '?') &&
4876 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4877 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4878 xmlParseTextDecl(ctxt);
4879 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4880 /*
4881 * The XML REC instructs us to stop parsing right here
4882 */
4883 ctxt->instate = XML_PARSER_EOF;
4884 return;
4885 }
4886 if (input->standalone == 1) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004887 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
Daniel Veillarde0854c32000-08-27 21:12:29 +00004888 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4889 ctxt->sax->error(ctxt->userData,
4890 "external parsed entities cannot be standalone\n");
Daniel Veillarde0854c32000-08-27 21:12:29 +00004891 ctxt->wellFormed = 0;
4892 ctxt->disableSAX = 1;
4893 }
Daniel Veillard496a1cf2000-05-03 14:20:55 +00004894 }
Daniel Veillarde0854c32000-08-27 21:12:29 +00004895 return;
Daniel Veillardcf461992000-03-14 18:30:20 +00004896 }
Daniel Veillardb96e6431999-08-29 21:02:19 +00004897 }
Daniel Veillarde0854c32000-08-27 21:12:29 +00004898 } else {
4899 val = ent->content;
4900 if (val == NULL) return;
4901 /*
4902 * inline the entity.
4903 */
4904 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4905 (!ctxt->disableSAX))
4906 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
Daniel Veillard011b63c1999-06-02 17:44:04 +00004907 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00004908 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004909}
4910
Daniel Veillard11e00581998-10-24 18:27:49 +00004911/**
4912 * xmlParseEntityRef:
4913 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00004914 *
4915 * parse ENTITY references declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00004916 *
4917 * [68] EntityRef ::= '&' Name ';'
Daniel Veillard1e346af1999-02-22 10:33:01 +00004918 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00004919 * [ WFC: Entity Declared ]
4920 * In a document without any DTD, a document with only an internal DTD
4921 * subset which contains no parameter entity references, or a document
4922 * with "standalone='yes'", the Name given in the entity reference
4923 * must match that in an entity declaration, except that well-formed
4924 * documents need not declare any of the following entities: amp, lt,
4925 * gt, apos, quot. The declaration of a parameter entity must precede
4926 * any reference to it. Similarly, the declaration of a general entity
4927 * must precede any reference to it which appears in a default value in an
4928 * attribute-list declaration. Note that if entities are declared in the
4929 * external subset or in external parameter entities, a non-validating
4930 * processor is not obligated to read and process their declarations;
4931 * for such documents, the rule that an entity must be declared is a
4932 * well-formedness constraint only if standalone='yes'.
4933 *
4934 * [ WFC: Parsed Entity ]
4935 * An entity reference must not contain the name of an unparsed entity
4936 *
Daniel Veillard011b63c1999-06-02 17:44:04 +00004937 * Returns the xmlEntityPtr if found, or NULL otherwise.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004938 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00004939xmlEntityPtr
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004940xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004941 xmlChar *name;
Daniel Veillard517752b1999-04-05 12:20:10 +00004942 xmlEntityPtr ent = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004943
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004944 GROW;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004945
Daniel Veillardcf461992000-03-14 18:30:20 +00004946 if (RAW == '&') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004947 NEXT;
4948 name = xmlParseName(ctxt);
4949 if (name == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004950 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004951 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00004952 ctxt->sax->error(ctxt->userData,
4953 "xmlParseEntityRef: no name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004954 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004955 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004956 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00004957 if (RAW == ';') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004958 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004959 /*
Daniel Veillard011b63c1999-06-02 17:44:04 +00004960 * Ask first SAX for entity resolution, otherwise try the
4961 * predefined set.
4962 */
4963 if (ctxt->sax != NULL) {
4964 if (ctxt->sax->getEntity != NULL)
4965 ent = ctxt->sax->getEntity(ctxt->userData, name);
4966 if (ent == NULL)
4967 ent = xmlGetPredefinedEntity(name);
4968 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00004969 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00004970 * [ WFC: Entity Declared ]
4971 * In a document without any DTD, a document with only an
4972 * internal DTD subset which contains no parameter entity
4973 * references, or a document with "standalone='yes'", the
4974 * Name given in the entity reference must match that in an
4975 * entity declaration, except that well-formed documents
4976 * need not declare any of the following entities: amp, lt,
4977 * gt, apos, quot.
4978 * The declaration of a parameter entity must precede any
4979 * reference to it.
4980 * Similarly, the declaration of a general entity must
4981 * precede any reference to it which appears in a default
4982 * value in an attribute-list declaration. Note that if
4983 * entities are declared in the external subset or in
4984 * external parameter entities, a non-validating processor
4985 * is not obligated to read and process their declarations;
4986 * for such documents, the rule that an entity must be
4987 * declared is a well-formedness constraint only if
4988 * standalone='yes'.
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004989 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00004990 if (ent == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004991 if ((ctxt->standalone == 1) ||
4992 ((ctxt->hasExternalSubset == 0) &&
4993 (ctxt->hasPErefs == 0))) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00004994 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004995 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard011b63c1999-06-02 17:44:04 +00004996 ctxt->sax->error(ctxt->userData,
4997 "Entity '%s' not defined\n", name);
4998 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004999 ctxt->disableSAX = 1;
Daniel Veillard011b63c1999-06-02 17:44:04 +00005000 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005001 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005002 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5003 ctxt->sax->warning(ctxt->userData,
5004 "Entity '%s' not defined\n", name);
Daniel Veillard011b63c1999-06-02 17:44:04 +00005005 }
5006 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005007
5008 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00005009 * [ WFC: Parsed Entity ]
5010 * An entity reference must not contain the name of an
5011 * unparsed entity
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005012 */
Daniel Veillardcf461992000-03-14 18:30:20 +00005013 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005014 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005015 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5016 ctxt->sax->error(ctxt->userData,
5017 "Entity reference to unparsed entity %s\n", name);
5018 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005019 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005020 }
5021
5022 /*
5023 * [ WFC: No External Entity References ]
5024 * Attribute values cannot contain direct or indirect
5025 * entity references to external entities.
5026 */
5027 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardcf461992000-03-14 18:30:20 +00005028 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005029 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005030 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5031 ctxt->sax->error(ctxt->userData,
5032 "Attribute references external entity '%s'\n", name);
5033 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005034 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005035 }
5036 /*
5037 * [ WFC: No < in Attribute Values ]
5038 * The replacement text of any entity referred to directly or
5039 * indirectly in an attribute value (other than "&lt;") must
5040 * not contain a <.
5041 */
5042 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardb96e6431999-08-29 21:02:19 +00005043 (ent != NULL) &&
Daniel Veillard8b5dd832000-10-01 20:28:44 +00005044 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
Daniel Veillardb05deb71999-08-10 19:04:08 +00005045 (ent->content != NULL) &&
5046 (xmlStrchr(ent->content, '<'))) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005047 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005048 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5049 ctxt->sax->error(ctxt->userData,
5050 "'<' in entity '%s' is not allowed in attributes values\n", name);
5051 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005052 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005053 }
5054
5055 /*
5056 * Internal check, no parameter entities here ...
5057 */
5058 else {
Daniel Veillardcf461992000-03-14 18:30:20 +00005059 switch (ent->etype) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005060 case XML_INTERNAL_PARAMETER_ENTITY:
5061 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005062 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005063 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005064 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005065 "Attempt to reference the parameter entity '%s'\n", name);
5066 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005067 ctxt->disableSAX = 1;
5068 break;
5069 default:
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005070 break;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005071 }
5072 }
5073
5074 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00005075 * [ WFC: No Recursion ]
Daniel Veillardb1059e22000-09-16 14:02:43 +00005076 * A parsed entity must not contain a recursive reference
Daniel Veillardb96e6431999-08-29 21:02:19 +00005077 * to itself, either directly or indirectly.
Daniel Veillardb1059e22000-09-16 14:02:43 +00005078 * Done somewhere else
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005079 */
Daniel Veillard260a68f1998-08-13 03:39:55 +00005080
Daniel Veillard011b63c1999-06-02 17:44:04 +00005081 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005082 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005083 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005084 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005085 "xmlParseEntityRef: expecting ';'\n");
5086 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005087 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005088 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00005089 xmlFree(name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005090 }
5091 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00005092 return(ent);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005093}
Daniel Veillardb1059e22000-09-16 14:02:43 +00005094
Daniel Veillard10a2c651999-12-12 13:03:50 +00005095/**
5096 * xmlParseStringEntityRef:
5097 * @ctxt: an XML parser context
5098 * @str: a pointer to an index in the string
5099 *
5100 * parse ENTITY references declarations, but this version parses it from
5101 * a string value.
5102 *
5103 * [68] EntityRef ::= '&' Name ';'
5104 *
5105 * [ WFC: Entity Declared ]
5106 * In a document without any DTD, a document with only an internal DTD
5107 * subset which contains no parameter entity references, or a document
5108 * with "standalone='yes'", the Name given in the entity reference
5109 * must match that in an entity declaration, except that well-formed
5110 * documents need not declare any of the following entities: amp, lt,
5111 * gt, apos, quot. The declaration of a parameter entity must precede
5112 * any reference to it. Similarly, the declaration of a general entity
5113 * must precede any reference to it which appears in a default value in an
5114 * attribute-list declaration. Note that if entities are declared in the
5115 * external subset or in external parameter entities, a non-validating
5116 * processor is not obligated to read and process their declarations;
5117 * for such documents, the rule that an entity must be declared is a
5118 * well-formedness constraint only if standalone='yes'.
5119 *
5120 * [ WFC: Parsed Entity ]
5121 * An entity reference must not contain the name of an unparsed entity
5122 *
5123 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5124 * is updated to the current location in the string.
5125 */
5126xmlEntityPtr
5127xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5128 xmlChar *name;
5129 const xmlChar *ptr;
5130 xmlChar cur;
5131 xmlEntityPtr ent = NULL;
5132
Daniel Veillardcf461992000-03-14 18:30:20 +00005133 if ((str == NULL) || (*str == NULL))
5134 return(NULL);
Daniel Veillard10a2c651999-12-12 13:03:50 +00005135 ptr = *str;
5136 cur = *ptr;
5137 if (cur == '&') {
5138 ptr++;
5139 cur = *ptr;
5140 name = xmlParseStringName(ctxt, &ptr);
5141 if (name == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005142 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005143 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5144 ctxt->sax->error(ctxt->userData,
5145 "xmlParseEntityRef: no name\n");
Daniel Veillard10a2c651999-12-12 13:03:50 +00005146 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005147 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005148 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00005149 if (*ptr == ';') {
5150 ptr++;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005151 /*
5152 * Ask first SAX for entity resolution, otherwise try the
5153 * predefined set.
5154 */
5155 if (ctxt->sax != NULL) {
5156 if (ctxt->sax->getEntity != NULL)
5157 ent = ctxt->sax->getEntity(ctxt->userData, name);
5158 if (ent == NULL)
5159 ent = xmlGetPredefinedEntity(name);
5160 }
5161 /*
5162 * [ WFC: Entity Declared ]
5163 * In a document without any DTD, a document with only an
5164 * internal DTD subset which contains no parameter entity
5165 * references, or a document with "standalone='yes'", the
5166 * Name given in the entity reference must match that in an
5167 * entity declaration, except that well-formed documents
5168 * need not declare any of the following entities: amp, lt,
5169 * gt, apos, quot.
5170 * The declaration of a parameter entity must precede any
5171 * reference to it.
5172 * Similarly, the declaration of a general entity must
5173 * precede any reference to it which appears in a default
5174 * value in an attribute-list declaration. Note that if
5175 * entities are declared in the external subset or in
5176 * external parameter entities, a non-validating processor
5177 * is not obligated to read and process their declarations;
5178 * for such documents, the rule that an entity must be
5179 * declared is a well-formedness constraint only if
5180 * standalone='yes'.
5181 */
5182 if (ent == NULL) {
5183 if ((ctxt->standalone == 1) ||
5184 ((ctxt->hasExternalSubset == 0) &&
5185 (ctxt->hasPErefs == 0))) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005186 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005187 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5188 ctxt->sax->error(ctxt->userData,
5189 "Entity '%s' not defined\n", name);
Daniel Veillard10a2c651999-12-12 13:03:50 +00005190 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005191 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005192 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005193 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005194 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5195 ctxt->sax->warning(ctxt->userData,
5196 "Entity '%s' not defined\n", name);
Daniel Veillard10a2c651999-12-12 13:03:50 +00005197 }
5198 }
5199
5200 /*
5201 * [ WFC: Parsed Entity ]
5202 * An entity reference must not contain the name of an
5203 * unparsed entity
5204 */
Daniel Veillardcf461992000-03-14 18:30:20 +00005205 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005206 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005207 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5208 ctxt->sax->error(ctxt->userData,
5209 "Entity reference to unparsed entity %s\n", name);
Daniel Veillard10a2c651999-12-12 13:03:50 +00005210 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005211 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005212 }
5213
5214 /*
5215 * [ WFC: No External Entity References ]
5216 * Attribute values cannot contain direct or indirect
5217 * entity references to external entities.
5218 */
5219 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardcf461992000-03-14 18:30:20 +00005220 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005221 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005222 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5223 ctxt->sax->error(ctxt->userData,
5224 "Attribute references external entity '%s'\n", name);
Daniel Veillard10a2c651999-12-12 13:03:50 +00005225 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005226 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005227 }
5228 /*
5229 * [ WFC: No < in Attribute Values ]
5230 * The replacement text of any entity referred to directly or
5231 * indirectly in an attribute value (other than "&lt;") must
5232 * not contain a <.
5233 */
5234 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5235 (ent != NULL) &&
Daniel Veillard8b5dd832000-10-01 20:28:44 +00005236 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
Daniel Veillard10a2c651999-12-12 13:03:50 +00005237 (ent->content != NULL) &&
5238 (xmlStrchr(ent->content, '<'))) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005239 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005240 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5241 ctxt->sax->error(ctxt->userData,
5242 "'<' in entity '%s' is not allowed in attributes values\n", name);
Daniel Veillard10a2c651999-12-12 13:03:50 +00005243 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005244 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005245 }
5246
5247 /*
5248 * Internal check, no parameter entities here ...
5249 */
5250 else {
Daniel Veillardcf461992000-03-14 18:30:20 +00005251 switch (ent->etype) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00005252 case XML_INTERNAL_PARAMETER_ENTITY:
5253 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005254 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005255 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5256 ctxt->sax->error(ctxt->userData,
5257 "Attempt to reference the parameter entity '%s'\n", name);
Daniel Veillard10a2c651999-12-12 13:03:50 +00005258 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005259 ctxt->disableSAX = 1;
5260 break;
5261 default:
Daniel Veillard10a2c651999-12-12 13:03:50 +00005262 break;
5263 }
5264 }
5265
5266 /*
5267 * [ WFC: No Recursion ]
Daniel Veillardb1059e22000-09-16 14:02:43 +00005268 * A parsed entity must not contain a recursive reference
Daniel Veillard10a2c651999-12-12 13:03:50 +00005269 * to itself, either directly or indirectly.
Daniel Veillardb1059e22000-09-16 14:02:43 +00005270 * Done somewhwere else
Daniel Veillard10a2c651999-12-12 13:03:50 +00005271 */
5272
5273 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005274 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005275 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5276 ctxt->sax->error(ctxt->userData,
5277 "xmlParseEntityRef: expecting ';'\n");
Daniel Veillard10a2c651999-12-12 13:03:50 +00005278 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005279 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005280 }
5281 xmlFree(name);
5282 }
5283 }
Daniel Veillardcf461992000-03-14 18:30:20 +00005284 *str = ptr;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005285 return(ent);
5286}
Daniel Veillard260a68f1998-08-13 03:39:55 +00005287
Daniel Veillard11e00581998-10-24 18:27:49 +00005288/**
5289 * xmlParsePEReference:
5290 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00005291 *
5292 * parse PEReference declarations
Daniel Veillard011b63c1999-06-02 17:44:04 +00005293 * The entity content is handled directly by pushing it's content as
5294 * a new input stream.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005295 *
5296 * [69] PEReference ::= '%' Name ';'
Daniel Veillard1e346af1999-02-22 10:33:01 +00005297 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005298 * [ WFC: No Recursion ]
Daniel Veillardb1059e22000-09-16 14:02:43 +00005299 * A parsed entity must not contain a recursive
Daniel Veillardb05deb71999-08-10 19:04:08 +00005300 * reference to itself, either directly or indirectly.
5301 *
5302 * [ WFC: Entity Declared ]
5303 * In a document without any DTD, a document with only an internal DTD
5304 * subset which contains no parameter entity references, or a document
5305 * with "standalone='yes'", ... ... The declaration of a parameter
5306 * entity must precede any reference to it...
5307 *
5308 * [ VC: Entity Declared ]
5309 * In a document with an external subset or external parameter entities
5310 * with "standalone='no'", ... ... The declaration of a parameter entity
5311 * must precede any reference to it...
5312 *
5313 * [ WFC: In DTD ]
5314 * Parameter-entity references may only appear in the DTD.
5315 * NOTE: misleading but this is handled.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005316 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00005317void
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005318xmlParsePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005319 xmlChar *name;
Daniel Veillard517752b1999-04-05 12:20:10 +00005320 xmlEntityPtr entity = NULL;
Daniel Veillardccb09631998-10-27 06:21:04 +00005321 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005322
Daniel Veillardcf461992000-03-14 18:30:20 +00005323 if (RAW == '%') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00005324 NEXT;
5325 name = xmlParseName(ctxt);
5326 if (name == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005327 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005328 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005329 ctxt->sax->error(ctxt->userData,
5330 "xmlParsePEReference: no name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005331 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005332 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005333 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00005334 if (RAW == ';') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00005335 NEXT;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005336 if ((ctxt->sax != NULL) &&
5337 (ctxt->sax->getParameterEntity != NULL))
5338 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5339 name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005340 if (entity == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00005341 /*
5342 * [ WFC: Entity Declared ]
5343 * In a document without any DTD, a document with only an
5344 * internal DTD subset which contains no parameter entity
5345 * references, or a document with "standalone='yes'", ...
5346 * ... The declaration of a parameter entity must precede
5347 * any reference to it...
5348 */
5349 if ((ctxt->standalone == 1) ||
5350 ((ctxt->hasExternalSubset == 0) &&
5351 (ctxt->hasPErefs == 0))) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005352 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00005353 if ((!ctxt->disableSAX) &&
5354 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00005355 ctxt->sax->error(ctxt->userData,
5356 "PEReference: %%%s; not found\n", name);
5357 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005358 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005359 } else {
5360 /*
5361 * [ VC: Entity Declared ]
5362 * In a document with an external subset or external
5363 * parameter entities with "standalone='no'", ...
5364 * ... The declaration of a parameter entity must precede
5365 * any reference to it...
5366 */
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00005367 if ((!ctxt->disableSAX) &&
5368 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00005369 ctxt->sax->warning(ctxt->userData,
5370 "PEReference: %%%s; not found\n", name);
5371 ctxt->valid = 0;
5372 }
Daniel Veillardccb09631998-10-27 06:21:04 +00005373 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00005374 /*
5375 * Internal checking in case the entity quest barfed
5376 */
Daniel Veillardcf461992000-03-14 18:30:20 +00005377 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5378 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00005379 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5380 ctxt->sax->warning(ctxt->userData,
5381 "Internal: %%%s; is not a parameter entity\n", name);
5382 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00005383 /*
5384 * TODO !!!
5385 * handle the extra spaces added before and after
5386 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5387 */
Daniel Veillardb05deb71999-08-10 19:04:08 +00005388 input = xmlNewEntityInputStream(ctxt, entity);
5389 xmlPushInput(ctxt, input);
Daniel Veillardcf461992000-03-14 18:30:20 +00005390 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5391 (RAW == '<') && (NXT(1) == '?') &&
5392 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5393 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5394 xmlParseTextDecl(ctxt);
Daniel Veillard496a1cf2000-05-03 14:20:55 +00005395 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5396 /*
5397 * The XML REC instructs us to stop parsing
5398 * right here
5399 */
5400 ctxt->instate = XML_PARSER_EOF;
5401 xmlFree(name);
5402 return;
5403 }
Daniel Veillardcf461992000-03-14 18:30:20 +00005404 }
5405 if (ctxt->token == 0)
5406 ctxt->token = ' ';
Daniel Veillardb05deb71999-08-10 19:04:08 +00005407 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005408 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00005409 ctxt->hasPErefs = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005410 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005411 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005412 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005413 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005414 "xmlParsePEReference: expecting ';'\n");
5415 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005416 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005417 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00005418 xmlFree(name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005419 }
5420 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005421}
5422
Daniel Veillard11e00581998-10-24 18:27:49 +00005423/**
Daniel Veillard10a2c651999-12-12 13:03:50 +00005424 * xmlParseStringPEReference:
5425 * @ctxt: an XML parser context
5426 * @str: a pointer to an index in the string
5427 *
5428 * parse PEReference declarations
5429 *
5430 * [69] PEReference ::= '%' Name ';'
5431 *
5432 * [ WFC: No Recursion ]
Daniel Veillardb1059e22000-09-16 14:02:43 +00005433 * A parsed entity must not contain a recursive
Daniel Veillard10a2c651999-12-12 13:03:50 +00005434 * reference to itself, either directly or indirectly.
5435 *
5436 * [ WFC: Entity Declared ]
5437 * In a document without any DTD, a document with only an internal DTD
5438 * subset which contains no parameter entity references, or a document
5439 * with "standalone='yes'", ... ... The declaration of a parameter
5440 * entity must precede any reference to it...
5441 *
5442 * [ VC: Entity Declared ]
5443 * In a document with an external subset or external parameter entities
5444 * with "standalone='no'", ... ... The declaration of a parameter entity
5445 * must precede any reference to it...
5446 *
5447 * [ WFC: In DTD ]
5448 * Parameter-entity references may only appear in the DTD.
5449 * NOTE: misleading but this is handled.
5450 *
5451 * Returns the string of the entity content.
5452 * str is updated to the current value of the index
5453 */
5454xmlEntityPtr
5455xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5456 const xmlChar *ptr;
5457 xmlChar cur;
5458 xmlChar *name;
5459 xmlEntityPtr entity = NULL;
5460
5461 if ((str == NULL) || (*str == NULL)) return(NULL);
5462 ptr = *str;
5463 cur = *ptr;
5464 if (cur == '%') {
5465 ptr++;
5466 cur = *ptr;
5467 name = xmlParseStringName(ctxt, &ptr);
5468 if (name == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005469 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005470 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5471 ctxt->sax->error(ctxt->userData,
5472 "xmlParseStringPEReference: no name\n");
Daniel Veillard10a2c651999-12-12 13:03:50 +00005473 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005474 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005475 } else {
5476 cur = *ptr;
5477 if (cur == ';') {
5478 ptr++;
5479 cur = *ptr;
5480 if ((ctxt->sax != NULL) &&
5481 (ctxt->sax->getParameterEntity != NULL))
5482 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5483 name);
5484 if (entity == NULL) {
5485 /*
5486 * [ WFC: Entity Declared ]
5487 * In a document without any DTD, a document with only an
5488 * internal DTD subset which contains no parameter entity
5489 * references, or a document with "standalone='yes'", ...
5490 * ... The declaration of a parameter entity must precede
5491 * any reference to it...
5492 */
5493 if ((ctxt->standalone == 1) ||
5494 ((ctxt->hasExternalSubset == 0) &&
5495 (ctxt->hasPErefs == 0))) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005496 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005497 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5498 ctxt->sax->error(ctxt->userData,
5499 "PEReference: %%%s; not found\n", name);
Daniel Veillard10a2c651999-12-12 13:03:50 +00005500 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005501 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005502 } else {
5503 /*
5504 * [ VC: Entity Declared ]
5505 * In a document with an external subset or external
5506 * parameter entities with "standalone='no'", ...
5507 * ... The declaration of a parameter entity must
5508 * precede any reference to it...
5509 */
5510 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5511 ctxt->sax->warning(ctxt->userData,
5512 "PEReference: %%%s; not found\n", name);
5513 ctxt->valid = 0;
5514 }
5515 } else {
5516 /*
5517 * Internal checking in case the entity quest barfed
5518 */
Daniel Veillardcf461992000-03-14 18:30:20 +00005519 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5520 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00005521 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5522 ctxt->sax->warning(ctxt->userData,
5523 "Internal: %%%s; is not a parameter entity\n", name);
5524 }
5525 }
5526 ctxt->hasPErefs = 1;
5527 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005528 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005529 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5530 ctxt->sax->error(ctxt->userData,
5531 "xmlParseStringPEReference: expecting ';'\n");
Daniel Veillard10a2c651999-12-12 13:03:50 +00005532 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005533 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005534 }
5535 xmlFree(name);
5536 }
5537 }
5538 *str = ptr;
5539 return(entity);
5540}
5541
5542/**
Daniel Veillardcf461992000-03-14 18:30:20 +00005543 * xmlParseDocTypeDecl:
Daniel Veillard11e00581998-10-24 18:27:49 +00005544 * @ctxt: an XML parser context
5545 *
5546 * parse a DOCTYPE declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00005547 *
5548 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5549 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
Daniel Veillardb05deb71999-08-10 19:04:08 +00005550 *
5551 * [ VC: Root Element Type ]
5552 * The Name in the document type declaration must match the element
5553 * type of the root element.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005554 */
5555
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005556void
5557xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillardcf461992000-03-14 18:30:20 +00005558 xmlChar *name = NULL;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005559 xmlChar *ExternalID = NULL;
5560 xmlChar *URI = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005561
5562 /*
5563 * We know that '<!DOCTYPE' has been detected.
5564 */
5565 SKIP(9);
5566
5567 SKIP_BLANKS;
5568
5569 /*
5570 * Parse the DOCTYPE name.
5571 */
5572 name = xmlParseName(ctxt);
5573 if (name == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005574 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005575 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005576 ctxt->sax->error(ctxt->userData,
5577 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005578 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005579 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005580 }
Daniel Veillardcf461992000-03-14 18:30:20 +00005581 ctxt->intSubName = name;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005582
5583 SKIP_BLANKS;
5584
5585 /*
5586 * Check for SystemID and ExternalID
5587 */
Daniel Veillard1e346af1999-02-22 10:33:01 +00005588 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
Daniel Veillardb05deb71999-08-10 19:04:08 +00005589
5590 if ((URI != NULL) || (ExternalID != NULL)) {
5591 ctxt->hasExternalSubset = 1;
5592 }
Daniel Veillardcf461992000-03-14 18:30:20 +00005593 ctxt->extSubURI = URI;
5594 ctxt->extSubSystem = ExternalID;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005595
Daniel Veillard260a68f1998-08-13 03:39:55 +00005596 SKIP_BLANKS;
5597
Daniel Veillard011b63c1999-06-02 17:44:04 +00005598 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00005599 * Create and update the internal subset.
Daniel Veillard011b63c1999-06-02 17:44:04 +00005600 */
Daniel Veillardcf461992000-03-14 18:30:20 +00005601 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5602 (!ctxt->disableSAX))
Daniel Veillard27d88741999-05-29 11:51:49 +00005603 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005604
5605 /*
Daniel Veillarddbfd6411999-12-28 16:35:14 +00005606 * Is there any internal subset declarations ?
5607 * they are handled separately in xmlParseInternalSubset()
5608 */
Daniel Veillardcf461992000-03-14 18:30:20 +00005609 if (RAW == '[')
Daniel Veillarddbfd6411999-12-28 16:35:14 +00005610 return;
5611
5612 /*
5613 * We should be at the end of the DOCTYPE declaration.
5614 */
Daniel Veillardcf461992000-03-14 18:30:20 +00005615 if (RAW != '>') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005616 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00005617 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5618 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5619 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005620 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00005621 }
5622 NEXT;
5623}
5624
5625/**
Daniel Veillardcf461992000-03-14 18:30:20 +00005626 * xmlParseInternalsubset:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00005627 * @ctxt: an XML parser context
5628 *
5629 * parse the internal subset declaration
5630 *
5631 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5632 */
5633
5634void
5635xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5636 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00005637 * Is there any DTD definition ?
5638 */
Daniel Veillardcf461992000-03-14 18:30:20 +00005639 if (RAW == '[') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00005640 ctxt->instate = XML_PARSER_DTD;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005641 NEXT;
5642 /*
5643 * Parse the succession of Markup declarations and
5644 * PEReferences.
5645 * Subsequence (markupdecl | PEReference | S)*
5646 */
Daniel Veillardcf461992000-03-14 18:30:20 +00005647 while (RAW != ']') {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005648 const xmlChar *check = CUR_PTR;
Daniel Veillardb96e6431999-08-29 21:02:19 +00005649 int cons = ctxt->input->consumed;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005650
5651 SKIP_BLANKS;
5652 xmlParseMarkupDecl(ctxt);
Daniel Veillardccb09631998-10-27 06:21:04 +00005653 xmlParsePEReference(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005654
Daniel Veillard011b63c1999-06-02 17:44:04 +00005655 /*
5656 * Pop-up of finished entities.
5657 */
Daniel Veillardcf461992000-03-14 18:30:20 +00005658 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillard011b63c1999-06-02 17:44:04 +00005659 xmlPopInput(ctxt);
5660
Daniel Veillardc26087b1999-08-30 11:23:51 +00005661 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005662 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillardb96e6431999-08-29 21:02:19 +00005663 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5664 ctxt->sax->error(ctxt->userData,
Daniel Veillarddbfd6411999-12-28 16:35:14 +00005665 "xmlParseInternalSubset: error detected in Markup declaration\n");
Daniel Veillardb96e6431999-08-29 21:02:19 +00005666 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005667 ctxt->disableSAX = 1;
Daniel Veillardb96e6431999-08-29 21:02:19 +00005668 break;
5669 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005670 }
Daniel Veillard36650692000-07-21 15:16:39 +00005671 if (RAW == ']') {
5672 NEXT;
5673 SKIP_BLANKS;
5674 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005675 }
5676
5677 /*
5678 * We should be at the end of the DOCTYPE declaration.
5679 */
Daniel Veillardcf461992000-03-14 18:30:20 +00005680 if (RAW != '>') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005681 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005682 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005683 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005684 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005685 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005686 }
5687 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005688}
5689
Daniel Veillard11e00581998-10-24 18:27:49 +00005690/**
5691 * xmlParseAttribute:
5692 * @ctxt: an XML parser context
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005693 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard11e00581998-10-24 18:27:49 +00005694 *
5695 * parse an attribute
Daniel Veillard260a68f1998-08-13 03:39:55 +00005696 *
5697 * [41] Attribute ::= Name Eq AttValue
5698 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005699 * [ WFC: No External Entity References ]
5700 * Attribute values cannot contain direct or indirect entity references
5701 * to external entities.
5702 *
5703 * [ WFC: No < in Attribute Values ]
5704 * The replacement text of any entity referred to directly or indirectly in
5705 * an attribute value (other than "&lt;") must not contain a <.
5706 *
5707 * [ VC: Attribute Value Type ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00005708 * The attribute must have been declared; the value must be of the type
Daniel Veillardb05deb71999-08-10 19:04:08 +00005709 * declared for it.
5710 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00005711 * [25] Eq ::= S? '=' S?
5712 *
5713 * With namespace:
5714 *
5715 * [NS 11] Attribute ::= QName Eq AttValue
5716 *
5717 * Also the case QName == xmlns:??? is handled independently as a namespace
5718 * definition.
Daniel Veillard1e346af1999-02-22 10:33:01 +00005719 *
Daniel Veillard517752b1999-04-05 12:20:10 +00005720 * Returns the attribute name, and the value in *value.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005721 */
5722
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005723xmlChar *
5724xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
5725 xmlChar *name, *val;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005726
Daniel Veillard517752b1999-04-05 12:20:10 +00005727 *value = NULL;
5728 name = xmlParseName(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005729 if (name == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005730 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005731 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005732 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005733 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005734 ctxt->disableSAX = 1;
Daniel Veillardccb09631998-10-27 06:21:04 +00005735 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005736 }
5737
5738 /*
5739 * read the value
5740 */
5741 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00005742 if (RAW == '=') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00005743 NEXT;
5744 SKIP_BLANKS;
Daniel Veillard517752b1999-04-05 12:20:10 +00005745 val = xmlParseAttValue(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00005746 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005747 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005748 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005749 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005750 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005751 "Specification mandate value for attribute %s\n", name);
5752 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005753 ctxt->disableSAX = 1;
5754 xmlFree(name);
Daniel Veillardccb09631998-10-27 06:21:04 +00005755 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005756 }
5757
Daniel Veillardcf461992000-03-14 18:30:20 +00005758 /*
5759 * Check that xml:lang conforms to the specification
Daniel Veillarde0854c32000-08-27 21:12:29 +00005760 * No more registered as an error, just generate a warning now
5761 * since this was deprecated in XML second edition
Daniel Veillardcf461992000-03-14 18:30:20 +00005762 */
Daniel Veillard8b5dd832000-10-01 20:28:44 +00005763 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
Daniel Veillardcf461992000-03-14 18:30:20 +00005764 if (!xmlCheckLanguageID(val)) {
Daniel Veillarde0854c32000-08-27 21:12:29 +00005765 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5766 ctxt->sax->warning(ctxt->userData,
5767 "Malformed value for xml:lang : %s\n", val);
Daniel Veillardcf461992000-03-14 18:30:20 +00005768 }
5769 }
5770
5771 /*
5772 * Check that xml:space conforms to the specification
5773 */
Daniel Veillard8b5dd832000-10-01 20:28:44 +00005774 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
5775 if (xmlStrEqual(val, BAD_CAST "default"))
Daniel Veillardcf461992000-03-14 18:30:20 +00005776 *(ctxt->space) = 0;
Daniel Veillard8b5dd832000-10-01 20:28:44 +00005777 else if (xmlStrEqual(val, BAD_CAST "preserve"))
Daniel Veillardcf461992000-03-14 18:30:20 +00005778 *(ctxt->space) = 1;
5779 else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005780 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
Daniel Veillardcf461992000-03-14 18:30:20 +00005781 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5782 ctxt->sax->error(ctxt->userData,
5783"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
5784 val);
Daniel Veillardcf461992000-03-14 18:30:20 +00005785 ctxt->wellFormed = 0;
5786 ctxt->disableSAX = 1;
5787 }
5788 }
5789
Daniel Veillard517752b1999-04-05 12:20:10 +00005790 *value = val;
5791 return(name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005792}
5793
Daniel Veillard11e00581998-10-24 18:27:49 +00005794/**
5795 * xmlParseStartTag:
5796 * @ctxt: an XML parser context
5797 *
5798 * parse a start of tag either for rule element or
5799 * EmptyElement. In both case we don't parse the tag closing chars.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005800 *
5801 * [40] STag ::= '<' Name (S Attribute)* S? '>'
5802 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005803 * [ WFC: Unique Att Spec ]
5804 * No attribute name may appear more than once in the same start-tag or
5805 * empty-element tag.
5806 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00005807 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
5808 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005809 * [ WFC: Unique Att Spec ]
5810 * No attribute name may appear more than once in the same start-tag or
5811 * empty-element tag.
5812 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00005813 * With namespace:
5814 *
5815 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
5816 *
5817 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
Daniel Veillard14fff061999-06-22 21:49:07 +00005818 *
Daniel Veillard06047432000-04-24 11:33:38 +00005819 * Returns the element name parsed
Daniel Veillard260a68f1998-08-13 03:39:55 +00005820 */
5821
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005822xmlChar *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005823xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005824 xmlChar *name;
5825 xmlChar *attname;
5826 xmlChar *attvalue;
5827 const xmlChar **atts = NULL;
Daniel Veillard517752b1999-04-05 12:20:10 +00005828 int nbatts = 0;
5829 int maxatts = 0;
5830 int i;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005831
Daniel Veillardcf461992000-03-14 18:30:20 +00005832 if (RAW != '<') return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005833 NEXT;
5834
Daniel Veillard517752b1999-04-05 12:20:10 +00005835 name = xmlParseName(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005836 if (name == NULL) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005837 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005838 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005839 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005840 "xmlParseStartTag: invalid element name\n");
5841 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005842 ctxt->disableSAX = 1;
Daniel Veillard14fff061999-06-22 21:49:07 +00005843 return(NULL);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005844 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005845
5846 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00005847 * Now parse the attributes, it ends up with the ending
5848 *
5849 * (S Attribute)* S?
5850 */
5851 SKIP_BLANKS;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005852 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00005853
5854 while ((IS_CHAR(RAW)) &&
5855 (RAW != '>') &&
5856 ((RAW != '/') || (NXT(1) != '>'))) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005857 const xmlChar *q = CUR_PTR;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005858 int cons = ctxt->input->consumed;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005859
Daniel Veillard517752b1999-04-05 12:20:10 +00005860 attname = xmlParseAttribute(ctxt, &attvalue);
5861 if ((attname != NULL) && (attvalue != NULL)) {
5862 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00005863 * [ WFC: Unique Att Spec ]
5864 * No attribute name may appear more than once in the same
5865 * start-tag or empty-element tag.
Daniel Veillard517752b1999-04-05 12:20:10 +00005866 */
5867 for (i = 0; i < nbatts;i += 2) {
Daniel Veillard8b5dd832000-10-01 20:28:44 +00005868 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005869 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard517752b1999-04-05 12:20:10 +00005870 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00005871 ctxt->sax->error(ctxt->userData,
5872 "Attribute %s redefined\n",
5873 attname);
Daniel Veillard517752b1999-04-05 12:20:10 +00005874 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005875 ctxt->disableSAX = 1;
Daniel Veillard6454aec1999-09-02 22:04:43 +00005876 xmlFree(attname);
5877 xmlFree(attvalue);
Daniel Veillardb05deb71999-08-10 19:04:08 +00005878 goto failed;
Daniel Veillard517752b1999-04-05 12:20:10 +00005879 }
5880 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005881
Daniel Veillard517752b1999-04-05 12:20:10 +00005882 /*
5883 * Add the pair to atts
5884 */
5885 if (atts == NULL) {
5886 maxatts = 10;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005887 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillard517752b1999-04-05 12:20:10 +00005888 if (atts == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00005889 xmlGenericError(xmlGenericErrorContext,
5890 "malloc of %ld byte failed\n",
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005891 maxatts * (long)sizeof(xmlChar *));
Daniel Veillard14fff061999-06-22 21:49:07 +00005892 return(NULL);
Daniel Veillard517752b1999-04-05 12:20:10 +00005893 }
Daniel Veillard51e3b151999-11-12 17:02:31 +00005894 } else if (nbatts + 4 > maxatts) {
Daniel Veillard517752b1999-04-05 12:20:10 +00005895 maxatts *= 2;
Daniel Veillard4b0755c2000-09-25 14:26:28 +00005896 atts = (const xmlChar **) xmlRealloc((void *) atts,
5897 maxatts * sizeof(xmlChar *));
Daniel Veillard517752b1999-04-05 12:20:10 +00005898 if (atts == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00005899 xmlGenericError(xmlGenericErrorContext,
5900 "realloc of %ld byte failed\n",
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005901 maxatts * (long)sizeof(xmlChar *));
Daniel Veillard14fff061999-06-22 21:49:07 +00005902 return(NULL);
Daniel Veillard517752b1999-04-05 12:20:10 +00005903 }
5904 }
5905 atts[nbatts++] = attname;
5906 atts[nbatts++] = attvalue;
5907 atts[nbatts] = NULL;
5908 atts[nbatts + 1] = NULL;
Daniel Veillardcf461992000-03-14 18:30:20 +00005909 } else {
5910 if (attname != NULL)
5911 xmlFree(attname);
5912 if (attvalue != NULL)
5913 xmlFree(attvalue);
Daniel Veillard517752b1999-04-05 12:20:10 +00005914 }
5915
Daniel Veillardb96e6431999-08-29 21:02:19 +00005916failed:
Daniel Veillardcf461992000-03-14 18:30:20 +00005917
5918 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
5919 break;
5920 if (!IS_BLANK(RAW)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005921 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillardcf461992000-03-14 18:30:20 +00005922 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5923 ctxt->sax->error(ctxt->userData,
5924 "attributes construct error\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00005925 ctxt->wellFormed = 0;
5926 ctxt->disableSAX = 1;
5927 }
Daniel Veillard517752b1999-04-05 12:20:10 +00005928 SKIP_BLANKS;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005929 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005930 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005931 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005932 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00005933 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005934 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005935 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005936 break;
5937 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005938 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005939 }
5940
5941 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00005942 * SAX: Start of Element !
5943 */
Daniel Veillardcf461992000-03-14 18:30:20 +00005944 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
5945 (!ctxt->disableSAX))
Daniel Veillard27d88741999-05-29 11:51:49 +00005946 ctxt->sax->startElement(ctxt->userData, name, atts);
Daniel Veillard517752b1999-04-05 12:20:10 +00005947
Daniel Veillard517752b1999-04-05 12:20:10 +00005948 if (atts != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005949 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
Daniel Veillard4b0755c2000-09-25 14:26:28 +00005950 xmlFree((void *) atts);
Daniel Veillard517752b1999-04-05 12:20:10 +00005951 }
Daniel Veillard14fff061999-06-22 21:49:07 +00005952 return(name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005953}
5954
Daniel Veillard11e00581998-10-24 18:27:49 +00005955/**
5956 * xmlParseEndTag:
5957 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00005958 *
5959 * parse an end of tag
Daniel Veillard260a68f1998-08-13 03:39:55 +00005960 *
5961 * [42] ETag ::= '</' Name S? '>'
5962 *
5963 * With namespace
5964 *
Daniel Veillard517752b1999-04-05 12:20:10 +00005965 * [NS 9] ETag ::= '</' QName S? '>'
Daniel Veillard260a68f1998-08-13 03:39:55 +00005966 */
5967
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005968void
Daniel Veillarddbfd6411999-12-28 16:35:14 +00005969xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005970 xmlChar *name;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00005971 xmlChar *oldname;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005972
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005973 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00005974 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005975 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005976 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005977 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005978 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005979 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005980 return;
5981 }
5982 SKIP(2);
5983
Daniel Veillard517752b1999-04-05 12:20:10 +00005984 name = xmlParseName(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005985
5986 /*
5987 * We should definitely be at the ending "S? '>'" part
5988 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005989 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005990 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00005991 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00005992 ctxt->errNo = XML_ERR_GT_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005993 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005994 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005995 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005996 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005997 } else
5998 NEXT;
5999
Daniel Veillard517752b1999-04-05 12:20:10 +00006000 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00006001 * [ WFC: Element Type Match ]
6002 * The Name in an element's end-tag must match the element type in the
6003 * start-tag.
6004 *
Daniel Veillard14fff061999-06-22 21:49:07 +00006005 */
Daniel Veillardda07c342000-01-25 18:31:22 +00006006 if ((name == NULL) || (ctxt->name == NULL) ||
Daniel Veillard8b5dd832000-10-01 20:28:44 +00006007 (!xmlStrEqual(name, ctxt->name))) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006008 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
Daniel Veillardda07c342000-01-25 18:31:22 +00006009 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6010 if ((name != NULL) && (ctxt->name != NULL)) {
6011 ctxt->sax->error(ctxt->userData,
6012 "Opening and ending tag mismatch: %s and %s\n",
6013 ctxt->name, name);
6014 } else if (ctxt->name != NULL) {
6015 ctxt->sax->error(ctxt->userData,
6016 "Ending tag eror for: %s\n", ctxt->name);
6017 } else {
6018 ctxt->sax->error(ctxt->userData,
6019 "Ending tag error: internal error ???\n");
6020 }
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006021
Daniel Veillardda07c342000-01-25 18:31:22 +00006022 }
Daniel Veillard14fff061999-06-22 21:49:07 +00006023 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006024 ctxt->disableSAX = 1;
Daniel Veillard14fff061999-06-22 21:49:07 +00006025 }
6026
6027 /*
Daniel Veillard517752b1999-04-05 12:20:10 +00006028 * SAX: End of Tag
6029 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006030 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6031 (!ctxt->disableSAX))
Daniel Veillard27d88741999-05-29 11:51:49 +00006032 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard517752b1999-04-05 12:20:10 +00006033
6034 if (name != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00006035 xmlFree(name);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006036 oldname = namePop(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00006037 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006038 if (oldname != NULL) {
6039#ifdef DEBUG_STACK
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00006040 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006041#endif
6042 xmlFree(oldname);
6043 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00006044 return;
6045}
6046
Daniel Veillard11e00581998-10-24 18:27:49 +00006047/**
6048 * xmlParseCDSect:
6049 * @ctxt: an XML parser context
6050 *
6051 * Parse escaped pure raw content.
Daniel Veillard260a68f1998-08-13 03:39:55 +00006052 *
6053 * [18] CDSect ::= CDStart CData CDEnd
6054 *
6055 * [19] CDStart ::= '<![CDATA['
6056 *
6057 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6058 *
6059 * [21] CDEnd ::= ']]>'
6060 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006061void
6062xmlParseCDSect(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00006063 xmlChar *buf = NULL;
6064 int len = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006065 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillardcf461992000-03-14 18:30:20 +00006066 int r, rl;
6067 int s, sl;
6068 int cur, l;
Daniel Veillardf09e7e32000-10-01 15:53:30 +00006069 int count = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006070
Daniel Veillardb05deb71999-08-10 19:04:08 +00006071 if ((NXT(0) == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00006072 (NXT(2) == '[') && (NXT(3) == 'C') &&
6073 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6074 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6075 (NXT(8) == '[')) {
6076 SKIP(9);
6077 } else
6078 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006079
6080 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillardcf461992000-03-14 18:30:20 +00006081 r = CUR_CHAR(rl);
6082 if (!IS_CHAR(r)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006083 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006084 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006085 ctxt->sax->error(ctxt->userData,
Daniel Veillard10a2c651999-12-12 13:03:50 +00006086 "CData section not finished\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006087 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006088 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006089 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006090 return;
6091 }
Daniel Veillardcf461992000-03-14 18:30:20 +00006092 NEXTL(rl);
6093 s = CUR_CHAR(sl);
6094 if (!IS_CHAR(s)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006095 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006096 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006097 ctxt->sax->error(ctxt->userData,
Daniel Veillard10a2c651999-12-12 13:03:50 +00006098 "CData section not finished\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006099 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006100 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006101 ctxt->instate = XML_PARSER_CONTENT;
6102 return;
6103 }
Daniel Veillardcf461992000-03-14 18:30:20 +00006104 NEXTL(sl);
6105 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00006106 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6107 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00006108 xmlGenericError(xmlGenericErrorContext,
6109 "malloc of %d byte failed\n", size);
Daniel Veillard10a2c651999-12-12 13:03:50 +00006110 return;
6111 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00006112 while (IS_CHAR(cur) &&
6113 ((r != ']') || (s != ']') || (cur != '>'))) {
Daniel Veillardcf461992000-03-14 18:30:20 +00006114 if (len + 5 >= size) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00006115 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00006116 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00006117 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00006118 xmlGenericError(xmlGenericErrorContext,
6119 "realloc of %d byte failed\n", size);
Daniel Veillard10a2c651999-12-12 13:03:50 +00006120 return;
6121 }
6122 }
Daniel Veillardcf461992000-03-14 18:30:20 +00006123 COPY_BUF(rl,buf,len,r);
Daniel Veillardb05deb71999-08-10 19:04:08 +00006124 r = s;
Daniel Veillardcf461992000-03-14 18:30:20 +00006125 rl = sl;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006126 s = cur;
Daniel Veillardcf461992000-03-14 18:30:20 +00006127 sl = l;
Daniel Veillardf09e7e32000-10-01 15:53:30 +00006128 count++;
6129 if (count > 50) {
6130 GROW;
6131 count = 0;
6132 }
Daniel Veillardcf461992000-03-14 18:30:20 +00006133 NEXTL(l);
6134 cur = CUR_CHAR(l);
Daniel Veillardb05deb71999-08-10 19:04:08 +00006135 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00006136 buf[len] = 0;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006137 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillardcf461992000-03-14 18:30:20 +00006138 if (cur != '>') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006139 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006140 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006141 ctxt->sax->error(ctxt->userData,
Daniel Veillard10a2c651999-12-12 13:03:50 +00006142 "CData section not finished\n%.50s\n", buf);
Daniel Veillardb05deb71999-08-10 19:04:08 +00006143 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006144 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00006145 xmlFree(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006146 return;
6147 }
Daniel Veillardcf461992000-03-14 18:30:20 +00006148 NEXTL(l);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006149
6150 /*
Daniel Veillard10a2c651999-12-12 13:03:50 +00006151 * Ok the buffer is to be consumed as cdata.
Daniel Veillard260a68f1998-08-13 03:39:55 +00006152 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006153 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00006154 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillard10a2c651999-12-12 13:03:50 +00006155 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006156 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00006157 xmlFree(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006158}
6159
Daniel Veillard11e00581998-10-24 18:27:49 +00006160/**
6161 * xmlParseContent:
6162 * @ctxt: an XML parser context
6163 *
6164 * Parse a content:
Daniel Veillard260a68f1998-08-13 03:39:55 +00006165 *
6166 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6167 */
6168
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006169void
6170xmlParseContent(xmlParserCtxtPtr ctxt) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00006171 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00006172 while (((RAW != 0) || (ctxt->token != 0)) &&
6173 ((RAW != '<') || (NXT(1) != '/'))) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006174 const xmlChar *test = CUR_PTR;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00006175 int cons = ctxt->input->consumed;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006176 xmlChar tok = ctxt->token;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006177
6178 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00006179 * Handle possible processed charrefs.
6180 */
6181 if (ctxt->token != 0) {
6182 xmlParseCharData(ctxt, 0);
6183 }
6184 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00006185 * First case : a Processing Instruction.
6186 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006187 else if ((RAW == '<') && (NXT(1) == '?')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006188 xmlParsePI(ctxt);
6189 }
Daniel Veillard517752b1999-04-05 12:20:10 +00006190
Daniel Veillard260a68f1998-08-13 03:39:55 +00006191 /*
6192 * Second case : a CDSection
6193 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006194 else if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00006195 (NXT(2) == '[') && (NXT(3) == 'C') &&
6196 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6197 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6198 (NXT(8) == '[')) {
6199 xmlParseCDSect(ctxt);
6200 }
Daniel Veillard517752b1999-04-05 12:20:10 +00006201
Daniel Veillard260a68f1998-08-13 03:39:55 +00006202 /*
6203 * Third case : a comment
6204 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006205 else if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00006206 (NXT(2) == '-') && (NXT(3) == '-')) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00006207 xmlParseComment(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00006208 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006209 }
Daniel Veillard517752b1999-04-05 12:20:10 +00006210
Daniel Veillard260a68f1998-08-13 03:39:55 +00006211 /*
6212 * Fourth case : a sub-element.
6213 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006214 else if (RAW == '<') {
Daniel Veillard517752b1999-04-05 12:20:10 +00006215 xmlParseElement(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006216 }
Daniel Veillard517752b1999-04-05 12:20:10 +00006217
Daniel Veillard260a68f1998-08-13 03:39:55 +00006218 /*
Daniel Veillardccb09631998-10-27 06:21:04 +00006219 * Fifth case : a reference. If if has not been resolved,
6220 * parsing returns it's Name, create the node
Daniel Veillard260a68f1998-08-13 03:39:55 +00006221 */
Daniel Veillardb05deb71999-08-10 19:04:08 +00006222
Daniel Veillardcf461992000-03-14 18:30:20 +00006223 else if (RAW == '&') {
Daniel Veillard011b63c1999-06-02 17:44:04 +00006224 xmlParseReference(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006225 }
Daniel Veillard517752b1999-04-05 12:20:10 +00006226
Daniel Veillard260a68f1998-08-13 03:39:55 +00006227 /*
6228 * Last case, text. Note that References are handled directly.
6229 */
6230 else {
6231 xmlParseCharData(ctxt, 0);
6232 }
6233
Daniel Veillarde2d034d1999-07-27 19:52:06 +00006234 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006235 /*
6236 * Pop-up of finished entities.
6237 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006238 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillardbc50b591999-03-01 12:28:53 +00006239 xmlPopInput(ctxt);
Daniel Veillard10a2c651999-12-12 13:03:50 +00006240 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006241
Daniel Veillardb96e6431999-08-29 21:02:19 +00006242 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6243 (tok == ctxt->token)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006244 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006245 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006246 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006247 "detected an error in element content\n");
6248 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006249 ctxt->disableSAX = 1;
Daniel Veillarde715dd22000-08-29 18:29:38 +00006250 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006251 break;
6252 }
6253 }
6254}
6255
Daniel Veillard11e00581998-10-24 18:27:49 +00006256/**
6257 * xmlParseElement:
6258 * @ctxt: an XML parser context
6259 *
6260 * parse an XML element, this is highly recursive
Daniel Veillard260a68f1998-08-13 03:39:55 +00006261 *
6262 * [39] element ::= EmptyElemTag | STag content ETag
6263 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00006264 * [ WFC: Element Type Match ]
6265 * The Name in an element's end-tag must match the element type in the
6266 * start-tag.
6267 *
6268 * [ VC: Element Valid ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00006269 * An element is valid if there is a declaration matching elementdecl
Daniel Veillardb05deb71999-08-10 19:04:08 +00006270 * where the Name matches the element type and one of the following holds:
6271 * - The declaration matches EMPTY and the element has no content.
6272 * - The declaration matches children and the sequence of child elements
6273 * belongs to the language generated by the regular expression in the
6274 * content model, with optional white space (characters matching the
6275 * nonterminal S) between each pair of child elements.
6276 * - The declaration matches Mixed and the content consists of character
6277 * data and child elements whose types match names in the content model.
6278 * - The declaration matches ANY, and the types of any child elements have
6279 * been declared.
Daniel Veillard260a68f1998-08-13 03:39:55 +00006280 */
6281
Daniel Veillard517752b1999-04-05 12:20:10 +00006282void
Daniel Veillard1e346af1999-02-22 10:33:01 +00006283xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006284 const xmlChar *openTag = CUR_PTR;
6285 xmlChar *name;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006286 xmlChar *oldname;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006287 xmlParserNodeInfo node_info;
Daniel Veillardc26087b1999-08-30 11:23:51 +00006288 xmlNodePtr ret;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006289
6290 /* Capture start position */
Daniel Veillardc26087b1999-08-30 11:23:51 +00006291 if (ctxt->record_info) {
6292 node_info.begin_pos = ctxt->input->consumed +
6293 (CUR_PTR - ctxt->input->base);
6294 node_info.begin_line = ctxt->input->line;
6295 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00006296
Daniel Veillardcf461992000-03-14 18:30:20 +00006297 if (ctxt->spaceNr == 0)
6298 spacePush(ctxt, -1);
6299 else
6300 spacePush(ctxt, *ctxt->space);
6301
Daniel Veillard14fff061999-06-22 21:49:07 +00006302 name = xmlParseStartTag(ctxt);
6303 if (name == NULL) {
Daniel Veillardcf461992000-03-14 18:30:20 +00006304 spacePop(ctxt);
Daniel Veillard14fff061999-06-22 21:49:07 +00006305 return;
6306 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006307 namePush(ctxt, name);
Daniel Veillardc26087b1999-08-30 11:23:51 +00006308 ret = ctxt->node;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006309
6310 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00006311 * [ VC: Root Element Type ]
6312 * The Name in the document type declaration must match the element
6313 * type of the root element.
6314 */
6315 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
Daniel Veillardcf461992000-03-14 18:30:20 +00006316 ctxt->node && (ctxt->node == ctxt->myDoc->children))
Daniel Veillardb05deb71999-08-10 19:04:08 +00006317 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6318
6319 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00006320 * Check for an Empty Element.
6321 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006322 if ((RAW == '/') && (NXT(1) == '>')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006323 SKIP(2);
Daniel Veillardcf461992000-03-14 18:30:20 +00006324 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6325 (!ctxt->disableSAX))
Daniel Veillard14fff061999-06-22 21:49:07 +00006326 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006327 oldname = namePop(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00006328 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006329 if (oldname != NULL) {
6330#ifdef DEBUG_STACK
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00006331 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006332#endif
6333 xmlFree(oldname);
6334 }
Daniel Veillard87b95392000-08-12 21:12:04 +00006335 if ( ret != NULL && ctxt->record_info ) {
6336 node_info.end_pos = ctxt->input->consumed +
6337 (CUR_PTR - ctxt->input->base);
6338 node_info.end_line = ctxt->input->line;
6339 node_info.node = ret;
6340 xmlParserAddNodeInfo(ctxt, &node_info);
6341 }
Daniel Veillard517752b1999-04-05 12:20:10 +00006342 return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006343 }
Daniel Veillardcf461992000-03-14 18:30:20 +00006344 if (RAW == '>') {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00006345 NEXT;
6346 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006347 ctxt->errNo = XML_ERR_GT_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006348 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006349 ctxt->sax->error(ctxt->userData,
6350 "Couldn't find end of Start Tag\n%.30s\n",
Daniel Veillard242590e1998-11-13 18:04:35 +00006351 openTag);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006352 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006353 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006354
6355 /*
6356 * end of parsing of this node.
6357 */
6358 nodePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006359 oldname = namePop(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00006360 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006361 if (oldname != NULL) {
6362#ifdef DEBUG_STACK
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00006363 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006364#endif
6365 xmlFree(oldname);
6366 }
Daniel Veillardc26087b1999-08-30 11:23:51 +00006367
6368 /*
6369 * Capture end position and add node
6370 */
6371 if ( ret != NULL && ctxt->record_info ) {
6372 node_info.end_pos = ctxt->input->consumed +
6373 (CUR_PTR - ctxt->input->base);
6374 node_info.end_line = ctxt->input->line;
6375 node_info.node = ret;
6376 xmlParserAddNodeInfo(ctxt, &node_info);
6377 }
Daniel Veillard517752b1999-04-05 12:20:10 +00006378 return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006379 }
6380
6381 /*
6382 * Parse the content of the element:
6383 */
6384 xmlParseContent(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00006385 if (!IS_CHAR(RAW)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006386 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006387 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006388 ctxt->sax->error(ctxt->userData,
Daniel Veillard242590e1998-11-13 18:04:35 +00006389 "Premature end of data in tag %.30s\n", openTag);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006390 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006391 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006392
6393 /*
6394 * end of parsing of this node.
6395 */
6396 nodePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006397 oldname = namePop(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00006398 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006399 if (oldname != NULL) {
6400#ifdef DEBUG_STACK
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00006401 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006402#endif
6403 xmlFree(oldname);
6404 }
Daniel Veillard517752b1999-04-05 12:20:10 +00006405 return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006406 }
6407
6408 /*
6409 * parse the end of tag: '</' should be here.
6410 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006411 xmlParseEndTag(ctxt);
Daniel Veillardc26087b1999-08-30 11:23:51 +00006412
6413 /*
6414 * Capture end position and add node
6415 */
6416 if ( ret != NULL && ctxt->record_info ) {
6417 node_info.end_pos = ctxt->input->consumed +
6418 (CUR_PTR - ctxt->input->base);
6419 node_info.end_line = ctxt->input->line;
6420 node_info.node = ret;
6421 xmlParserAddNodeInfo(ctxt, &node_info);
6422 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00006423}
6424
Daniel Veillard11e00581998-10-24 18:27:49 +00006425/**
6426 * xmlParseVersionNum:
6427 * @ctxt: an XML parser context
6428 *
6429 * parse the XML version value.
Daniel Veillard260a68f1998-08-13 03:39:55 +00006430 *
6431 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
Daniel Veillard1e346af1999-02-22 10:33:01 +00006432 *
6433 * Returns the string giving the XML version number, or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00006434 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006435xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006436xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00006437 xmlChar *buf = NULL;
6438 int len = 0;
6439 int size = 10;
6440 xmlChar cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006441
Daniel Veillard10a2c651999-12-12 13:03:50 +00006442 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6443 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00006444 xmlGenericError(xmlGenericErrorContext,
6445 "malloc of %d byte failed\n", size);
Daniel Veillard10a2c651999-12-12 13:03:50 +00006446 return(NULL);
6447 }
6448 cur = CUR;
Daniel Veillardcf461992000-03-14 18:30:20 +00006449 while (((cur >= 'a') && (cur <= 'z')) ||
6450 ((cur >= 'A') && (cur <= 'Z')) ||
6451 ((cur >= '0') && (cur <= '9')) ||
6452 (cur == '_') || (cur == '.') ||
6453 (cur == ':') || (cur == '-')) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00006454 if (len + 1 >= size) {
6455 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00006456 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00006457 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00006458 xmlGenericError(xmlGenericErrorContext,
6459 "realloc of %d byte failed\n", size);
Daniel Veillard10a2c651999-12-12 13:03:50 +00006460 return(NULL);
6461 }
6462 }
6463 buf[len++] = cur;
6464 NEXT;
6465 cur=CUR;
6466 }
6467 buf[len] = 0;
6468 return(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006469}
6470
Daniel Veillard11e00581998-10-24 18:27:49 +00006471/**
6472 * xmlParseVersionInfo:
6473 * @ctxt: an XML parser context
6474 *
6475 * parse the XML version.
Daniel Veillard260a68f1998-08-13 03:39:55 +00006476 *
6477 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6478 *
6479 * [25] Eq ::= S? '=' S?
Daniel Veillard11e00581998-10-24 18:27:49 +00006480 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00006481 * Returns the version string, e.g. "1.0"
Daniel Veillard260a68f1998-08-13 03:39:55 +00006482 */
6483
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006484xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006485xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006486 xmlChar *version = NULL;
6487 const xmlChar *q;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006488
Daniel Veillardcf461992000-03-14 18:30:20 +00006489 if ((RAW == 'v') && (NXT(1) == 'e') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00006490 (NXT(2) == 'r') && (NXT(3) == 's') &&
6491 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6492 (NXT(6) == 'n')) {
6493 SKIP(7);
6494 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00006495 if (RAW != '=') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006496 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006497 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006498 ctxt->sax->error(ctxt->userData,
6499 "xmlParseVersionInfo : expected '='\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006500 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006501 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006502 return(NULL);
6503 }
6504 NEXT;
6505 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00006506 if (RAW == '"') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006507 NEXT;
6508 q = CUR_PTR;
6509 version = xmlParseVersionNum(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00006510 if (RAW != '"') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006511 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006512 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006513 ctxt->sax->error(ctxt->userData,
6514 "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006515 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006516 ctxt->disableSAX = 1;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006517 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00006518 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00006519 } else if (RAW == '\''){
Daniel Veillard260a68f1998-08-13 03:39:55 +00006520 NEXT;
6521 q = CUR_PTR;
6522 version = xmlParseVersionNum(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00006523 if (RAW != '\'') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006524 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006525 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006526 ctxt->sax->error(ctxt->userData,
6527 "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006528 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006529 ctxt->disableSAX = 1;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006530 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00006531 NEXT;
6532 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006533 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006534 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006535 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006536 "xmlParseVersionInfo : expected ' or \"\n");
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006537 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006538 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006539 }
6540 }
6541 return(version);
6542}
6543
Daniel Veillard11e00581998-10-24 18:27:49 +00006544/**
6545 * xmlParseEncName:
6546 * @ctxt: an XML parser context
6547 *
6548 * parse the XML encoding name
Daniel Veillard260a68f1998-08-13 03:39:55 +00006549 *
6550 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
Daniel Veillard11e00581998-10-24 18:27:49 +00006551 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00006552 * Returns the encoding name value or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00006553 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006554xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006555xmlParseEncName(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00006556 xmlChar *buf = NULL;
6557 int len = 0;
6558 int size = 10;
6559 xmlChar cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006560
Daniel Veillard10a2c651999-12-12 13:03:50 +00006561 cur = CUR;
6562 if (((cur >= 'a') && (cur <= 'z')) ||
6563 ((cur >= 'A') && (cur <= 'Z'))) {
6564 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6565 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00006566 xmlGenericError(xmlGenericErrorContext,
6567 "malloc of %d byte failed\n", size);
Daniel Veillard10a2c651999-12-12 13:03:50 +00006568 return(NULL);
6569 }
6570
6571 buf[len++] = cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006572 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00006573 cur = CUR;
Daniel Veillardcf461992000-03-14 18:30:20 +00006574 while (((cur >= 'a') && (cur <= 'z')) ||
6575 ((cur >= 'A') && (cur <= 'Z')) ||
6576 ((cur >= '0') && (cur <= '9')) ||
6577 (cur == '.') || (cur == '_') ||
6578 (cur == '-')) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00006579 if (len + 1 >= size) {
6580 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00006581 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00006582 if (buf == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00006583 xmlGenericError(xmlGenericErrorContext,
6584 "realloc of %d byte failed\n", size);
Daniel Veillard10a2c651999-12-12 13:03:50 +00006585 return(NULL);
6586 }
6587 }
6588 buf[len++] = cur;
6589 NEXT;
6590 cur = CUR;
6591 if (cur == 0) {
6592 SHRINK;
6593 GROW;
6594 cur = CUR;
6595 }
6596 }
6597 buf[len] = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006598 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006599 ctxt->errNo = XML_ERR_ENCODING_NAME;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006600 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006601 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006602 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006603 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006604 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00006605 return(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006606}
6607
Daniel Veillard11e00581998-10-24 18:27:49 +00006608/**
6609 * xmlParseEncodingDecl:
6610 * @ctxt: an XML parser context
6611 *
6612 * parse the XML encoding declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00006613 *
6614 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
Daniel Veillard11e00581998-10-24 18:27:49 +00006615 *
Daniel Veillardb1059e22000-09-16 14:02:43 +00006616 * this setups the conversion filters.
Daniel Veillard11e00581998-10-24 18:27:49 +00006617 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00006618 * Returns the encoding value or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00006619 */
6620
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006621xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006622xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006623 xmlChar *encoding = NULL;
6624 const xmlChar *q;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006625
6626 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00006627 if ((RAW == 'e') && (NXT(1) == 'n') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00006628 (NXT(2) == 'c') && (NXT(3) == 'o') &&
6629 (NXT(4) == 'd') && (NXT(5) == 'i') &&
6630 (NXT(6) == 'n') && (NXT(7) == 'g')) {
6631 SKIP(8);
6632 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00006633 if (RAW != '=') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006634 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006635 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006636 ctxt->sax->error(ctxt->userData,
6637 "xmlParseEncodingDecl : expected '='\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006638 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006639 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006640 return(NULL);
6641 }
6642 NEXT;
6643 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00006644 if (RAW == '"') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006645 NEXT;
6646 q = CUR_PTR;
6647 encoding = xmlParseEncName(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00006648 if (RAW != '"') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006649 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006651 ctxt->sax->error(ctxt->userData,
6652 "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006653 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006654 ctxt->disableSAX = 1;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006655 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00006656 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00006657 } else if (RAW == '\''){
Daniel Veillard260a68f1998-08-13 03:39:55 +00006658 NEXT;
6659 q = CUR_PTR;
6660 encoding = xmlParseEncName(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00006661 if (RAW != '\'') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006662 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006663 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006664 ctxt->sax->error(ctxt->userData,
6665 "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006666 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006667 ctxt->disableSAX = 1;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006668 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00006669 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00006670 } else if (RAW == '"'){
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006671 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006672 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006673 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006674 "xmlParseEncodingDecl : expected ' or \"\n");
6675 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006676 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006677 }
Daniel Veillard496a1cf2000-05-03 14:20:55 +00006678 if (encoding != NULL) {
6679 xmlCharEncoding enc;
6680 xmlCharEncodingHandlerPtr handler;
6681
6682 if (ctxt->input->encoding != NULL)
6683 xmlFree((xmlChar *) ctxt->input->encoding);
6684 ctxt->input->encoding = encoding;
6685
6686 enc = xmlParseCharEncoding((const char *) encoding);
6687 /*
6688 * registered set of known encodings
6689 */
6690 if (enc != XML_CHAR_ENCODING_ERROR) {
6691 xmlSwitchEncoding(ctxt, enc);
6692 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6693 xmlFree(encoding);
6694 return(NULL);
6695 }
6696 } else {
6697 /*
6698 * fallback for unknown encodings
6699 */
6700 handler = xmlFindCharEncodingHandler((const char *) encoding);
6701 if (handler != NULL) {
6702 xmlSwitchToEncoding(ctxt, handler);
6703 } else {
6704 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00006705 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6706 ctxt->sax->error(ctxt->userData,
6707 "Unsupported encoding %s\n", encoding);
Daniel Veillard496a1cf2000-05-03 14:20:55 +00006708 return(NULL);
6709 }
6710 }
6711 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00006712 }
6713 return(encoding);
6714}
6715
Daniel Veillard11e00581998-10-24 18:27:49 +00006716/**
6717 * xmlParseSDDecl:
6718 * @ctxt: an XML parser context
6719 *
6720 * parse the XML standalone declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00006721 *
6722 * [32] SDDecl ::= S 'standalone' Eq
6723 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Daniel Veillard1e346af1999-02-22 10:33:01 +00006724 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00006725 * [ VC: Standalone Document Declaration ]
6726 * TODO The standalone document declaration must have the value "no"
6727 * if any external markup declarations contain declarations of:
6728 * - attributes with default values, if elements to which these
6729 * attributes apply appear in the document without specifications
6730 * of values for these attributes, or
6731 * - entities (other than amp, lt, gt, apos, quot), if references
6732 * to those entities appear in the document, or
6733 * - attributes with values subject to normalization, where the
6734 * attribute appears in the document with a value which will change
6735 * as a result of normalization, or
6736 * - element types with element content, if white space occurs directly
6737 * within any instance of those types.
6738 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00006739 * Returns 1 if standalone, 0 otherwise
Daniel Veillard260a68f1998-08-13 03:39:55 +00006740 */
6741
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006742int
6743xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006744 int standalone = -1;
6745
6746 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00006747 if ((RAW == 's') && (NXT(1) == 't') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00006748 (NXT(2) == 'a') && (NXT(3) == 'n') &&
6749 (NXT(4) == 'd') && (NXT(5) == 'a') &&
6750 (NXT(6) == 'l') && (NXT(7) == 'o') &&
6751 (NXT(8) == 'n') && (NXT(9) == 'e')) {
6752 SKIP(10);
Daniel Veillard011b63c1999-06-02 17:44:04 +00006753 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00006754 if (RAW != '=') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006755 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006756 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006757 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006758 "XML standalone declaration : expected '='\n");
6759 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006760 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006761 return(standalone);
6762 }
6763 NEXT;
6764 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00006765 if (RAW == '\''){
Daniel Veillard260a68f1998-08-13 03:39:55 +00006766 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00006767 if ((RAW == 'n') && (NXT(1) == 'o')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006768 standalone = 0;
6769 SKIP(2);
Daniel Veillardcf461992000-03-14 18:30:20 +00006770 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00006771 (NXT(2) == 's')) {
6772 standalone = 1;
6773 SKIP(3);
6774 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006775 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006776 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006777 ctxt->sax->error(ctxt->userData,
6778 "standalone accepts only 'yes' or 'no'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006779 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006780 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006781 }
Daniel Veillardcf461992000-03-14 18:30:20 +00006782 if (RAW != '\'') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006783 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006784 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006785 ctxt->sax->error(ctxt->userData, "String not closed\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006786 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006787 ctxt->disableSAX = 1;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006788 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00006789 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00006790 } else if (RAW == '"'){
Daniel Veillard260a68f1998-08-13 03:39:55 +00006791 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00006792 if ((RAW == 'n') && (NXT(1) == 'o')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006793 standalone = 0;
6794 SKIP(2);
Daniel Veillardcf461992000-03-14 18:30:20 +00006795 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00006796 (NXT(2) == 's')) {
6797 standalone = 1;
6798 SKIP(3);
6799 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006800 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006801 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006802 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006803 "standalone accepts only 'yes' or 'no'\n");
6804 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006805 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006806 }
Daniel Veillardcf461992000-03-14 18:30:20 +00006807 if (RAW != '"') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006808 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006809 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006810 ctxt->sax->error(ctxt->userData, "String not closed\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006811 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006812 ctxt->disableSAX = 1;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006813 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00006814 NEXT;
6815 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006816 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006817 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006818 ctxt->sax->error(ctxt->userData,
6819 "Standalone value not found\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006820 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006821 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006822 }
6823 }
6824 return(standalone);
6825}
6826
Daniel Veillard11e00581998-10-24 18:27:49 +00006827/**
6828 * xmlParseXMLDecl:
6829 * @ctxt: an XML parser context
6830 *
6831 * parse an XML declaration header
Daniel Veillard260a68f1998-08-13 03:39:55 +00006832 *
6833 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
6834 */
6835
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006836void
6837xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006838 xmlChar *version;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006839
6840 /*
6841 * We know that '<?xml' is here.
6842 */
6843 SKIP(5);
6844
Daniel Veillardcf461992000-03-14 18:30:20 +00006845 if (!IS_BLANK(RAW)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006846 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006847 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006848 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006849 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006850 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006851 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00006852 SKIP_BLANKS;
6853
6854 /*
6855 * We should have the VersionInfo here.
6856 */
6857 version = xmlParseVersionInfo(ctxt);
6858 if (version == NULL)
6859 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard517752b1999-04-05 12:20:10 +00006860 ctxt->version = xmlStrdup(version);
Daniel Veillard6454aec1999-09-02 22:04:43 +00006861 xmlFree(version);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006862
6863 /*
6864 * We may have the encoding declaration
6865 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006866 if (!IS_BLANK(RAW)) {
6867 if ((RAW == '?') && (NXT(1) == '>')) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006868 SKIP(2);
6869 return;
6870 }
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006871 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006872 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006873 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006874 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006875 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006876 }
Daniel Veillard496a1cf2000-05-03 14:20:55 +00006877 xmlParseEncodingDecl(ctxt);
6878 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6879 /*
6880 * The XML REC instructs us to stop parsing right here
6881 */
6882 return;
6883 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00006884
6885 /*
6886 * We may have the standalone status.
6887 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006888 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
6889 if ((RAW == '?') && (NXT(1) == '>')) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006890 SKIP(2);
6891 return;
6892 }
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006893 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006894 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006895 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006896 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006897 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006898 }
6899 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00006900 ctxt->input->standalone = xmlParseSDDecl(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006901
6902 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00006903 if ((RAW == '?') && (NXT(1) == '>')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006904 SKIP(2);
Daniel Veillardcf461992000-03-14 18:30:20 +00006905 } else if (RAW == '>') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006906 /* Deprecated old WD ... */
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006907 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006908 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006909 ctxt->sax->error(ctxt->userData,
6910 "XML declaration must end-up with '?>'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006911 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006912 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006913 NEXT;
6914 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006915 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006916 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006917 ctxt->sax->error(ctxt->userData,
6918 "parsing XML declaration: '?>' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006919 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006920 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006921 MOVETO_ENDTAG(CUR_PTR);
6922 NEXT;
6923 }
6924}
6925
Daniel Veillard11e00581998-10-24 18:27:49 +00006926/**
6927 * xmlParseMisc:
6928 * @ctxt: an XML parser context
6929 *
6930 * parse an XML Misc* optionnal field.
Daniel Veillard260a68f1998-08-13 03:39:55 +00006931 *
6932 * [27] Misc ::= Comment | PI | S
6933 */
6934
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006935void
6936xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillardcf461992000-03-14 18:30:20 +00006937 while (((RAW == '<') && (NXT(1) == '?')) ||
6938 ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00006939 (NXT(2) == '-') && (NXT(3) == '-')) ||
6940 IS_BLANK(CUR)) {
Daniel Veillardcf461992000-03-14 18:30:20 +00006941 if ((RAW == '<') && (NXT(1) == '?')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006942 xmlParsePI(ctxt);
6943 } else if (IS_BLANK(CUR)) {
6944 NEXT;
6945 } else
Daniel Veillardb96e6431999-08-29 21:02:19 +00006946 xmlParseComment(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006947 }
6948}
6949
Daniel Veillard11e00581998-10-24 18:27:49 +00006950/**
Daniel Veillardcf461992000-03-14 18:30:20 +00006951 * xmlParseDocument:
Daniel Veillard11e00581998-10-24 18:27:49 +00006952 * @ctxt: an XML parser context
6953 *
6954 * parse an XML document (and build a tree if using the standard SAX
6955 * interface).
Daniel Veillard260a68f1998-08-13 03:39:55 +00006956 *
6957 * [1] document ::= prolog element Misc*
6958 *
6959 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
Daniel Veillard11e00581998-10-24 18:27:49 +00006960 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00006961 * Returns 0, -1 in case of error. the parser context is augmented
Daniel Veillard11e00581998-10-24 18:27:49 +00006962 * as a result of the parsing.
Daniel Veillard260a68f1998-08-13 03:39:55 +00006963 */
6964
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006965int
6966xmlParseDocument(xmlParserCtxtPtr ctxt) {
Daniel Veillardcf461992000-03-14 18:30:20 +00006967 xmlChar start[4];
6968 xmlCharEncoding enc;
6969
Daniel Veillardbc765302000-10-01 18:23:35 +00006970 xmlInitParser();
Daniel Veillard260a68f1998-08-13 03:39:55 +00006971
Daniel Veillarde2d034d1999-07-27 19:52:06 +00006972 GROW;
6973
Daniel Veillard260a68f1998-08-13 03:39:55 +00006974 /*
6975 * SAX: beginning of the document processing.
6976 */
Daniel Veillard517752b1999-04-05 12:20:10 +00006977 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
Daniel Veillard27d88741999-05-29 11:51:49 +00006978 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006979
Daniel Veillardcf461992000-03-14 18:30:20 +00006980 /*
6981 * Get the 4 first bytes and decode the charset
6982 * if enc != XML_CHAR_ENCODING_NONE
6983 * plug some encoding conversion routines.
Daniel Veillard260a68f1998-08-13 03:39:55 +00006984 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006985 start[0] = RAW;
6986 start[1] = NXT(1);
6987 start[2] = NXT(2);
6988 start[3] = NXT(3);
6989 enc = xmlDetectCharEncoding(start, 4);
6990 if (enc != XML_CHAR_ENCODING_NONE) {
6991 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006992 }
6993
Daniel Veillardcf461992000-03-14 18:30:20 +00006994
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006995 if (CUR == 0) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00006996 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006997 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006998 ctxt->sax->error(ctxt->userData, "Document is empty\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006999 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007000 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007001 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00007002
7003 /*
7004 * Check for the XMLDecl in the Prolog.
7005 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00007006 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00007007 if ((RAW == '<') && (NXT(1) == '?') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00007008 (NXT(2) == 'x') && (NXT(3) == 'm') &&
Daniel Veillard686d6b62000-01-03 11:08:02 +00007009 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
Daniel Veillard496a1cf2000-05-03 14:20:55 +00007010
7011 /*
7012 * Note that we will switch encoding on the fly.
7013 */
Daniel Veillard260a68f1998-08-13 03:39:55 +00007014 xmlParseXMLDecl(ctxt);
Daniel Veillard496a1cf2000-05-03 14:20:55 +00007015 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7016 /*
7017 * The XML REC instructs us to stop parsing right here
7018 */
7019 return(-1);
7020 }
Daniel Veillardcf461992000-03-14 18:30:20 +00007021 ctxt->standalone = ctxt->input->standalone;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007022 SKIP_BLANKS;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007023 } else {
Daniel Veillard517752b1999-04-05 12:20:10 +00007024 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007025 }
Daniel Veillardcf461992000-03-14 18:30:20 +00007026 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
Daniel Veillard27d88741999-05-29 11:51:49 +00007027 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007028
7029 /*
7030 * The Misc part of the Prolog
7031 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00007032 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007033 xmlParseMisc(ctxt);
7034
7035 /*
7036 * Then possibly doc type declaration(s) and more Misc
7037 * (doctypedecl Misc*)?
7038 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00007039 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00007040 if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00007041 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7042 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7043 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7044 (NXT(8) == 'E')) {
Daniel Veillardcf461992000-03-14 18:30:20 +00007045
7046 ctxt->inSubset = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007047 xmlParseDocTypeDecl(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00007048 if (RAW == '[') {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007049 ctxt->instate = XML_PARSER_DTD;
7050 xmlParseInternalSubset(ctxt);
7051 }
Daniel Veillardcf461992000-03-14 18:30:20 +00007052
7053 /*
7054 * Create and update the external subset.
7055 */
7056 ctxt->inSubset = 2;
7057 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7058 (!ctxt->disableSAX))
7059 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7060 ctxt->extSubSystem, ctxt->extSubURI);
7061 ctxt->inSubset = 0;
7062
7063
Daniel Veillardb05deb71999-08-10 19:04:08 +00007064 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007065 xmlParseMisc(ctxt);
7066 }
7067
7068 /*
7069 * Time to start parsing the tree itself
7070 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00007071 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00007072 if (RAW != '<') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00007073 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007074 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00007075 ctxt->sax->error(ctxt->userData,
Daniel Veillardcf461992000-03-14 18:30:20 +00007076 "Start tag expected, '<' not found\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007077 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007078 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007079 ctxt->instate = XML_PARSER_EOF;
7080 } else {
7081 ctxt->instate = XML_PARSER_CONTENT;
7082 xmlParseElement(ctxt);
7083 ctxt->instate = XML_PARSER_EPILOG;
7084
7085
7086 /*
7087 * The Misc part at the end
7088 */
7089 xmlParseMisc(ctxt);
7090
Daniel Veillardcf461992000-03-14 18:30:20 +00007091 if (RAW != 0) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00007092 ctxt->errNo = XML_ERR_DOCUMENT_END;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007093 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7094 ctxt->sax->error(ctxt->userData,
7095 "Extra content at the end of the document\n");
7096 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007097 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007098 }
7099 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007100 }
7101
Daniel Veillard260a68f1998-08-13 03:39:55 +00007102 /*
7103 * SAX: end of the document processing.
7104 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007105 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7106 (!ctxt->disableSAX))
Daniel Veillard27d88741999-05-29 11:51:49 +00007107 ctxt->sax->endDocument(ctxt->userData);
Daniel Veillardcf461992000-03-14 18:30:20 +00007108
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007109 if (! ctxt->wellFormed) return(-1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007110 return(0);
7111}
7112
Daniel Veillardb1059e22000-09-16 14:02:43 +00007113/**
7114 * xmlParseExtParsedEnt:
7115 * @ctxt: an XML parser context
7116 *
7117 * parse a genreral parsed entity
7118 * An external general parsed entity is well-formed if it matches the
7119 * production labeled extParsedEnt.
7120 *
7121 * [78] extParsedEnt ::= TextDecl? content
7122 *
7123 * Returns 0, -1 in case of error. the parser context is augmented
7124 * as a result of the parsing.
7125 */
7126
7127int
7128xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7129 xmlChar start[4];
7130 xmlCharEncoding enc;
7131
7132 xmlDefaultSAXHandlerInit();
7133
7134 GROW;
7135
7136 /*
7137 * SAX: beginning of the document processing.
7138 */
7139 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7140 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7141
7142 /*
7143 * Get the 4 first bytes and decode the charset
7144 * if enc != XML_CHAR_ENCODING_NONE
7145 * plug some encoding conversion routines.
7146 */
7147 start[0] = RAW;
7148 start[1] = NXT(1);
7149 start[2] = NXT(2);
7150 start[3] = NXT(3);
7151 enc = xmlDetectCharEncoding(start, 4);
7152 if (enc != XML_CHAR_ENCODING_NONE) {
7153 xmlSwitchEncoding(ctxt, enc);
7154 }
7155
7156
7157 if (CUR == 0) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00007158 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
Daniel Veillardb1059e22000-09-16 14:02:43 +00007159 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7160 ctxt->sax->error(ctxt->userData, "Document is empty\n");
Daniel Veillardb1059e22000-09-16 14:02:43 +00007161 ctxt->wellFormed = 0;
7162 ctxt->disableSAX = 1;
7163 }
7164
7165 /*
7166 * Check for the XMLDecl in the Prolog.
7167 */
7168 GROW;
7169 if ((RAW == '<') && (NXT(1) == '?') &&
7170 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7171 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7172
7173 /*
7174 * Note that we will switch encoding on the fly.
7175 */
7176 xmlParseXMLDecl(ctxt);
7177 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7178 /*
7179 * The XML REC instructs us to stop parsing right here
7180 */
7181 return(-1);
7182 }
7183 SKIP_BLANKS;
7184 } else {
7185 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7186 }
7187 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7188 ctxt->sax->startDocument(ctxt->userData);
7189
7190 /*
7191 * Doing validity checking on chunk doesn't make sense
7192 */
7193 ctxt->instate = XML_PARSER_CONTENT;
7194 ctxt->validate = 0;
7195 ctxt->depth = 0;
7196
7197 xmlParseContent(ctxt);
7198
7199 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00007200 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
Daniel Veillardb1059e22000-09-16 14:02:43 +00007201 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7202 ctxt->sax->error(ctxt->userData,
7203 "chunk is not well balanced\n");
7204 ctxt->wellFormed = 0;
7205 ctxt->disableSAX = 1;
Daniel Veillardb1059e22000-09-16 14:02:43 +00007206 } else if (RAW != 0) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00007207 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
Daniel Veillardb1059e22000-09-16 14:02:43 +00007208 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7209 ctxt->sax->error(ctxt->userData,
7210 "extra content at the end of well balanced chunk\n");
7211 ctxt->wellFormed = 0;
7212 ctxt->disableSAX = 1;
Daniel Veillardb1059e22000-09-16 14:02:43 +00007213 }
7214
7215 /*
7216 * SAX: end of the document processing.
7217 */
7218 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7219 (!ctxt->disableSAX))
7220 ctxt->sax->endDocument(ctxt->userData);
7221
7222 if (! ctxt->wellFormed) return(-1);
7223 return(0);
7224}
7225
Daniel Veillardb05deb71999-08-10 19:04:08 +00007226/************************************************************************
7227 * *
Daniel Veillard7f858501999-11-17 17:32:38 +00007228 * Progressive parsing interfaces *
7229 * *
7230 ************************************************************************/
7231
7232/**
7233 * xmlParseLookupSequence:
7234 * @ctxt: an XML parser context
7235 * @first: the first char to lookup
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007236 * @next: the next char to lookup or zero
7237 * @third: the next char to lookup or zero
Daniel Veillard7f858501999-11-17 17:32:38 +00007238 *
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007239 * Try to find if a sequence (first, next, third) or just (first next) or
7240 * (first) is available in the input stream.
7241 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7242 * to avoid rescanning sequences of bytes, it DOES change the state of the
7243 * parser, do not use liberally.
Daniel Veillard7f858501999-11-17 17:32:38 +00007244 *
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007245 * Returns the index to the current parsing point if the full sequence
7246 * is available, -1 otherwise.
Daniel Veillard7f858501999-11-17 17:32:38 +00007247 */
7248int
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007249xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7250 xmlChar next, xmlChar third) {
7251 int base, len;
7252 xmlParserInputPtr in;
7253 const xmlChar *buf;
7254
7255 in = ctxt->input;
7256 if (in == NULL) return(-1);
7257 base = in->cur - in->base;
7258 if (base < 0) return(-1);
7259 if (ctxt->checkIndex > base)
7260 base = ctxt->checkIndex;
7261 if (in->buf == NULL) {
7262 buf = in->base;
7263 len = in->length;
7264 } else {
7265 buf = in->buf->buffer->content;
7266 len = in->buf->buffer->use;
7267 }
7268 /* take into account the sequence length */
7269 if (third) len -= 2;
7270 else if (next) len --;
7271 for (;base < len;base++) {
7272 if (buf[base] == first) {
7273 if (third != 0) {
7274 if ((buf[base + 1] != next) ||
7275 (buf[base + 2] != third)) continue;
7276 } else if (next != 0) {
7277 if (buf[base + 1] != next) continue;
7278 }
7279 ctxt->checkIndex = 0;
7280#ifdef DEBUG_PUSH
7281 if (next == 0)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007282 xmlGenericError(xmlGenericErrorContext,
7283 "PP: lookup '%c' found at %d\n",
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007284 first, base);
7285 else if (third == 0)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007286 xmlGenericError(xmlGenericErrorContext,
7287 "PP: lookup '%c%c' found at %d\n",
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007288 first, next, base);
7289 else
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007290 xmlGenericError(xmlGenericErrorContext,
7291 "PP: lookup '%c%c%c' found at %d\n",
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007292 first, next, third, base);
7293#endif
7294 return(base - (in->cur - in->base));
7295 }
7296 }
7297 ctxt->checkIndex = base;
7298#ifdef DEBUG_PUSH
7299 if (next == 0)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007300 xmlGenericError(xmlGenericErrorContext,
7301 "PP: lookup '%c' failed\n", first);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007302 else if (third == 0)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007303 xmlGenericError(xmlGenericErrorContext,
7304 "PP: lookup '%c%c' failed\n", first, next);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007305 else
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007306 xmlGenericError(xmlGenericErrorContext,
7307 "PP: lookup '%c%c%c' failed\n", first, next, third);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007308#endif
7309 return(-1);
Daniel Veillard7f858501999-11-17 17:32:38 +00007310}
7311
7312/**
Daniel Veillard71b656e2000-01-05 14:46:17 +00007313 * xmlParseTryOrFinish:
Daniel Veillard7f858501999-11-17 17:32:38 +00007314 * @ctxt: an XML parser context
Daniel Veillard71b656e2000-01-05 14:46:17 +00007315 * @terminate: last chunk indicator
Daniel Veillard7f858501999-11-17 17:32:38 +00007316 *
7317 * Try to progress on parsing
7318 *
7319 * Returns zero if no parsing was possible
7320 */
7321int
Daniel Veillard71b656e2000-01-05 14:46:17 +00007322xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
Daniel Veillard7f858501999-11-17 17:32:38 +00007323 int ret = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007324 int avail;
7325 xmlChar cur, next;
7326
7327#ifdef DEBUG_PUSH
7328 switch (ctxt->instate) {
7329 case XML_PARSER_EOF:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007330 xmlGenericError(xmlGenericErrorContext,
7331 "PP: try EOF\n"); break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007332 case XML_PARSER_START:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007333 xmlGenericError(xmlGenericErrorContext,
7334 "PP: try START\n"); break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007335 case XML_PARSER_MISC:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007336 xmlGenericError(xmlGenericErrorContext,
7337 "PP: try MISC\n");break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007338 case XML_PARSER_COMMENT:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007339 xmlGenericError(xmlGenericErrorContext,
7340 "PP: try COMMENT\n");break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007341 case XML_PARSER_PROLOG:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007342 xmlGenericError(xmlGenericErrorContext,
7343 "PP: try PROLOG\n");break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007344 case XML_PARSER_START_TAG:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007345 xmlGenericError(xmlGenericErrorContext,
7346 "PP: try START_TAG\n");break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007347 case XML_PARSER_CONTENT:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007348 xmlGenericError(xmlGenericErrorContext,
7349 "PP: try CONTENT\n");break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007350 case XML_PARSER_CDATA_SECTION:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007351 xmlGenericError(xmlGenericErrorContext,
7352 "PP: try CDATA_SECTION\n");break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007353 case XML_PARSER_END_TAG:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007354 xmlGenericError(xmlGenericErrorContext,
7355 "PP: try END_TAG\n");break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007356 case XML_PARSER_ENTITY_DECL:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007357 xmlGenericError(xmlGenericErrorContext,
7358 "PP: try ENTITY_DECL\n");break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007359 case XML_PARSER_ENTITY_VALUE:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007360 xmlGenericError(xmlGenericErrorContext,
7361 "PP: try ENTITY_VALUE\n");break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007362 case XML_PARSER_ATTRIBUTE_VALUE:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007363 xmlGenericError(xmlGenericErrorContext,
7364 "PP: try ATTRIBUTE_VALUE\n");break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007365 case XML_PARSER_DTD:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007366 xmlGenericError(xmlGenericErrorContext,
7367 "PP: try DTD\n");break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007368 case XML_PARSER_EPILOG:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007369 xmlGenericError(xmlGenericErrorContext,
7370 "PP: try EPILOG\n");break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007371 case XML_PARSER_PI:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007372 xmlGenericError(xmlGenericErrorContext,
7373 "PP: try PI\n");break;
Daniel Veillard41e06512000-11-13 11:47:47 +00007374 case XML_PARSER_IGNORE:
7375 xmlGenericError(xmlGenericErrorContext,
7376 "PP: try IGNORE\n");break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007377 }
7378#endif
Daniel Veillard7f858501999-11-17 17:32:38 +00007379
7380 while (1) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007381 /*
7382 * Pop-up of finished entities.
7383 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007384 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007385 xmlPopInput(ctxt);
7386
Daniel Veillardcf461992000-03-14 18:30:20 +00007387 if (ctxt->input ==NULL) break;
7388 if (ctxt->input->buf == NULL)
7389 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007390 else
Daniel Veillardcf461992000-03-14 18:30:20 +00007391 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007392 if (avail < 1)
7393 goto done;
Daniel Veillard7f858501999-11-17 17:32:38 +00007394 switch (ctxt->instate) {
7395 case XML_PARSER_EOF:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007396 /*
7397 * Document parsing is done !
7398 */
7399 goto done;
7400 case XML_PARSER_START:
7401 /*
7402 * Very first chars read from the document flow.
7403 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007404 cur = ctxt->input->cur[0];
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007405 if (IS_BLANK(cur)) {
7406 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7407 ctxt->sax->setDocumentLocator(ctxt->userData,
7408 &xmlDefaultSAXLocator);
Daniel Veillarda2c6da92000-09-16 18:15:00 +00007409 ctxt->errNo = XML_ERR_DOCUMENT_START;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007410 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7411 ctxt->sax->error(ctxt->userData,
7412 "Extra spaces at the beginning of the document are not allowed\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007413 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007414 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007415 SKIP_BLANKS;
7416 ret++;
Daniel Veillardcf461992000-03-14 18:30:20 +00007417 if (ctxt->input->buf == NULL)
7418 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007419 else
Daniel Veillardcf461992000-03-14 18:30:20 +00007420 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007421 }
7422 if (avail < 2)
7423 goto done;
7424
Daniel Veillardcf461992000-03-14 18:30:20 +00007425 cur = ctxt->input->cur[0];
7426 next = ctxt->input->cur[1];
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007427 if (cur == 0) {
7428 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7429 ctxt->sax->setDocumentLocator(ctxt->userData,
7430 &xmlDefaultSAXLocator);
Daniel Veillarda2c6da92000-09-16 18:15:00 +00007431 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007432 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7433 ctxt->sax->error(ctxt->userData, "Document is empty\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007434 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007435 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007436 ctxt->instate = XML_PARSER_EOF;
7437#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007438 xmlGenericError(xmlGenericErrorContext,
7439 "PP: entering EOF\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007440#endif
7441 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7442 ctxt->sax->endDocument(ctxt->userData);
7443 goto done;
7444 }
7445 if ((cur == '<') && (next == '?')) {
7446 /* PI or XML decl */
7447 if (avail < 5) return(ret);
Daniel Veillard71b656e2000-01-05 14:46:17 +00007448 if ((!terminate) &&
7449 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007450 return(ret);
7451 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7452 ctxt->sax->setDocumentLocator(ctxt->userData,
7453 &xmlDefaultSAXLocator);
Daniel Veillardcf461992000-03-14 18:30:20 +00007454 if ((ctxt->input->cur[2] == 'x') &&
7455 (ctxt->input->cur[3] == 'm') &&
7456 (ctxt->input->cur[4] == 'l') &&
7457 (IS_BLANK(ctxt->input->cur[5]))) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007458 ret += 5;
7459#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007460 xmlGenericError(xmlGenericErrorContext,
7461 "PP: Parsing XML Decl\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007462#endif
7463 xmlParseXMLDecl(ctxt);
Daniel Veillard496a1cf2000-05-03 14:20:55 +00007464 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7465 /*
7466 * The XML REC instructs us to stop parsing right
7467 * here
7468 */
7469 ctxt->instate = XML_PARSER_EOF;
7470 return(0);
7471 }
Daniel Veillardcf461992000-03-14 18:30:20 +00007472 ctxt->standalone = ctxt->input->standalone;
7473 if ((ctxt->encoding == NULL) &&
7474 (ctxt->input->encoding != NULL))
7475 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7476 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7477 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007478 ctxt->sax->startDocument(ctxt->userData);
7479 ctxt->instate = XML_PARSER_MISC;
7480#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007481 xmlGenericError(xmlGenericErrorContext,
7482 "PP: entering MISC\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007483#endif
7484 } else {
7485 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillardcf461992000-03-14 18:30:20 +00007486 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7487 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007488 ctxt->sax->startDocument(ctxt->userData);
7489 ctxt->instate = XML_PARSER_MISC;
7490#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007491 xmlGenericError(xmlGenericErrorContext,
7492 "PP: entering MISC\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007493#endif
7494 }
7495 } else {
7496 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7497 ctxt->sax->setDocumentLocator(ctxt->userData,
7498 &xmlDefaultSAXLocator);
7499 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillardcf461992000-03-14 18:30:20 +00007500 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7501 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007502 ctxt->sax->startDocument(ctxt->userData);
7503 ctxt->instate = XML_PARSER_MISC;
7504#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007505 xmlGenericError(xmlGenericErrorContext,
7506 "PP: entering MISC\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007507#endif
7508 }
7509 break;
7510 case XML_PARSER_MISC:
7511 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00007512 if (ctxt->input->buf == NULL)
7513 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007514 else
Daniel Veillardcf461992000-03-14 18:30:20 +00007515 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007516 if (avail < 2)
7517 goto done;
Daniel Veillardcf461992000-03-14 18:30:20 +00007518 cur = ctxt->input->cur[0];
7519 next = ctxt->input->cur[1];
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007520 if ((cur == '<') && (next == '?')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00007521 if ((!terminate) &&
7522 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007523 goto done;
7524#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007525 xmlGenericError(xmlGenericErrorContext,
7526 "PP: Parsing PI\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007527#endif
7528 xmlParsePI(ctxt);
7529 } else if ((cur == '<') && (next == '!') &&
Daniel Veillardcf461992000-03-14 18:30:20 +00007530 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00007531 if ((!terminate) &&
7532 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007533 goto done;
7534#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007535 xmlGenericError(xmlGenericErrorContext,
7536 "PP: Parsing Comment\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007537#endif
7538 xmlParseComment(ctxt);
7539 ctxt->instate = XML_PARSER_MISC;
7540 } else if ((cur == '<') && (next == '!') &&
Daniel Veillardcf461992000-03-14 18:30:20 +00007541 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7542 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7543 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7544 (ctxt->input->cur[8] == 'E')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00007545 if ((!terminate) &&
7546 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007547 goto done;
7548#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007549 xmlGenericError(xmlGenericErrorContext,
7550 "PP: Parsing internal subset\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007551#endif
Daniel Veillardcf461992000-03-14 18:30:20 +00007552 ctxt->inSubset = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007553 xmlParseDocTypeDecl(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00007554 if (RAW == '[') {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007555 ctxt->instate = XML_PARSER_DTD;
7556#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007557 xmlGenericError(xmlGenericErrorContext,
7558 "PP: entering DTD\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007559#endif
7560 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00007561 /*
7562 * Create and update the external subset.
7563 */
7564 ctxt->inSubset = 2;
7565 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7566 (ctxt->sax->externalSubset != NULL))
7567 ctxt->sax->externalSubset(ctxt->userData,
7568 ctxt->intSubName, ctxt->extSubSystem,
7569 ctxt->extSubURI);
7570 ctxt->inSubset = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007571 ctxt->instate = XML_PARSER_PROLOG;
7572#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007573 xmlGenericError(xmlGenericErrorContext,
7574 "PP: entering PROLOG\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007575#endif
7576 }
7577 } else if ((cur == '<') && (next == '!') &&
7578 (avail < 9)) {
7579 goto done;
7580 } else {
7581 ctxt->instate = XML_PARSER_START_TAG;
7582#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007583 xmlGenericError(xmlGenericErrorContext,
7584 "PP: entering START_TAG\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007585#endif
7586 }
7587 break;
Daniel Veillard41e06512000-11-13 11:47:47 +00007588 case XML_PARSER_IGNORE:
7589 xmlGenericError(xmlGenericErrorContext,
7590 "PP: internal error, state == IGNORE");
7591 ctxt->instate = XML_PARSER_DTD;
7592#ifdef DEBUG_PUSH
7593 xmlGenericError(xmlGenericErrorContext,
7594 "PP: entering DTD\n");
7595#endif
7596 break;
Daniel Veillard7f858501999-11-17 17:32:38 +00007597 case XML_PARSER_PROLOG:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007598 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00007599 if (ctxt->input->buf == NULL)
7600 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007601 else
Daniel Veillardcf461992000-03-14 18:30:20 +00007602 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007603 if (avail < 2)
7604 goto done;
Daniel Veillardcf461992000-03-14 18:30:20 +00007605 cur = ctxt->input->cur[0];
7606 next = ctxt->input->cur[1];
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007607 if ((cur == '<') && (next == '?')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00007608 if ((!terminate) &&
7609 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007610 goto done;
7611#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007612 xmlGenericError(xmlGenericErrorContext,
7613 "PP: Parsing PI\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007614#endif
7615 xmlParsePI(ctxt);
7616 } else if ((cur == '<') && (next == '!') &&
Daniel Veillardcf461992000-03-14 18:30:20 +00007617 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00007618 if ((!terminate) &&
7619 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007620 goto done;
7621#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007622 xmlGenericError(xmlGenericErrorContext,
7623 "PP: Parsing Comment\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007624#endif
7625 xmlParseComment(ctxt);
7626 ctxt->instate = XML_PARSER_PROLOG;
7627 } else if ((cur == '<') && (next == '!') &&
7628 (avail < 4)) {
7629 goto done;
7630 } else {
7631 ctxt->instate = XML_PARSER_START_TAG;
7632#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007633 xmlGenericError(xmlGenericErrorContext,
7634 "PP: entering START_TAG\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007635#endif
7636 }
7637 break;
Daniel Veillard7f858501999-11-17 17:32:38 +00007638 case XML_PARSER_EPILOG:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007639 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00007640 if (ctxt->input->buf == NULL)
7641 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007642 else
Daniel Veillardcf461992000-03-14 18:30:20 +00007643 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007644 if (avail < 2)
7645 goto done;
Daniel Veillardcf461992000-03-14 18:30:20 +00007646 cur = ctxt->input->cur[0];
7647 next = ctxt->input->cur[1];
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007648 if ((cur == '<') && (next == '?')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00007649 if ((!terminate) &&
7650 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007651 goto done;
7652#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007653 xmlGenericError(xmlGenericErrorContext,
7654 "PP: Parsing PI\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007655#endif
7656 xmlParsePI(ctxt);
7657 ctxt->instate = XML_PARSER_EPILOG;
7658 } else if ((cur == '<') && (next == '!') &&
Daniel Veillardcf461992000-03-14 18:30:20 +00007659 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00007660 if ((!terminate) &&
7661 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007662 goto done;
7663#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007664 xmlGenericError(xmlGenericErrorContext,
7665 "PP: Parsing Comment\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007666#endif
7667 xmlParseComment(ctxt);
7668 ctxt->instate = XML_PARSER_EPILOG;
7669 } else if ((cur == '<') && (next == '!') &&
7670 (avail < 4)) {
7671 goto done;
7672 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00007673 ctxt->errNo = XML_ERR_DOCUMENT_END;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007674 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7675 ctxt->sax->error(ctxt->userData,
7676 "Extra content at the end of the document\n");
7677 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007678 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007679 ctxt->instate = XML_PARSER_EOF;
7680#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007681 xmlGenericError(xmlGenericErrorContext,
7682 "PP: entering EOF\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007683#endif
Daniel Veillardcf461992000-03-14 18:30:20 +00007684 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7685 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007686 ctxt->sax->endDocument(ctxt->userData);
7687 goto done;
7688 }
7689 break;
7690 case XML_PARSER_START_TAG: {
7691 xmlChar *name, *oldname;
7692
Daniel Veillardcf461992000-03-14 18:30:20 +00007693 if ((avail < 2) && (ctxt->inputNr == 1))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007694 goto done;
Daniel Veillardcf461992000-03-14 18:30:20 +00007695 cur = ctxt->input->cur[0];
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007696 if (cur != '<') {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00007697 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007698 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7699 ctxt->sax->error(ctxt->userData,
7700 "Start tag expect, '<' not found\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007701 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007702 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007703 ctxt->instate = XML_PARSER_EOF;
7704#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007705 xmlGenericError(xmlGenericErrorContext,
7706 "PP: entering EOF\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007707#endif
Daniel Veillardcf461992000-03-14 18:30:20 +00007708 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7709 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007710 ctxt->sax->endDocument(ctxt->userData);
7711 goto done;
7712 }
Daniel Veillard71b656e2000-01-05 14:46:17 +00007713 if ((!terminate) &&
7714 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007715 goto done;
Daniel Veillardcf461992000-03-14 18:30:20 +00007716 if (ctxt->spaceNr == 0)
7717 spacePush(ctxt, -1);
7718 else
7719 spacePush(ctxt, *ctxt->space);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007720 name = xmlParseStartTag(ctxt);
7721 if (name == NULL) {
Daniel Veillardcf461992000-03-14 18:30:20 +00007722 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007723 ctxt->instate = XML_PARSER_EOF;
7724#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007725 xmlGenericError(xmlGenericErrorContext,
7726 "PP: entering EOF\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007727#endif
Daniel Veillardcf461992000-03-14 18:30:20 +00007728 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7729 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007730 ctxt->sax->endDocument(ctxt->userData);
7731 goto done;
7732 }
7733 namePush(ctxt, xmlStrdup(name));
7734
7735 /*
7736 * [ VC: Root Element Type ]
7737 * The Name in the document type declaration must match
7738 * the element type of the root element.
7739 */
7740 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
Daniel Veillardcf461992000-03-14 18:30:20 +00007741 ctxt->node && (ctxt->node == ctxt->myDoc->children))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007742 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7743
7744 /*
7745 * Check for an Empty Element.
7746 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007747 if ((RAW == '/') && (NXT(1) == '>')) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007748 SKIP(2);
Daniel Veillardcf461992000-03-14 18:30:20 +00007749 if ((ctxt->sax != NULL) &&
7750 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007751 ctxt->sax->endElement(ctxt->userData, name);
7752 xmlFree(name);
7753 oldname = namePop(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00007754 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007755 if (oldname != NULL) {
7756#ifdef DEBUG_STACK
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007757 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007758#endif
7759 xmlFree(oldname);
7760 }
7761 if (ctxt->name == NULL) {
7762 ctxt->instate = XML_PARSER_EPILOG;
7763#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007764 xmlGenericError(xmlGenericErrorContext,
7765 "PP: entering EPILOG\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007766#endif
7767 } else {
7768 ctxt->instate = XML_PARSER_CONTENT;
7769#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007770 xmlGenericError(xmlGenericErrorContext,
7771 "PP: entering CONTENT\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007772#endif
7773 }
7774 break;
7775 }
Daniel Veillardcf461992000-03-14 18:30:20 +00007776 if (RAW == '>') {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007777 NEXT;
7778 } else {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00007779 ctxt->errNo = XML_ERR_GT_REQUIRED;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007780 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7781 ctxt->sax->error(ctxt->userData,
7782 "Couldn't find end of Start Tag %s\n",
7783 name);
7784 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007785 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007786
7787 /*
7788 * end of parsing of this node.
7789 */
7790 nodePop(ctxt);
7791 oldname = namePop(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00007792 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007793 if (oldname != NULL) {
7794#ifdef DEBUG_STACK
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007795 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007796#endif
7797 xmlFree(oldname);
7798 }
7799 }
7800 xmlFree(name);
7801 ctxt->instate = XML_PARSER_CONTENT;
7802#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007803 xmlGenericError(xmlGenericErrorContext,
7804 "PP: entering CONTENT\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007805#endif
7806 break;
7807 }
Daniel Veillarde715dd22000-08-29 18:29:38 +00007808 case XML_PARSER_CONTENT: {
7809 const xmlChar *test;
7810 int cons;
7811 xmlChar tok;
7812
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007813 /*
7814 * Handle preparsed entities and charRef
7815 */
7816 if (ctxt->token != 0) {
7817 xmlChar cur[2] = { 0 , 0 } ;
7818
7819 cur[0] = (xmlChar) ctxt->token;
Daniel Veillardcf461992000-03-14 18:30:20 +00007820 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7821 (ctxt->sax->characters != NULL))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007822 ctxt->sax->characters(ctxt->userData, cur, 1);
7823 ctxt->token = 0;
7824 }
Daniel Veillardcf461992000-03-14 18:30:20 +00007825 if ((avail < 2) && (ctxt->inputNr == 1))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007826 goto done;
Daniel Veillardcf461992000-03-14 18:30:20 +00007827 cur = ctxt->input->cur[0];
7828 next = ctxt->input->cur[1];
Daniel Veillarde715dd22000-08-29 18:29:38 +00007829
7830 test = CUR_PTR;
7831 cons = ctxt->input->consumed;
7832 tok = ctxt->token;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007833 if ((cur == '<') && (next == '?')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00007834 if ((!terminate) &&
7835 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007836 goto done;
7837#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007838 xmlGenericError(xmlGenericErrorContext,
7839 "PP: Parsing PI\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007840#endif
7841 xmlParsePI(ctxt);
7842 } else if ((cur == '<') && (next == '!') &&
Daniel Veillardcf461992000-03-14 18:30:20 +00007843 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00007844 if ((!terminate) &&
7845 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007846 goto done;
7847#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007848 xmlGenericError(xmlGenericErrorContext,
7849 "PP: Parsing Comment\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007850#endif
7851 xmlParseComment(ctxt);
7852 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillardcf461992000-03-14 18:30:20 +00007853 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
7854 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
7855 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
7856 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
7857 (ctxt->input->cur[8] == '[')) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007858 SKIP(9);
7859 ctxt->instate = XML_PARSER_CDATA_SECTION;
7860#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007861 xmlGenericError(xmlGenericErrorContext,
7862 "PP: entering CDATA_SECTION\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007863#endif
7864 break;
7865 } else if ((cur == '<') && (next == '!') &&
7866 (avail < 9)) {
7867 goto done;
7868 } else if ((cur == '<') && (next == '/')) {
7869 ctxt->instate = XML_PARSER_END_TAG;
7870#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007871 xmlGenericError(xmlGenericErrorContext,
7872 "PP: entering END_TAG\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007873#endif
7874 break;
7875 } else if (cur == '<') {
7876 ctxt->instate = XML_PARSER_START_TAG;
7877#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007878 xmlGenericError(xmlGenericErrorContext,
7879 "PP: entering START_TAG\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007880#endif
7881 break;
7882 } else if (cur == '&') {
Daniel Veillard71b656e2000-01-05 14:46:17 +00007883 if ((!terminate) &&
7884 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007885 goto done;
7886#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007887 xmlGenericError(xmlGenericErrorContext,
7888 "PP: Parsing Reference\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007889#endif
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007890 xmlParseReference(ctxt);
7891 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00007892 /* TODO Avoid the extra copy, handle directly !!! */
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007893 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00007894 * Goal of the following test is:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007895 * - minimize calls to the SAX 'character' callback
7896 * when they are mergeable
7897 * - handle a problem for isBlank when we only parse
7898 * a sequence of blank chars and the next one is
7899 * not available to check against '<' presence.
7900 * - tries to homogenize the differences in SAX
7901 * callbacks between the push and pull versions
7902 * of the parser.
7903 */
7904 if ((ctxt->inputNr == 1) &&
7905 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00007906 if ((!terminate) &&
7907 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007908 goto done;
7909 }
7910 ctxt->checkIndex = 0;
7911#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007912 xmlGenericError(xmlGenericErrorContext,
7913 "PP: Parsing char data\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007914#endif
7915 xmlParseCharData(ctxt, 0);
7916 }
7917 /*
7918 * Pop-up of finished entities.
7919 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007920 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007921 xmlPopInput(ctxt);
Daniel Veillarde715dd22000-08-29 18:29:38 +00007922 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
7923 (tok == ctxt->token)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00007924 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillarde715dd22000-08-29 18:29:38 +00007925 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7926 ctxt->sax->error(ctxt->userData,
7927 "detected an error in element content\n");
Daniel Veillarde715dd22000-08-29 18:29:38 +00007928 ctxt->wellFormed = 0;
7929 ctxt->disableSAX = 1;
7930 ctxt->instate = XML_PARSER_EOF;
7931 break;
7932 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007933 break;
Daniel Veillarde715dd22000-08-29 18:29:38 +00007934 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007935 case XML_PARSER_CDATA_SECTION: {
7936 /*
7937 * The Push mode need to have the SAX callback for
7938 * cdataBlock merge back contiguous callbacks.
7939 */
7940 int base;
7941
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007942 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
7943 if (base < 0) {
7944 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillardcf461992000-03-14 18:30:20 +00007945 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007946 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardcf461992000-03-14 18:30:20 +00007947 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007948 XML_PARSER_BIG_BUFFER_SIZE);
7949 }
7950 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
7951 ctxt->checkIndex = 0;
7952 }
7953 goto done;
7954 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00007955 if ((ctxt->sax != NULL) && (base > 0) &&
7956 (!ctxt->disableSAX)) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007957 if (ctxt->sax->cdataBlock != NULL)
7958 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillardcf461992000-03-14 18:30:20 +00007959 ctxt->input->cur, base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007960 }
7961 SKIP(base + 3);
7962 ctxt->checkIndex = 0;
7963 ctxt->instate = XML_PARSER_CONTENT;
7964#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007965 xmlGenericError(xmlGenericErrorContext,
7966 "PP: entering CONTENT\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007967#endif
7968 }
7969 break;
7970 }
Daniel Veillard5e5c6231999-12-29 12:49:06 +00007971 case XML_PARSER_END_TAG:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007972 if (avail < 2)
7973 goto done;
Daniel Veillard71b656e2000-01-05 14:46:17 +00007974 if ((!terminate) &&
7975 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007976 goto done;
7977 xmlParseEndTag(ctxt);
7978 if (ctxt->name == NULL) {
7979 ctxt->instate = XML_PARSER_EPILOG;
7980#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007981 xmlGenericError(xmlGenericErrorContext,
7982 "PP: entering EPILOG\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007983#endif
7984 } else {
7985 ctxt->instate = XML_PARSER_CONTENT;
7986#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00007987 xmlGenericError(xmlGenericErrorContext,
7988 "PP: entering CONTENT\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007989#endif
7990 }
7991 break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007992 case XML_PARSER_DTD: {
7993 /*
7994 * Sorry but progressive parsing of the internal subset
7995 * is not expected to be supported. We first check that
7996 * the full content of the internal subset is available and
7997 * the parsing is launched only at that point.
7998 * Internal subset ends up with "']' S? '>'" in an unescaped
7999 * section and not in a ']]>' sequence which are conditional
8000 * sections (whoever argued to keep that crap in XML deserve
8001 * a place in hell !).
8002 */
8003 int base, i;
8004 xmlChar *buf;
8005 xmlChar quote = 0;
8006
Daniel Veillardcf461992000-03-14 18:30:20 +00008007 base = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008008 if (base < 0) return(0);
8009 if (ctxt->checkIndex > base)
8010 base = ctxt->checkIndex;
Daniel Veillardcf461992000-03-14 18:30:20 +00008011 buf = ctxt->input->buf->buffer->content;
Daniel Veillard3f6f7f62000-06-30 17:58:25 +00008012 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8013 base++) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008014 if (quote != 0) {
8015 if (buf[base] == quote)
8016 quote = 0;
8017 continue;
8018 }
8019 if (buf[base] == '"') {
8020 quote = '"';
8021 continue;
8022 }
8023 if (buf[base] == '\'') {
8024 quote = '\'';
8025 continue;
8026 }
8027 if (buf[base] == ']') {
Daniel Veillard3f6f7f62000-06-30 17:58:25 +00008028 if ((unsigned int) base +1 >=
8029 ctxt->input->buf->buffer->use)
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008030 break;
8031 if (buf[base + 1] == ']') {
8032 /* conditional crap, skip both ']' ! */
8033 base++;
8034 continue;
8035 }
Daniel Veillard3f6f7f62000-06-30 17:58:25 +00008036 for (i = 0;
8037 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8038 i++) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008039 if (buf[base + i] == '>')
8040 goto found_end_int_subset;
8041 }
8042 break;
8043 }
8044 }
8045 /*
8046 * We didn't find the end of the Internal subset
8047 */
8048 if (quote == 0)
8049 ctxt->checkIndex = base;
8050#ifdef DEBUG_PUSH
8051 if (next == 0)
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008052 xmlGenericError(xmlGenericErrorContext,
8053 "PP: lookup of int subset end filed\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008054#endif
8055 goto done;
8056
8057found_end_int_subset:
8058 xmlParseInternalSubset(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00008059 ctxt->inSubset = 2;
8060 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8061 (ctxt->sax->externalSubset != NULL))
8062 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8063 ctxt->extSubSystem, ctxt->extSubURI);
8064 ctxt->inSubset = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008065 ctxt->instate = XML_PARSER_PROLOG;
8066 ctxt->checkIndex = 0;
8067#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008068 xmlGenericError(xmlGenericErrorContext,
8069 "PP: entering PROLOG\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008070#endif
8071 break;
8072 }
Daniel Veillard7f858501999-11-17 17:32:38 +00008073 case XML_PARSER_COMMENT:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008074 xmlGenericError(xmlGenericErrorContext,
8075 "PP: internal error, state == COMMENT\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008076 ctxt->instate = XML_PARSER_CONTENT;
8077#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008078 xmlGenericError(xmlGenericErrorContext,
8079 "PP: entering CONTENT\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008080#endif
8081 break;
8082 case XML_PARSER_PI:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008083 xmlGenericError(xmlGenericErrorContext,
8084 "PP: internal error, state == PI\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008085 ctxt->instate = XML_PARSER_CONTENT;
8086#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008087 xmlGenericError(xmlGenericErrorContext,
8088 "PP: entering CONTENT\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008089#endif
8090 break;
8091 case XML_PARSER_ENTITY_DECL:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008092 xmlGenericError(xmlGenericErrorContext,
8093 "PP: internal error, state == ENTITY_DECL\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008094 ctxt->instate = XML_PARSER_DTD;
8095#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008096 xmlGenericError(xmlGenericErrorContext,
8097 "PP: entering DTD\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008098#endif
8099 break;
8100 case XML_PARSER_ENTITY_VALUE:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008101 xmlGenericError(xmlGenericErrorContext,
8102 "PP: internal error, state == ENTITY_VALUE\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008103 ctxt->instate = XML_PARSER_CONTENT;
8104#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008105 xmlGenericError(xmlGenericErrorContext,
8106 "PP: entering DTD\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008107#endif
8108 break;
8109 case XML_PARSER_ATTRIBUTE_VALUE:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008110 xmlGenericError(xmlGenericErrorContext,
8111 "PP: internal error, state == ATTRIBUTE_VALUE\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008112 ctxt->instate = XML_PARSER_START_TAG;
8113#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008114 xmlGenericError(xmlGenericErrorContext,
8115 "PP: entering START_TAG\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008116#endif
8117 break;
Daniel Veillardcf461992000-03-14 18:30:20 +00008118 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008119 xmlGenericError(xmlGenericErrorContext,
8120 "PP: internal error, state == SYSTEM_LITERAL\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00008121 ctxt->instate = XML_PARSER_START_TAG;
8122#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008123 xmlGenericError(xmlGenericErrorContext,
8124 "PP: entering START_TAG\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00008125#endif
8126 break;
Daniel Veillard7f858501999-11-17 17:32:38 +00008127 }
8128 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008129done:
8130#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008131 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008132#endif
Daniel Veillard7f858501999-11-17 17:32:38 +00008133 return(ret);
8134}
8135
8136/**
Daniel Veillard71b656e2000-01-05 14:46:17 +00008137 * xmlParseTry:
8138 * @ctxt: an XML parser context
8139 *
8140 * Try to progress on parsing
8141 *
8142 * Returns zero if no parsing was possible
8143 */
8144int
8145xmlParseTry(xmlParserCtxtPtr ctxt) {
8146 return(xmlParseTryOrFinish(ctxt, 0));
8147}
8148
8149/**
Daniel Veillard7f858501999-11-17 17:32:38 +00008150 * xmlParseChunk:
8151 * @ctxt: an XML parser context
8152 * @chunk: an char array
8153 * @size: the size in byte of the chunk
8154 * @terminate: last chunk indicator
8155 *
8156 * Parse a Chunk of memory
8157 *
8158 * Returns zero if no error, the xmlParserErrors otherwise.
8159 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008160int
Daniel Veillard7f858501999-11-17 17:32:38 +00008161xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8162 int terminate) {
Daniel Veillarda819dac1999-11-24 18:04:22 +00008163 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008164 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8165 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8166 int cur = ctxt->input->cur - ctxt->input->base;
8167
Daniel Veillarda819dac1999-11-24 18:04:22 +00008168 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008169 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8170 ctxt->input->cur = ctxt->input->base + cur;
8171#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008172 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008173#endif
8174
Daniel Veillardd0f7f742000-02-02 17:42:48 +00008175 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8176 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008177 } else if (ctxt->instate != XML_PARSER_EOF)
Daniel Veillardc2def842000-11-07 14:21:01 +00008178 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8179 xmlParserInputBufferPtr in = ctxt->input->buf;
8180 int nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8181 if (nbchars < 0) {
8182 xmlGenericError(xmlGenericErrorContext,
8183 "xmlParseChunk: encoder error\n");
8184 return(XML_ERR_INVALID_ENCODING);
8185 }
8186 }
Daniel Veillard71b656e2000-01-05 14:46:17 +00008187 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008188 if (terminate) {
Daniel Veillardcf461992000-03-14 18:30:20 +00008189 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00008190 * Check for termination
8191 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008192 if ((ctxt->instate != XML_PARSER_EOF) &&
8193 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00008194 ctxt->errNo = XML_ERR_DOCUMENT_END;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008195 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8196 ctxt->sax->error(ctxt->userData,
8197 "Extra content at the end of the document\n");
8198 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008199 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008200 }
8201 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillardcf461992000-03-14 18:30:20 +00008202 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8203 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008204 ctxt->sax->endDocument(ctxt->userData);
8205 }
8206 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard7f858501999-11-17 17:32:38 +00008207 }
8208 return((xmlParserErrors) ctxt->errNo);
8209}
8210
8211/************************************************************************
8212 * *
Daniel Veillardb05deb71999-08-10 19:04:08 +00008213 * I/O front end functions to the parser *
8214 * *
8215 ************************************************************************/
8216
Daniel Veillard11e00581998-10-24 18:27:49 +00008217/**
Daniel Veillardb1059e22000-09-16 14:02:43 +00008218 * xmlStopParser:
Daniel Veillard3f6f7f62000-06-30 17:58:25 +00008219 * @ctxt: an XML parser context
8220 *
8221 * Blocks further parser processing
8222 */
8223void
8224xmlStopParser(xmlParserCtxtPtr ctxt) {
8225 ctxt->instate = XML_PARSER_EOF;
8226 if (ctxt->input != NULL)
8227 ctxt->input->cur = BAD_CAST"";
8228}
8229
8230/**
8231 * xmlCreatePushParserCtxt:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008232 * @sax: a SAX handler
8233 * @user_data: The user data returned on SAX callbacks
8234 * @chunk: a pointer to an array of chars
8235 * @size: number of chars in the array
8236 * @filename: an optional file name or URI
8237 *
8238 * Create a parser context for using the XML parser in push mode
8239 * To allow content encoding detection, @size should be >= 4
8240 * The value of @filename is used for fetching external entities
8241 * and error/warning reports.
8242 *
8243 * Returns the new parser context or NULL
8244 */
8245xmlParserCtxtPtr
8246xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8247 const char *chunk, int size, const char *filename) {
8248 xmlParserCtxtPtr ctxt;
8249 xmlParserInputPtr inputStream;
8250 xmlParserInputBufferPtr buf;
8251 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8252
8253 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00008254 * plug some encoding conversion routines
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008255 */
8256 if ((chunk != NULL) && (size >= 4))
Daniel Veillardcf461992000-03-14 18:30:20 +00008257 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008258
8259 buf = xmlAllocParserInputBuffer(enc);
8260 if (buf == NULL) return(NULL);
8261
8262 ctxt = xmlNewParserCtxt();
8263 if (ctxt == NULL) {
8264 xmlFree(buf);
8265 return(NULL);
8266 }
8267 if (sax != NULL) {
8268 if (ctxt->sax != &xmlDefaultSAXHandler)
8269 xmlFree(ctxt->sax);
8270 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8271 if (ctxt->sax == NULL) {
8272 xmlFree(buf);
8273 xmlFree(ctxt);
8274 return(NULL);
8275 }
8276 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8277 if (user_data != NULL)
8278 ctxt->userData = user_data;
8279 }
8280 if (filename == NULL) {
8281 ctxt->directory = NULL;
8282 } else {
8283 ctxt->directory = xmlParserGetDirectory(filename);
8284 }
8285
8286 inputStream = xmlNewInputStream(ctxt);
8287 if (inputStream == NULL) {
8288 xmlFreeParserCtxt(ctxt);
8289 return(NULL);
8290 }
8291
8292 if (filename == NULL)
8293 inputStream->filename = NULL;
8294 else
8295 inputStream->filename = xmlMemStrdup(filename);
8296 inputStream->buf = buf;
8297 inputStream->base = inputStream->buf->buffer->content;
8298 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillardcf461992000-03-14 18:30:20 +00008299 if (enc != XML_CHAR_ENCODING_NONE) {
8300 xmlSwitchEncoding(ctxt, enc);
8301 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008302
8303 inputPush(ctxt, inputStream);
8304
8305 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8306 (ctxt->input->buf != NULL)) {
8307 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8308#ifdef DEBUG_PUSH
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008309 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008310#endif
8311 }
8312
8313 return(ctxt);
8314}
8315
8316/**
Daniel Veillard5e873c42000-04-12 13:27:38 +00008317 * xmlCreateIOParserCtxt:
8318 * @sax: a SAX handler
8319 * @user_data: The user data returned on SAX callbacks
8320 * @ioread: an I/O read function
8321 * @ioclose: an I/O close function
8322 * @ioctx: an I/O handler
8323 * @enc: the charset encoding if known
8324 *
8325 * Create a parser context for using the XML parser with an existing
8326 * I/O stream
8327 *
8328 * Returns the new parser context or NULL
8329 */
8330xmlParserCtxtPtr
8331xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8332 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8333 void *ioctx, xmlCharEncoding enc) {
8334 xmlParserCtxtPtr ctxt;
8335 xmlParserInputPtr inputStream;
8336 xmlParserInputBufferPtr buf;
8337
8338 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8339 if (buf == NULL) return(NULL);
8340
8341 ctxt = xmlNewParserCtxt();
8342 if (ctxt == NULL) {
8343 xmlFree(buf);
8344 return(NULL);
8345 }
8346 if (sax != NULL) {
8347 if (ctxt->sax != &xmlDefaultSAXHandler)
8348 xmlFree(ctxt->sax);
8349 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8350 if (ctxt->sax == NULL) {
8351 xmlFree(buf);
8352 xmlFree(ctxt);
8353 return(NULL);
8354 }
8355 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8356 if (user_data != NULL)
8357 ctxt->userData = user_data;
8358 }
8359
8360 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8361 if (inputStream == NULL) {
8362 xmlFreeParserCtxt(ctxt);
8363 return(NULL);
8364 }
8365 inputPush(ctxt, inputStream);
8366
8367 return(ctxt);
8368}
8369
Daniel Veillardb1059e22000-09-16 14:02:43 +00008370/************************************************************************
8371 * *
8372 * Front ends when parsing a Dtd *
8373 * *
8374 ************************************************************************/
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008375
8376/**
Daniel Veillard2ffc3592000-10-30 15:36:47 +00008377 * xmlIOParseDTD:
8378 * @sax: the SAX handler block or NULL
8379 * @input: an Input Buffer
8380 * @enc: the charset encoding if known
8381 *
8382 * Load and parse a DTD
8383 *
8384 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillarda4964b72000-10-31 18:23:44 +00008385 * @input will be freed at parsing end.
Daniel Veillard2ffc3592000-10-30 15:36:47 +00008386 */
8387
8388xmlDtdPtr
8389xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8390 xmlCharEncoding enc) {
8391 xmlDtdPtr ret = NULL;
8392 xmlParserCtxtPtr ctxt;
8393 xmlParserInputPtr pinput = NULL;
8394
8395 if (input == NULL)
8396 return(NULL);
8397
8398 ctxt = xmlNewParserCtxt();
8399 if (ctxt == NULL) {
8400 return(NULL);
8401 }
8402
8403 /*
8404 * Set-up the SAX context
8405 */
8406 if (sax != NULL) {
8407 if (ctxt->sax != NULL)
8408 xmlFree(ctxt->sax);
8409 ctxt->sax = sax;
8410 ctxt->userData = NULL;
8411 }
8412
8413 /*
8414 * generate a parser input from the I/O handler
8415 */
8416
8417 pinput = xmlNewIOInputStream(ctxt, input, enc);
8418 if (pinput == NULL) {
8419 if (sax != NULL) ctxt->sax = NULL;
8420 xmlFreeParserCtxt(ctxt);
8421 return(NULL);
8422 }
8423
8424 /*
8425 * plug some encoding conversion routines here.
8426 */
8427 xmlPushInput(ctxt, pinput);
8428
8429 pinput->filename = NULL;
8430 pinput->line = 1;
8431 pinput->col = 1;
8432 pinput->base = ctxt->input->cur;
8433 pinput->cur = ctxt->input->cur;
8434 pinput->free = NULL;
8435
8436 /*
8437 * let's parse that entity knowing it's an external subset.
8438 */
8439 ctxt->inSubset = 2;
8440 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8441 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8442 BAD_CAST "none", BAD_CAST "none");
8443 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8444
8445 if (ctxt->myDoc != NULL) {
8446 if (ctxt->wellFormed) {
8447 ret = ctxt->myDoc->extSubset;
8448 ctxt->myDoc->extSubset = NULL;
8449 } else {
8450 ret = NULL;
8451 }
8452 xmlFreeDoc(ctxt->myDoc);
8453 ctxt->myDoc = NULL;
8454 }
8455 if (sax != NULL) ctxt->sax = NULL;
8456 xmlFreeParserCtxt(ctxt);
8457
8458 return(ret);
8459}
8460
8461/**
Daniel Veillardcf461992000-03-14 18:30:20 +00008462 * xmlSAXParseDTD:
Daniel Veillard011b63c1999-06-02 17:44:04 +00008463 * @sax: the SAX handler block
8464 * @ExternalID: a NAME* containing the External ID of the DTD
8465 * @SystemID: a NAME* containing the URL to the DTD
8466 *
8467 * Load and parse an external subset.
8468 *
8469 * Returns the resulting xmlDtdPtr or NULL in case of error.
8470 */
8471
8472xmlDtdPtr
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008473xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8474 const xmlChar *SystemID) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00008475 xmlDtdPtr ret = NULL;
8476 xmlParserCtxtPtr ctxt;
Daniel Veillard14fff061999-06-22 21:49:07 +00008477 xmlParserInputPtr input = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +00008478 xmlCharEncoding enc;
8479
8480 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8481
Daniel Veillardb05deb71999-08-10 19:04:08 +00008482 ctxt = xmlNewParserCtxt();
Daniel Veillard011b63c1999-06-02 17:44:04 +00008483 if (ctxt == NULL) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00008484 return(NULL);
8485 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00008486
8487 /*
8488 * Set-up the SAX context
8489 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00008490 if (sax != NULL) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00008491 if (ctxt->sax != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00008492 xmlFree(ctxt->sax);
Daniel Veillard011b63c1999-06-02 17:44:04 +00008493 ctxt->sax = sax;
8494 ctxt->userData = NULL;
8495 }
8496
8497 /*
8498 * Ask the Entity resolver to load the damn thing
8499 */
8500
8501 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8502 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8503 if (input == NULL) {
Daniel Veillard97fea181999-06-26 23:07:37 +00008504 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +00008505 xmlFreeParserCtxt(ctxt);
8506 return(NULL);
8507 }
8508
8509 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00008510 * plug some encoding conversion routines here.
Daniel Veillard011b63c1999-06-02 17:44:04 +00008511 */
8512 xmlPushInput(ctxt, input);
Daniel Veillardcf461992000-03-14 18:30:20 +00008513 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
Daniel Veillard011b63c1999-06-02 17:44:04 +00008514 xmlSwitchEncoding(ctxt, enc);
8515
Daniel Veillardb05deb71999-08-10 19:04:08 +00008516 if (input->filename == NULL)
Daniel Veillardcf461992000-03-14 18:30:20 +00008517 input->filename = (char *) xmlStrdup(SystemID);
Daniel Veillard011b63c1999-06-02 17:44:04 +00008518 input->line = 1;
8519 input->col = 1;
8520 input->base = ctxt->input->cur;
8521 input->cur = ctxt->input->cur;
8522 input->free = NULL;
8523
8524 /*
8525 * let's parse that entity knowing it's an external subset.
8526 */
Daniel Veillard06047432000-04-24 11:33:38 +00008527 ctxt->inSubset = 2;
8528 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8529 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8530 ExternalID, SystemID);
Daniel Veillard011b63c1999-06-02 17:44:04 +00008531 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
8532
8533 if (ctxt->myDoc != NULL) {
8534 if (ctxt->wellFormed) {
Daniel Veillard06047432000-04-24 11:33:38 +00008535 ret = ctxt->myDoc->extSubset;
8536 ctxt->myDoc->extSubset = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +00008537 } else {
8538 ret = NULL;
8539 }
8540 xmlFreeDoc(ctxt->myDoc);
8541 ctxt->myDoc = NULL;
8542 }
Daniel Veillard97fea181999-06-26 23:07:37 +00008543 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +00008544 xmlFreeParserCtxt(ctxt);
8545
8546 return(ret);
8547}
8548
8549/**
Daniel Veillardcf461992000-03-14 18:30:20 +00008550 * xmlParseDTD:
Daniel Veillard011b63c1999-06-02 17:44:04 +00008551 * @ExternalID: a NAME* containing the External ID of the DTD
8552 * @SystemID: a NAME* containing the URL to the DTD
8553 *
8554 * Load and parse an external subset.
8555 *
8556 * Returns the resulting xmlDtdPtr or NULL in case of error.
8557 */
8558
8559xmlDtdPtr
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008560xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00008561 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
8562}
8563
Daniel Veillardb1059e22000-09-16 14:02:43 +00008564/************************************************************************
8565 * *
8566 * Front ends when parsing an Entity *
8567 * *
8568 ************************************************************************/
8569
Daniel Veillard011b63c1999-06-02 17:44:04 +00008570/**
Daniel Veillardcf461992000-03-14 18:30:20 +00008571 * xmlSAXParseBalancedChunk:
Daniel Veillard0142b842000-01-14 14:45:24 +00008572 * @ctx: an XML parser context (possibly NULL)
8573 * @sax: the SAX handler bloc (possibly NULL)
8574 * @user_data: The user data returned on SAX callbacks (possibly NULL)
8575 * @input: a parser input stream
8576 * @enc: the encoding
8577 *
8578 * Parse a well-balanced chunk of an XML document
8579 * The user has to provide SAX callback block whose routines will be
8580 * called by the parser
8581 * The allowed sequence for the Well Balanced Chunk is the one defined by
8582 * the content production in the XML grammar:
8583 *
8584 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8585 *
Daniel Veillardcf461992000-03-14 18:30:20 +00008586 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
Daniel Veillard0142b842000-01-14 14:45:24 +00008587 * the error code otherwise
8588 */
8589
8590int
8591xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
8592 void *user_data, xmlParserInputPtr input,
8593 xmlCharEncoding enc) {
8594 xmlParserCtxtPtr ctxt;
8595 int ret;
8596
8597 if (input == NULL) return(-1);
8598
8599 if (ctx != NULL)
8600 ctxt = ctx;
8601 else {
8602 ctxt = xmlNewParserCtxt();
8603 if (ctxt == NULL)
8604 return(-1);
8605 if (sax == NULL)
8606 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8607 }
8608
8609 /*
8610 * Set-up the SAX context
8611 */
8612 if (sax != NULL) {
8613 if (ctxt->sax != NULL)
8614 xmlFree(ctxt->sax);
8615 ctxt->sax = sax;
8616 ctxt->userData = user_data;
8617 }
8618
8619 /*
8620 * plug some encoding conversion routines here.
8621 */
8622 xmlPushInput(ctxt, input);
8623 if (enc != XML_CHAR_ENCODING_NONE)
8624 xmlSwitchEncoding(ctxt, enc);
8625
8626 /*
8627 * let's parse that entity knowing it's an external subset.
8628 */
8629 xmlParseContent(ctxt);
8630 ret = ctxt->errNo;
8631
8632 if (ctx == NULL) {
8633 if (sax != NULL)
8634 ctxt->sax = NULL;
8635 else
8636 xmlFreeDoc(ctxt->myDoc);
8637 xmlFreeParserCtxt(ctxt);
8638 }
8639 return(ret);
8640}
8641
8642/**
Daniel Veillard87b95392000-08-12 21:12:04 +00008643 * xmlParseCtxtExternalEntity:
8644 * @ctx: the existing parsing context
8645 * @URL: the URL for the entity to load
8646 * @ID: the System ID for the entity to load
8647 * @list: the return value for the set of parsed nodes
8648 *
8649 * Parse an external general entity within an existing parsing context
8650 * An external general parsed entity is well-formed if it matches the
8651 * production labeled extParsedEnt.
8652 *
8653 * [78] extParsedEnt ::= TextDecl? content
8654 *
8655 * Returns 0 if the entity is well formed, -1 in case of args problem and
8656 * the parser error code otherwise
8657 */
8658
8659int
8660xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8661 const xmlChar *ID, xmlNodePtr *list) {
8662 xmlParserCtxtPtr ctxt;
8663 xmlDocPtr newDoc;
8664 xmlSAXHandlerPtr oldsax = NULL;
8665 int ret = 0;
8666
8667 if (ctx->depth > 40) {
8668 return(XML_ERR_ENTITY_LOOP);
8669 }
8670
8671 if (list != NULL)
8672 *list = NULL;
8673 if ((URL == NULL) && (ID == NULL))
8674 return(-1);
8675 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8676 return(-1);
8677
8678
Daniel Veillard39c7d712000-09-10 16:14:55 +00008679 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
Daniel Veillard87b95392000-08-12 21:12:04 +00008680 if (ctxt == NULL) return(-1);
8681 ctxt->userData = ctxt;
8682 oldsax = ctxt->sax;
8683 ctxt->sax = ctx->sax;
8684 newDoc = xmlNewDoc(BAD_CAST "1.0");
8685 if (newDoc == NULL) {
8686 xmlFreeParserCtxt(ctxt);
8687 return(-1);
8688 }
8689 if (ctx->myDoc != NULL) {
8690 newDoc->intSubset = ctx->myDoc->intSubset;
8691 newDoc->extSubset = ctx->myDoc->extSubset;
8692 }
8693 if (ctx->myDoc->URL != NULL) {
8694 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8695 }
8696 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8697 if (newDoc->children == NULL) {
8698 ctxt->sax = oldsax;
8699 xmlFreeParserCtxt(ctxt);
8700 newDoc->intSubset = NULL;
8701 newDoc->extSubset = NULL;
8702 xmlFreeDoc(newDoc);
8703 return(-1);
8704 }
8705 nodePush(ctxt, newDoc->children);
8706 if (ctx->myDoc == NULL) {
8707 ctxt->myDoc = newDoc;
8708 } else {
8709 ctxt->myDoc = ctx->myDoc;
8710 newDoc->children->doc = ctx->myDoc;
8711 }
8712
8713 /*
8714 * Parse a possible text declaration first
8715 */
8716 GROW;
8717 if ((RAW == '<') && (NXT(1) == '?') &&
8718 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8719 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8720 xmlParseTextDecl(ctxt);
8721 }
8722
8723 /*
8724 * Doing validity checking on chunk doesn't make sense
8725 */
8726 ctxt->instate = XML_PARSER_CONTENT;
8727 ctxt->validate = ctx->validate;
8728 ctxt->depth = ctx->depth + 1;
8729 ctxt->replaceEntities = ctx->replaceEntities;
8730 if (ctxt->validate) {
8731 ctxt->vctxt.error = ctx->vctxt.error;
8732 ctxt->vctxt.warning = ctx->vctxt.warning;
8733 /* Allocate the Node stack */
8734 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
Daniel Veillard39c7d712000-09-10 16:14:55 +00008735 if (ctxt->vctxt.nodeTab == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +00008736 xmlGenericError(xmlGenericErrorContext,
8737 "xmlParseCtxtExternalEntity: out of memory\n");
Daniel Veillard39c7d712000-09-10 16:14:55 +00008738 ctxt->validate = 0;
8739 ctxt->vctxt.error = NULL;
8740 ctxt->vctxt.warning = NULL;
8741 } else {
8742 ctxt->vctxt.nodeNr = 0;
8743 ctxt->vctxt.nodeMax = 4;
8744 ctxt->vctxt.node = NULL;
8745 }
Daniel Veillard87b95392000-08-12 21:12:04 +00008746 } else {
8747 ctxt->vctxt.error = NULL;
8748 ctxt->vctxt.warning = NULL;
8749 }
8750
8751 xmlParseContent(ctxt);
8752
8753 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00008754 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
Daniel Veillard87b95392000-08-12 21:12:04 +00008755 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8756 ctxt->sax->error(ctxt->userData,
8757 "chunk is not well balanced\n");
8758 ctxt->wellFormed = 0;
8759 ctxt->disableSAX = 1;
Daniel Veillard87b95392000-08-12 21:12:04 +00008760 } else if (RAW != 0) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00008761 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
Daniel Veillard87b95392000-08-12 21:12:04 +00008762 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8763 ctxt->sax->error(ctxt->userData,
8764 "extra content at the end of well balanced chunk\n");
8765 ctxt->wellFormed = 0;
8766 ctxt->disableSAX = 1;
Daniel Veillard87b95392000-08-12 21:12:04 +00008767 }
8768 if (ctxt->node != newDoc->children) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00008769 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
Daniel Veillard87b95392000-08-12 21:12:04 +00008770 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8771 ctxt->sax->error(ctxt->userData,
8772 "chunk is not well balanced\n");
8773 ctxt->wellFormed = 0;
8774 ctxt->disableSAX = 1;
Daniel Veillard87b95392000-08-12 21:12:04 +00008775 }
8776
8777 if (!ctxt->wellFormed) {
8778 if (ctxt->errNo == 0)
8779 ret = 1;
8780 else
8781 ret = ctxt->errNo;
8782 } else {
8783 if (list != NULL) {
8784 xmlNodePtr cur;
8785
8786 /*
8787 * Return the newly created nodeset after unlinking it from
8788 * they pseudo parent.
8789 */
8790 cur = newDoc->children->children;
8791 *list = cur;
8792 while (cur != NULL) {
8793 cur->parent = NULL;
8794 cur = cur->next;
8795 }
8796 newDoc->children->children = NULL;
8797 }
8798 ret = 0;
8799 }
8800 ctxt->sax = oldsax;
8801 xmlFreeParserCtxt(ctxt);
8802 newDoc->intSubset = NULL;
8803 newDoc->extSubset = NULL;
8804 xmlFreeDoc(newDoc);
8805
8806 return(ret);
8807}
8808
8809/**
Daniel Veillardcf461992000-03-14 18:30:20 +00008810 * xmlParseExternalEntity:
Daniel Veillard0142b842000-01-14 14:45:24 +00008811 * @doc: the document the chunk pertains to
Daniel Veillardcf461992000-03-14 18:30:20 +00008812 * @sax: the SAX handler bloc (possibly NULL)
8813 * @user_data: The user data returned on SAX callbacks (possibly NULL)
8814 * @depth: Used for loop detection, use 0
8815 * @URL: the URL for the entity to load
8816 * @ID: the System ID for the entity to load
8817 * @list: the return value for the set of parsed nodes
Daniel Veillard0142b842000-01-14 14:45:24 +00008818 *
Daniel Veillardcf461992000-03-14 18:30:20 +00008819 * Parse an external general entity
8820 * An external general parsed entity is well-formed if it matches the
8821 * production labeled extParsedEnt.
8822 *
8823 * [78] extParsedEnt ::= TextDecl? content
8824 *
8825 * Returns 0 if the entity is well formed, -1 in case of args problem and
8826 * the parser error code otherwise
Daniel Veillard0142b842000-01-14 14:45:24 +00008827 */
8828
Daniel Veillardcf461992000-03-14 18:30:20 +00008829int
8830xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
8831 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
8832 xmlParserCtxtPtr ctxt;
8833 xmlDocPtr newDoc;
8834 xmlSAXHandlerPtr oldsax = NULL;
8835 int ret = 0;
8836
8837 if (depth > 40) {
8838 return(XML_ERR_ENTITY_LOOP);
8839 }
8840
8841
8842
8843 if (list != NULL)
8844 *list = NULL;
8845 if ((URL == NULL) && (ID == NULL))
8846 return(-1);
Daniel Veillard87b95392000-08-12 21:12:04 +00008847 if (doc == NULL) /* @@ relax but check for dereferences */
8848 return(-1);
Daniel Veillardcf461992000-03-14 18:30:20 +00008849
8850
Daniel Veillard39c7d712000-09-10 16:14:55 +00008851 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
Daniel Veillardcf461992000-03-14 18:30:20 +00008852 if (ctxt == NULL) return(-1);
8853 ctxt->userData = ctxt;
8854 if (sax != NULL) {
8855 oldsax = ctxt->sax;
8856 ctxt->sax = sax;
8857 if (user_data != NULL)
8858 ctxt->userData = user_data;
8859 }
8860 newDoc = xmlNewDoc(BAD_CAST "1.0");
8861 if (newDoc == NULL) {
8862 xmlFreeParserCtxt(ctxt);
8863 return(-1);
8864 }
8865 if (doc != NULL) {
8866 newDoc->intSubset = doc->intSubset;
8867 newDoc->extSubset = doc->extSubset;
8868 }
8869 if (doc->URL != NULL) {
8870 newDoc->URL = xmlStrdup(doc->URL);
8871 }
8872 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8873 if (newDoc->children == NULL) {
8874 if (sax != NULL)
8875 ctxt->sax = oldsax;
8876 xmlFreeParserCtxt(ctxt);
8877 newDoc->intSubset = NULL;
8878 newDoc->extSubset = NULL;
8879 xmlFreeDoc(newDoc);
8880 return(-1);
8881 }
8882 nodePush(ctxt, newDoc->children);
8883 if (doc == NULL) {
8884 ctxt->myDoc = newDoc;
8885 } else {
8886 ctxt->myDoc = doc;
8887 newDoc->children->doc = doc;
8888 }
8889
8890 /*
8891 * Parse a possible text declaration first
8892 */
8893 GROW;
8894 if ((RAW == '<') && (NXT(1) == '?') &&
8895 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8896 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8897 xmlParseTextDecl(ctxt);
8898 }
8899
8900 /*
8901 * Doing validity checking on chunk doesn't make sense
8902 */
8903 ctxt->instate = XML_PARSER_CONTENT;
8904 ctxt->validate = 0;
8905 ctxt->depth = depth;
8906
8907 xmlParseContent(ctxt);
8908
8909 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00008910 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
Daniel Veillardcf461992000-03-14 18:30:20 +00008911 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8912 ctxt->sax->error(ctxt->userData,
8913 "chunk is not well balanced\n");
8914 ctxt->wellFormed = 0;
8915 ctxt->disableSAX = 1;
Daniel Veillardcf461992000-03-14 18:30:20 +00008916 } else if (RAW != 0) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00008917 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
Daniel Veillardcf461992000-03-14 18:30:20 +00008918 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8919 ctxt->sax->error(ctxt->userData,
8920 "extra content at the end of well balanced chunk\n");
8921 ctxt->wellFormed = 0;
8922 ctxt->disableSAX = 1;
Daniel Veillardcf461992000-03-14 18:30:20 +00008923 }
8924 if (ctxt->node != newDoc->children) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00008925 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
Daniel Veillardcf461992000-03-14 18:30:20 +00008926 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8927 ctxt->sax->error(ctxt->userData,
8928 "chunk is not well balanced\n");
8929 ctxt->wellFormed = 0;
8930 ctxt->disableSAX = 1;
Daniel Veillardcf461992000-03-14 18:30:20 +00008931 }
8932
8933 if (!ctxt->wellFormed) {
8934 if (ctxt->errNo == 0)
8935 ret = 1;
8936 else
8937 ret = ctxt->errNo;
8938 } else {
8939 if (list != NULL) {
8940 xmlNodePtr cur;
8941
8942 /*
8943 * Return the newly created nodeset after unlinking it from
8944 * they pseudo parent.
8945 */
8946 cur = newDoc->children->children;
8947 *list = cur;
8948 while (cur != NULL) {
8949 cur->parent = NULL;
8950 cur = cur->next;
8951 }
8952 newDoc->children->children = NULL;
8953 }
8954 ret = 0;
8955 }
8956 if (sax != NULL)
8957 ctxt->sax = oldsax;
8958 xmlFreeParserCtxt(ctxt);
8959 newDoc->intSubset = NULL;
8960 newDoc->extSubset = NULL;
8961 xmlFreeDoc(newDoc);
8962
8963 return(ret);
Daniel Veillard0142b842000-01-14 14:45:24 +00008964}
8965
8966/**
Daniel Veillardcf461992000-03-14 18:30:20 +00008967 * xmlParseBalancedChunk:
8968 * @doc: the document the chunk pertains to
8969 * @sax: the SAX handler bloc (possibly NULL)
8970 * @user_data: The user data returned on SAX callbacks (possibly NULL)
8971 * @depth: Used for loop detection, use 0
8972 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
8973 * @list: the return value for the set of parsed nodes
8974 *
8975 * Parse a well-balanced chunk of an XML document
8976 * called by the parser
8977 * The allowed sequence for the Well Balanced Chunk is the one defined by
8978 * the content production in the XML grammar:
8979 *
8980 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8981 *
8982 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
8983 * the parser error code otherwise
8984 */
8985
8986int
8987xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
8988 void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
8989 xmlParserCtxtPtr ctxt;
8990 xmlDocPtr newDoc;
8991 xmlSAXHandlerPtr oldsax = NULL;
8992 int size;
8993 int ret = 0;
8994
8995 if (depth > 40) {
8996 return(XML_ERR_ENTITY_LOOP);
8997 }
8998
8999
9000 if (list != NULL)
9001 *list = NULL;
9002 if (string == NULL)
9003 return(-1);
9004
9005 size = xmlStrlen(string);
9006
9007 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9008 if (ctxt == NULL) return(-1);
9009 ctxt->userData = ctxt;
9010 if (sax != NULL) {
9011 oldsax = ctxt->sax;
9012 ctxt->sax = sax;
9013 if (user_data != NULL)
9014 ctxt->userData = user_data;
9015 }
9016 newDoc = xmlNewDoc(BAD_CAST "1.0");
9017 if (newDoc == NULL) {
9018 xmlFreeParserCtxt(ctxt);
9019 return(-1);
9020 }
9021 if (doc != NULL) {
9022 newDoc->intSubset = doc->intSubset;
9023 newDoc->extSubset = doc->extSubset;
9024 }
9025 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9026 if (newDoc->children == NULL) {
9027 if (sax != NULL)
9028 ctxt->sax = oldsax;
9029 xmlFreeParserCtxt(ctxt);
9030 newDoc->intSubset = NULL;
9031 newDoc->extSubset = NULL;
9032 xmlFreeDoc(newDoc);
9033 return(-1);
9034 }
9035 nodePush(ctxt, newDoc->children);
9036 if (doc == NULL) {
9037 ctxt->myDoc = newDoc;
9038 } else {
9039 ctxt->myDoc = doc;
9040 newDoc->children->doc = doc;
9041 }
9042 ctxt->instate = XML_PARSER_CONTENT;
9043 ctxt->depth = depth;
9044
9045 /*
9046 * Doing validity checking on chunk doesn't make sense
9047 */
9048 ctxt->validate = 0;
9049
9050 xmlParseContent(ctxt);
9051
9052 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00009053 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
Daniel Veillardcf461992000-03-14 18:30:20 +00009054 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9055 ctxt->sax->error(ctxt->userData,
9056 "chunk is not well balanced\n");
9057 ctxt->wellFormed = 0;
9058 ctxt->disableSAX = 1;
Daniel Veillardcf461992000-03-14 18:30:20 +00009059 } else if (RAW != 0) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00009060 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
Daniel Veillardcf461992000-03-14 18:30:20 +00009061 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9062 ctxt->sax->error(ctxt->userData,
9063 "extra content at the end of well balanced chunk\n");
9064 ctxt->wellFormed = 0;
9065 ctxt->disableSAX = 1;
Daniel Veillardcf461992000-03-14 18:30:20 +00009066 }
9067 if (ctxt->node != newDoc->children) {
Daniel Veillarda2c6da92000-09-16 18:15:00 +00009068 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
Daniel Veillardcf461992000-03-14 18:30:20 +00009069 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9070 ctxt->sax->error(ctxt->userData,
9071 "chunk is not well balanced\n");
9072 ctxt->wellFormed = 0;
9073 ctxt->disableSAX = 1;
Daniel Veillardcf461992000-03-14 18:30:20 +00009074 }
9075
9076 if (!ctxt->wellFormed) {
9077 if (ctxt->errNo == 0)
9078 ret = 1;
9079 else
9080 ret = ctxt->errNo;
9081 } else {
9082 if (list != NULL) {
9083 xmlNodePtr cur;
9084
9085 /*
9086 * Return the newly created nodeset after unlinking it from
9087 * they pseudo parent.
9088 */
9089 cur = newDoc->children->children;
9090 *list = cur;
9091 while (cur != NULL) {
9092 cur->parent = NULL;
9093 cur = cur->next;
9094 }
9095 newDoc->children->children = NULL;
9096 }
9097 ret = 0;
9098 }
9099 if (sax != NULL)
9100 ctxt->sax = oldsax;
9101 xmlFreeParserCtxt(ctxt);
9102 newDoc->intSubset = NULL;
9103 newDoc->extSubset = NULL;
9104 xmlFreeDoc(newDoc);
9105
9106 return(ret);
9107}
9108
9109/**
Daniel Veillardb1059e22000-09-16 14:02:43 +00009110 * xmlSAXParseEntity:
9111 * @sax: the SAX handler block
9112 * @filename: the filename
Daniel Veillard0142b842000-01-14 14:45:24 +00009113 *
Daniel Veillardb1059e22000-09-16 14:02:43 +00009114 * parse an XML external entity out of context and build a tree.
9115 * It use the given SAX function block to handle the parsing callback.
9116 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009117 *
Daniel Veillardb1059e22000-09-16 14:02:43 +00009118 * [78] extParsedEnt ::= TextDecl? content
9119 *
9120 * This correspond to a "Well Balanced" chunk
9121 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00009122 * Returns the resulting document tree
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009123 */
9124
Daniel Veillard1e346af1999-02-22 10:33:01 +00009125xmlDocPtr
Daniel Veillardb1059e22000-09-16 14:02:43 +00009126xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9127 xmlDocPtr ret;
9128 xmlParserCtxtPtr ctxt;
9129 char *directory = NULL;
9130
9131 ctxt = xmlCreateFileParserCtxt(filename);
9132 if (ctxt == NULL) {
9133 return(NULL);
9134 }
9135 if (sax != NULL) {
9136 if (ctxt->sax != NULL)
9137 xmlFree(ctxt->sax);
9138 ctxt->sax = sax;
9139 ctxt->userData = NULL;
9140 }
9141
9142 if ((ctxt->directory == NULL) && (directory == NULL))
9143 directory = xmlParserGetDirectory(filename);
9144
9145 xmlParseExtParsedEnt(ctxt);
9146
9147 if (ctxt->wellFormed)
9148 ret = ctxt->myDoc;
9149 else {
9150 ret = NULL;
9151 xmlFreeDoc(ctxt->myDoc);
9152 ctxt->myDoc = NULL;
9153 }
9154 if (sax != NULL)
9155 ctxt->sax = NULL;
9156 xmlFreeParserCtxt(ctxt);
9157
9158 return(ret);
9159}
9160
9161/**
9162 * xmlParseEntity:
9163 * @filename: the filename
9164 *
9165 * parse an XML external entity out of context and build a tree.
9166 *
9167 * [78] extParsedEnt ::= TextDecl? content
9168 *
9169 * This correspond to a "Well Balanced" chunk
9170 *
9171 * Returns the resulting document tree
9172 */
9173
9174xmlDocPtr
9175xmlParseEntity(const char *filename) {
9176 return(xmlSAXParseEntity(NULL, filename));
Daniel Veillard42dc9b31998-11-09 01:17:21 +00009177}
9178
9179/**
Daniel Veillardcf461992000-03-14 18:30:20 +00009180 * xmlCreateEntityParserCtxt:
9181 * @URL: the entity URL
9182 * @ID: the entity PUBLIC ID
9183 * @base: a posible base for the target URI
9184 *
9185 * Create a parser context for an external entity
9186 * Automatic support for ZLIB/Compress compressed document is provided
9187 * by default if found at compile-time.
9188 *
9189 * Returns the new parser context or NULL
9190 */
9191xmlParserCtxtPtr
9192xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9193 const xmlChar *base) {
9194 xmlParserCtxtPtr ctxt;
9195 xmlParserInputPtr inputStream;
9196 char *directory = NULL;
Daniel Veillard87b95392000-08-12 21:12:04 +00009197 xmlChar *uri;
9198
Daniel Veillardcf461992000-03-14 18:30:20 +00009199 ctxt = xmlNewParserCtxt();
9200 if (ctxt == NULL) {
9201 return(NULL);
9202 }
9203
Daniel Veillard87b95392000-08-12 21:12:04 +00009204 uri = xmlBuildURI(URL, base);
9205
9206 if (uri == NULL) {
9207 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9208 if (inputStream == NULL) {
9209 xmlFreeParserCtxt(ctxt);
9210 return(NULL);
9211 }
9212
9213 inputPush(ctxt, inputStream);
9214
9215 if ((ctxt->directory == NULL) && (directory == NULL))
9216 directory = xmlParserGetDirectory((char *)URL);
9217 if ((ctxt->directory == NULL) && (directory != NULL))
9218 ctxt->directory = directory;
9219 } else {
9220 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9221 if (inputStream == NULL) {
9222 xmlFreeParserCtxt(ctxt);
9223 return(NULL);
9224 }
9225
9226 inputPush(ctxt, inputStream);
9227
9228 if ((ctxt->directory == NULL) && (directory == NULL))
9229 directory = xmlParserGetDirectory((char *)uri);
9230 if ((ctxt->directory == NULL) && (directory != NULL))
9231 ctxt->directory = directory;
9232 xmlFree(uri);
Daniel Veillardcf461992000-03-14 18:30:20 +00009233 }
9234
Daniel Veillardcf461992000-03-14 18:30:20 +00009235 return(ctxt);
9236}
9237
Daniel Veillardb1059e22000-09-16 14:02:43 +00009238/************************************************************************
9239 * *
9240 * Front ends when parsing from a file *
9241 * *
9242 ************************************************************************/
9243
Daniel Veillardcf461992000-03-14 18:30:20 +00009244/**
9245 * xmlCreateFileParserCtxt:
Daniel Veillard11e00581998-10-24 18:27:49 +00009246 * @filename: the filename
9247 *
Daniel Veillardd692aa41999-02-28 21:54:31 +00009248 * Create a parser context for a file content.
9249 * Automatic support for ZLIB/Compress compressed document is provided
9250 * by default if found at compile-time.
Daniel Veillard11e00581998-10-24 18:27:49 +00009251 *
Daniel Veillardd692aa41999-02-28 21:54:31 +00009252 * Returns the new parser context or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00009253 */
Daniel Veillardd692aa41999-02-28 21:54:31 +00009254xmlParserCtxtPtr
9255xmlCreateFileParserCtxt(const char *filename)
9256{
9257 xmlParserCtxtPtr ctxt;
Daniel Veillard260a68f1998-08-13 03:39:55 +00009258 xmlParserInputPtr inputStream;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00009259 xmlParserInputBufferPtr buf;
Daniel Veillardb05deb71999-08-10 19:04:08 +00009260 char *directory = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00009261
Daniel Veillarde2d034d1999-07-27 19:52:06 +00009262 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
Daniel Veillard39c7d712000-09-10 16:14:55 +00009263 if (buf == NULL) {
9264 return(NULL);
9265 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00009266
Daniel Veillardb05deb71999-08-10 19:04:08 +00009267 ctxt = xmlNewParserCtxt();
Daniel Veillard260a68f1998-08-13 03:39:55 +00009268 if (ctxt == NULL) {
Daniel Veillard39c7d712000-09-10 16:14:55 +00009269 if (xmlDefaultSAXHandler.error != NULL) {
9270 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9271 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00009272 return(NULL);
9273 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00009274
9275 inputStream = xmlNewInputStream(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00009276 if (inputStream == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00009277 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00009278 return(NULL);
9279 }
9280
Daniel Veillard6454aec1999-09-02 22:04:43 +00009281 inputStream->filename = xmlMemStrdup(filename);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00009282 inputStream->buf = buf;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00009283 inputStream->base = inputStream->buf->buffer->content;
9284 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard260a68f1998-08-13 03:39:55 +00009285
9286 inputPush(ctxt, inputStream);
Daniel Veillardb05deb71999-08-10 19:04:08 +00009287 if ((ctxt->directory == NULL) && (directory == NULL))
9288 directory = xmlParserGetDirectory(filename);
9289 if ((ctxt->directory == NULL) && (directory != NULL))
9290 ctxt->directory = directory;
9291
Daniel Veillardd692aa41999-02-28 21:54:31 +00009292 return(ctxt);
9293}
9294
9295/**
Daniel Veillardcf461992000-03-14 18:30:20 +00009296 * xmlSAXParseFile:
Daniel Veillardd692aa41999-02-28 21:54:31 +00009297 * @sax: the SAX handler block
9298 * @filename: the filename
9299 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9300 * documents
9301 *
9302 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9303 * compressed document is provided by default if found at compile-time.
9304 * It use the given SAX function block to handle the parsing callback.
9305 * If sax is NULL, fallback to the default DOM tree building routines.
9306 *
9307 * Returns the resulting document tree
9308 */
9309
Daniel Veillard011b63c1999-06-02 17:44:04 +00009310xmlDocPtr
9311xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
Daniel Veillardd692aa41999-02-28 21:54:31 +00009312 int recovery) {
9313 xmlDocPtr ret;
9314 xmlParserCtxtPtr ctxt;
Daniel Veillardb05deb71999-08-10 19:04:08 +00009315 char *directory = NULL;
Daniel Veillardd692aa41999-02-28 21:54:31 +00009316
9317 ctxt = xmlCreateFileParserCtxt(filename);
Daniel Veillard39c7d712000-09-10 16:14:55 +00009318 if (ctxt == NULL) {
9319 return(NULL);
9320 }
Daniel Veillard27d88741999-05-29 11:51:49 +00009321 if (sax != NULL) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00009322 if (ctxt->sax != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00009323 xmlFree(ctxt->sax);
Daniel Veillard27d88741999-05-29 11:51:49 +00009324 ctxt->sax = sax;
9325 ctxt->userData = NULL;
9326 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00009327
Daniel Veillardb05deb71999-08-10 19:04:08 +00009328 if ((ctxt->directory == NULL) && (directory == NULL))
9329 directory = xmlParserGetDirectory(filename);
9330 if ((ctxt->directory == NULL) && (directory != NULL))
Daniel Veillardcf461992000-03-14 18:30:20 +00009331 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
Daniel Veillardb05deb71999-08-10 19:04:08 +00009332
Daniel Veillard260a68f1998-08-13 03:39:55 +00009333 xmlParseDocument(ctxt);
9334
Daniel Veillard517752b1999-04-05 12:20:10 +00009335 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009336 else {
9337 ret = NULL;
Daniel Veillard517752b1999-04-05 12:20:10 +00009338 xmlFreeDoc(ctxt->myDoc);
9339 ctxt->myDoc = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009340 }
Daniel Veillard97fea181999-06-26 23:07:37 +00009341 if (sax != NULL)
9342 ctxt->sax = NULL;
Daniel Veillardd692aa41999-02-28 21:54:31 +00009343 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00009344
9345 return(ret);
9346}
9347
Daniel Veillard42dc9b31998-11-09 01:17:21 +00009348/**
Daniel Veillardb1059e22000-09-16 14:02:43 +00009349 * xmlRecoverDoc:
9350 * @cur: a pointer to an array of xmlChar
9351 *
9352 * parse an XML in-memory document and build a tree.
9353 * In the case the document is not Well Formed, a tree is built anyway
9354 *
9355 * Returns the resulting document tree
9356 */
9357
9358xmlDocPtr
9359xmlRecoverDoc(xmlChar *cur) {
9360 return(xmlSAXParseDoc(NULL, cur, 1));
9361}
9362
9363/**
Daniel Veillardcf461992000-03-14 18:30:20 +00009364 * xmlParseFile:
Daniel Veillard42dc9b31998-11-09 01:17:21 +00009365 * @filename: the filename
9366 *
9367 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9368 * compressed document is provided by default if found at compile-time.
9369 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00009370 * Returns the resulting document tree
Daniel Veillard42dc9b31998-11-09 01:17:21 +00009371 */
9372
Daniel Veillard011b63c1999-06-02 17:44:04 +00009373xmlDocPtr
9374xmlParseFile(const char *filename) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009375 return(xmlSAXParseFile(NULL, filename, 0));
9376}
9377
9378/**
Daniel Veillardcf461992000-03-14 18:30:20 +00009379 * xmlRecoverFile:
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009380 * @filename: the filename
9381 *
9382 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9383 * compressed document is provided by default if found at compile-time.
9384 * In the case the document is not Well Formed, a tree is built anyway
9385 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00009386 * Returns the resulting document tree
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009387 */
9388
Daniel Veillard011b63c1999-06-02 17:44:04 +00009389xmlDocPtr
9390xmlRecoverFile(const char *filename) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009391 return(xmlSAXParseFile(NULL, filename, 1));
Daniel Veillard42dc9b31998-11-09 01:17:21 +00009392}
Daniel Veillard260a68f1998-08-13 03:39:55 +00009393
Daniel Veillardb1059e22000-09-16 14:02:43 +00009394
9395/**
9396 * xmlSetupParserForBuffer:
9397 * @ctxt: an XML parser context
9398 * @buffer: a xmlChar * buffer
9399 * @filename: a file name
9400 *
9401 * Setup the parser context to parse a new buffer; Clears any prior
9402 * contents from the parser context. The buffer parameter must not be
9403 * NULL, but the filename parameter can be
9404 */
9405void
9406xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9407 const char* filename)
9408{
9409 xmlParserInputPtr input;
9410
9411 input = xmlNewInputStream(ctxt);
9412 if (input == NULL) {
9413 perror("malloc");
9414 xmlFree(ctxt);
9415 return;
9416 }
9417
9418 xmlClearParserCtxt(ctxt);
9419 if (filename != NULL)
9420 input->filename = xmlMemStrdup(filename);
9421 input->base = buffer;
9422 input->cur = buffer;
9423 inputPush(ctxt, input);
9424}
9425
9426/**
9427 * xmlSAXUserParseFile:
9428 * @sax: a SAX handler
9429 * @user_data: The user data returned on SAX callbacks
9430 * @filename: a file name
9431 *
9432 * parse an XML file and call the given SAX handler routines.
9433 * Automatic support for ZLIB/Compress compressed document is provided
9434 *
9435 * Returns 0 in case of success or a error number otherwise
9436 */
9437int
9438xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9439 const char *filename) {
9440 int ret = 0;
9441 xmlParserCtxtPtr ctxt;
9442
9443 ctxt = xmlCreateFileParserCtxt(filename);
9444 if (ctxt == NULL) return -1;
9445 if (ctxt->sax != &xmlDefaultSAXHandler)
9446 xmlFree(ctxt->sax);
9447 ctxt->sax = sax;
9448 if (user_data != NULL)
9449 ctxt->userData = user_data;
9450
9451 xmlParseDocument(ctxt);
9452
9453 if (ctxt->wellFormed)
9454 ret = 0;
9455 else {
9456 if (ctxt->errNo != 0)
9457 ret = ctxt->errNo;
9458 else
9459 ret = -1;
9460 }
9461 if (sax != NULL)
9462 ctxt->sax = NULL;
9463 xmlFreeParserCtxt(ctxt);
9464
9465 return ret;
9466}
9467
9468/************************************************************************
9469 * *
9470 * Front ends when parsing from memory *
9471 * *
9472 ************************************************************************/
9473
Daniel Veillard11e00581998-10-24 18:27:49 +00009474/**
Daniel Veillardcf461992000-03-14 18:30:20 +00009475 * xmlCreateMemoryParserCtxt:
Daniel Veillardb1059e22000-09-16 14:02:43 +00009476 * @buffer: a pointer to a char array
9477 * @size: the size of the array
Daniel Veillard11e00581998-10-24 18:27:49 +00009478 *
Daniel Veillardd692aa41999-02-28 21:54:31 +00009479 * Create a parser context for an XML in-memory document.
Daniel Veillard11e00581998-10-24 18:27:49 +00009480 *
Daniel Veillardd692aa41999-02-28 21:54:31 +00009481 * Returns the new parser context or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00009482 */
Daniel Veillardd692aa41999-02-28 21:54:31 +00009483xmlParserCtxtPtr
9484xmlCreateMemoryParserCtxt(char *buffer, int size) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00009485 xmlParserCtxtPtr ctxt;
9486 xmlParserInputPtr input;
Daniel Veillard46e370e2000-07-21 20:32:03 +00009487 xmlParserInputBufferPtr buf;
Daniel Veillard260a68f1998-08-13 03:39:55 +00009488
Daniel Veillardb1059e22000-09-16 14:02:43 +00009489 if (buffer == NULL)
9490 return(NULL);
9491 if (size <= 0)
Daniel Veillardb566ce12000-03-04 11:39:42 +00009492 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00009493
Daniel Veillardb05deb71999-08-10 19:04:08 +00009494 ctxt = xmlNewParserCtxt();
Daniel Veillardcf461992000-03-14 18:30:20 +00009495 if (ctxt == NULL)
Daniel Veillard260a68f1998-08-13 03:39:55 +00009496 return(NULL);
Daniel Veillardb05deb71999-08-10 19:04:08 +00009497
Daniel Veillard46e370e2000-07-21 20:32:03 +00009498 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9499 if (buf == NULL) return(NULL);
9500
Daniel Veillardb05deb71999-08-10 19:04:08 +00009501 input = xmlNewInputStream(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00009502 if (input == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00009503 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00009504 return(NULL);
9505 }
9506
9507 input->filename = NULL;
Daniel Veillard46e370e2000-07-21 20:32:03 +00009508 input->buf = buf;
9509 input->base = input->buf->buffer->content;
9510 input->cur = input->buf->buffer->content;
Daniel Veillard260a68f1998-08-13 03:39:55 +00009511
9512 inputPush(ctxt, input);
Daniel Veillardd692aa41999-02-28 21:54:31 +00009513 return(ctxt);
9514}
9515
9516/**
Daniel Veillardcf461992000-03-14 18:30:20 +00009517 * xmlSAXParseMemory:
Daniel Veillardd692aa41999-02-28 21:54:31 +00009518 * @sax: the SAX handler block
9519 * @buffer: an pointer to a char array
Daniel Veillard51e3b151999-11-12 17:02:31 +00009520 * @size: the size of the array
9521 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
Daniel Veillardd692aa41999-02-28 21:54:31 +00009522 * documents
9523 *
9524 * parse an XML in-memory block and use the given SAX function block
9525 * to handle the parsing callback. If sax is NULL, fallback to the default
9526 * DOM tree building routines.
9527 *
Daniel Veillardd692aa41999-02-28 21:54:31 +00009528 * Returns the resulting document tree
9529 */
9530xmlDocPtr
9531xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9532 xmlDocPtr ret;
9533 xmlParserCtxtPtr ctxt;
9534
9535 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9536 if (ctxt == NULL) return(NULL);
Daniel Veillard27d88741999-05-29 11:51:49 +00009537 if (sax != NULL) {
9538 ctxt->sax = sax;
9539 ctxt->userData = NULL;
9540 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00009541
9542 xmlParseDocument(ctxt);
9543
Daniel Veillard517752b1999-04-05 12:20:10 +00009544 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009545 else {
9546 ret = NULL;
Daniel Veillard517752b1999-04-05 12:20:10 +00009547 xmlFreeDoc(ctxt->myDoc);
9548 ctxt->myDoc = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009549 }
Daniel Veillard97fea181999-06-26 23:07:37 +00009550 if (sax != NULL)
9551 ctxt->sax = NULL;
Daniel Veillardd692aa41999-02-28 21:54:31 +00009552 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00009553
9554 return(ret);
9555}
9556
Daniel Veillard42dc9b31998-11-09 01:17:21 +00009557/**
Daniel Veillardcf461992000-03-14 18:30:20 +00009558 * xmlParseMemory:
Daniel Veillard1e346af1999-02-22 10:33:01 +00009559 * @buffer: an pointer to a char array
Daniel Veillard42dc9b31998-11-09 01:17:21 +00009560 * @size: the size of the array
9561 *
9562 * parse an XML in-memory block and build a tree.
9563 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00009564 * Returns the resulting document tree
Daniel Veillard42dc9b31998-11-09 01:17:21 +00009565 */
9566
9567xmlDocPtr xmlParseMemory(char *buffer, int size) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009568 return(xmlSAXParseMemory(NULL, buffer, size, 0));
9569}
9570
9571/**
Daniel Veillardcf461992000-03-14 18:30:20 +00009572 * xmlRecoverMemory:
Daniel Veillard1e346af1999-02-22 10:33:01 +00009573 * @buffer: an pointer to a char array
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009574 * @size: the size of the array
9575 *
9576 * parse an XML in-memory block and build a tree.
9577 * In the case the document is not Well Formed, a tree is built anyway
9578 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00009579 * Returns the resulting document tree
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009580 */
9581
9582xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9583 return(xmlSAXParseMemory(NULL, buffer, size, 1));
Daniel Veillard42dc9b31998-11-09 01:17:21 +00009584}
Daniel Veillard260a68f1998-08-13 03:39:55 +00009585
Daniel Veillard7a66ee61999-09-26 11:31:02 +00009586/**
9587 * xmlSAXUserParseMemory:
9588 * @sax: a SAX handler
9589 * @user_data: The user data returned on SAX callbacks
9590 * @buffer: an in-memory XML document input
Daniel Veillard51e3b151999-11-12 17:02:31 +00009591 * @size: the length of the XML document in bytes
Daniel Veillard7a66ee61999-09-26 11:31:02 +00009592 *
9593 * A better SAX parsing routine.
9594 * parse an XML in-memory buffer and call the given SAX handler routines.
9595 *
9596 * Returns 0 in case of success or a error number otherwise
9597 */
9598int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
9599 char *buffer, int size) {
9600 int ret = 0;
9601 xmlParserCtxtPtr ctxt;
Daniel Veillard87b95392000-08-12 21:12:04 +00009602 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7a66ee61999-09-26 11:31:02 +00009603
9604 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9605 if (ctxt == NULL) return -1;
Daniel Veillard87b95392000-08-12 21:12:04 +00009606 if (sax != NULL) {
9607 oldsax = ctxt->sax;
9608 ctxt->sax = sax;
9609 }
Daniel Veillard7a66ee61999-09-26 11:31:02 +00009610 ctxt->userData = user_data;
9611
9612 xmlParseDocument(ctxt);
9613
9614 if (ctxt->wellFormed)
9615 ret = 0;
9616 else {
9617 if (ctxt->errNo != 0)
9618 ret = ctxt->errNo;
9619 else
9620 ret = -1;
9621 }
Daniel Veillard87b95392000-08-12 21:12:04 +00009622 if (sax != NULL) {
9623 ctxt->sax = oldsax;
9624 }
Daniel Veillard7a66ee61999-09-26 11:31:02 +00009625 xmlFreeParserCtxt(ctxt);
9626
9627 return ret;
9628}
9629
Daniel Veillardb1059e22000-09-16 14:02:43 +00009630/**
9631 * xmlCreateDocParserCtxt:
9632 * @cur: a pointer to an array of xmlChar
9633 *
9634 * Creates a parser context for an XML in-memory document.
9635 *
9636 * Returns the new parser context or NULL
9637 */
9638xmlParserCtxtPtr
9639xmlCreateDocParserCtxt(xmlChar *cur) {
9640 int len;
9641
9642 if (cur == NULL)
9643 return(NULL);
9644 len = xmlStrlen(cur);
9645 return(xmlCreateMemoryParserCtxt((char *)cur, len));
9646}
9647
9648/**
9649 * xmlSAXParseDoc:
9650 * @sax: the SAX handler block
9651 * @cur: a pointer to an array of xmlChar
9652 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9653 * documents
9654 *
9655 * parse an XML in-memory document and build a tree.
9656 * It use the given SAX function block to handle the parsing callback.
9657 * If sax is NULL, fallback to the default DOM tree building routines.
9658 *
9659 * Returns the resulting document tree
9660 */
9661
9662xmlDocPtr
9663xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
9664 xmlDocPtr ret;
9665 xmlParserCtxtPtr ctxt;
9666
9667 if (cur == NULL) return(NULL);
9668
9669
9670 ctxt = xmlCreateDocParserCtxt(cur);
9671 if (ctxt == NULL) return(NULL);
9672 if (sax != NULL) {
9673 ctxt->sax = sax;
9674 ctxt->userData = NULL;
9675 }
9676
9677 xmlParseDocument(ctxt);
9678 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9679 else {
9680 ret = NULL;
9681 xmlFreeDoc(ctxt->myDoc);
9682 ctxt->myDoc = NULL;
9683 }
9684 if (sax != NULL)
9685 ctxt->sax = NULL;
9686 xmlFreeParserCtxt(ctxt);
9687
9688 return(ret);
9689}
9690
9691/**
9692 * xmlParseDoc:
9693 * @cur: a pointer to an array of xmlChar
9694 *
9695 * parse an XML in-memory document and build a tree.
9696 *
9697 * Returns the resulting document tree
9698 */
9699
9700xmlDocPtr
9701xmlParseDoc(xmlChar *cur) {
9702 return(xmlSAXParseDoc(NULL, cur, 0));
9703}
9704
Daniel Veillard260a68f1998-08-13 03:39:55 +00009705
Daniel Veillardb05deb71999-08-10 19:04:08 +00009706/************************************************************************
9707 * *
Daniel Veillard51e3b151999-11-12 17:02:31 +00009708 * Miscellaneous *
Daniel Veillardb05deb71999-08-10 19:04:08 +00009709 * *
9710 ************************************************************************/
9711
Daniel Veillard7cfce322000-10-04 12:40:27 +00009712#ifdef LIBXML_XPATH_ENABLED
9713#include <libxml/xpath.h>
9714#endif
9715
/* set to 1 by xmlInitParser() once the global setup below has been run */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser: runs the global
 * initialization routines once and does nothing on later calls.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 */

void
xmlInitParser(void) {
    if (!xmlParserInitialized) {
        xmlInitCharEncodingHandlers();
        xmlInitializePredefinedEntities();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
        xmlRegisterDefaultOutputCallbacks();
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        xmlParserInitialized = 1;
    }
}
9744
Daniel Veillarda819dac1999-11-24 18:04:22 +00009745/**
9746 * xmlCleanupParser:
9747 *
9748 * Cleanup function for the XML parser. It tries to reclaim all
9749 * parsing related global memory allocated for the parser processing.
9750 * It doesn't deallocate any document related memory. Calling this
9751 * function should not prevent reusing the parser.
9752 */
9753
9754void
9755xmlCleanupParser(void) {
Daniel Veillardbc765302000-10-01 18:23:35 +00009756 xmlParserInitialized = 0;
Daniel Veillarda819dac1999-11-24 18:04:22 +00009757 xmlCleanupCharEncodingHandlers();
Daniel Veillardf5c2c871999-12-01 09:51:45 +00009758 xmlCleanupPredefinedEntities();
Daniel Veillarda819dac1999-11-24 18:04:22 +00009759}
Daniel Veillardb05deb71999-08-10 19:04:08 +00009760
Daniel Veillard11e00581998-10-24 18:27:49 +00009761/**
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00009762 * xmlPedanticParserDefault:
9763 * @val: int 0 or 1
9764 *
9765 * Set and return the previous value for enabling pedantic warnings.
9766 *
9767 * Returns the last value for 0 for no substitution, 1 for substitution.
9768 */
9769
9770int
9771xmlPedanticParserDefault(int val) {
9772 int old = xmlPedanticParserDefaultValue;
9773
9774 xmlPedanticParserDefaultValue = val;
9775 return(old);
9776}
9777
9778/**
Daniel Veillardcf461992000-03-14 18:30:20 +00009779 * xmlSubstituteEntitiesDefault:
Daniel Veillardb05deb71999-08-10 19:04:08 +00009780 * @val: int 0 or 1
9781 *
9782 * Set and return the previous value for default entity support.
9783 * Initially the parser always keep entity references instead of substituting
9784 * entity values in the output. This function has to be used to change the
9785 * default parser behaviour
9786 * SAX::subtituteEntities() has to be used for changing that on a file by
9787 * file basis.
9788 *
9789 * Returns the last value for 0 for no substitution, 1 for substitution.
9790 */
9791
9792int
9793xmlSubstituteEntitiesDefault(int val) {
9794 int old = xmlSubstituteEntitiesDefaultValue;
9795
9796 xmlSubstituteEntitiesDefaultValue = val;
9797 return(old);
9798}
9799
Daniel Veillardfb76c402000-03-04 11:39:42 +00009800/**
9801 * xmlKeepBlanksDefault:
9802 * @val: int 0 or 1
9803 *
9804 * Set and return the previous value for default blanks text nodes support.
9805 * The 1.x version of the parser used an heuristic to try to detect
9806 * ignorable white spaces. As a result the SAX callback was generating
9807 * ignorableWhitespace() callbacks instead of characters() one, and when
9808 * using the DOM output text nodes containing those blanks were not generated.
9809 * The 2.x and later version will switch to the XML standard way and
9810 * ignorableWhitespace() are only generated when running the parser in
9811 * validating mode and when the current element doesn't allow CDATA or
9812 * mixed content.
9813 * This function is provided as a way to force the standard behaviour
9814 * on 1.X libs and to switch back to the old mode for compatibility when
9815 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
9816 * by using xmlIsBlankNode() commodity function to detect the "empty"
9817 * nodes generated.
9818 * This value also affect autogeneration of indentation when saving code
9819 * if blanks sections are kept, indentation is not generated.
9820 *
9821 * Returns the last value for 0 for no substitution, 1 for substitution.
9822 */
9823
9824int
9825xmlKeepBlanksDefault(int val) {
9826 int old = xmlKeepBlanksDefaultValue;
9827
9828 xmlKeepBlanksDefaultValue = val;
9829 xmlIndentTreeOutput = !val;
9830 return(old);
9831}
9832