blob: 015640c0dae5c40ee29ab1109bf93ed7653c466b [file] [log] [blame]
Daniel Veillard260a68f1998-08-13 03:39:55 +00001/*
2 * parser.c : an XML 1.0 non-verifying parser
3 *
4 * See Copyright for the status of this software.
5 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006 * Daniel.Veillard@w3.org
Daniel Veillard260a68f1998-08-13 03:39:55 +00007 */
8
9#ifdef WIN32
Daniel Veillard3c558c31999-12-22 11:30:41 +000010#include "win32config.h"
Daniel Veillard260a68f1998-08-13 03:39:55 +000011#else
Daniel Veillard7f7d1111999-09-22 09:46:25 +000012#include "config.h"
Daniel Veillard260a68f1998-08-13 03:39:55 +000013#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +000014
Daniel Veillard260a68f1998-08-13 03:39:55 +000015#include <stdio.h>
Daniel Veillard260a68f1998-08-13 03:39:55 +000016#include <string.h> /* for memset() only */
Daniel Veillard7f7d1111999-09-22 09:46:25 +000017#ifdef HAVE_CTYPE_H
18#include <ctype.h>
19#endif
20#ifdef HAVE_STDLIB_H
Seth Alvese7f12e61998-10-01 20:51:15 +000021#include <stdlib.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000022#endif
23#ifdef HAVE_SYS_STAT_H
Daniel Veillard260a68f1998-08-13 03:39:55 +000024#include <sys/stat.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000025#endif
Daniel Veillard260a68f1998-08-13 03:39:55 +000026#ifdef HAVE_FCNTL_H
27#include <fcntl.h>
28#endif
29#ifdef HAVE_UNISTD_H
30#include <unistd.h>
31#endif
32#ifdef HAVE_ZLIB_H
33#include <zlib.h>
34#endif
35
Daniel Veillard6454aec1999-09-02 22:04:43 +000036#include "xmlmemory.h"
Daniel Veillard260a68f1998-08-13 03:39:55 +000037#include "tree.h"
38#include "parser.h"
39#include "entities.h"
Daniel Veillard27d88741999-05-29 11:51:49 +000040#include "encoding.h"
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000041#include "valid.h"
Daniel Veillard1e346af1999-02-22 10:33:01 +000042#include "parserInternals.h"
Daniel Veillarde2d034d1999-07-27 19:52:06 +000043#include "xmlIO.h"
Daniel Veillard7f7d1111999-09-22 09:46:25 +000044#include "xml-error.h"
Daniel Veillard260a68f1998-08-13 03:39:55 +000045
Daniel Veillarddbfd6411999-12-28 16:35:14 +000046#define XML_PARSER_BIG_BUFFER_SIZE 1000
47#define XML_PARSER_BUFFER_SIZE 100
48
Daniel Veillard14fff061999-06-22 21:49:07 +000049const char *xmlParserVersion = LIBXML_VERSION;
50
Daniel Veillard3c558c31999-12-22 11:30:41 +000051/*
52 * List of XML prefixed PI allowed by W3C specs
53 */
54
55const char *xmlW3CPIs[] = {
56 "xml-stylesheet",
57 NULL
58};
Daniel Veillarde2d034d1999-07-27 19:52:06 +000059
60/************************************************************************
61 * *
62 * Input handling functions for progressive parsing *
63 * *
64 ************************************************************************/
65
66/* #define DEBUG_INPUT */
Daniel Veillarddbfd6411999-12-28 16:35:14 +000067/* #define DEBUG_STACK */
68/* #define DEBUG_PUSH */
69
Daniel Veillarde2d034d1999-07-27 19:52:06 +000070
Daniel Veillardb05deb71999-08-10 19:04:08 +000071#define INPUT_CHUNK 250
72/* we need to keep enough input to show errors in context */
73#define LINE_LEN 80
Daniel Veillarde2d034d1999-07-27 19:52:06 +000074
75#ifdef DEBUG_INPUT
76#define CHECK_BUFFER(in) check_buffer(in)
Daniel Veillarde2d034d1999-07-27 19:52:06 +000077
78void check_buffer(xmlParserInputPtr in) {
79 if (in->base != in->buf->buffer->content) {
80 fprintf(stderr, "xmlParserInput: base mismatch problem\n");
81 }
82 if (in->cur < in->base) {
83 fprintf(stderr, "xmlParserInput: cur < base problem\n");
84 }
85 if (in->cur > in->base + in->buf->buffer->use) {
86 fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
87 }
88 fprintf(stderr,"buffer %x : content %x, cur %d, use %d, size %d\n",
89 (int) in, (int) in->buf->buffer->content, in->cur - in->base,
90 in->buf->buffer->use, in->buf->buffer->size);
91}
92
Daniel Veillardb05deb71999-08-10 19:04:08 +000093#else
94#define CHECK_BUFFER(in)
95#endif
96
Daniel Veillarde2d034d1999-07-27 19:52:06 +000097
98/**
99 * xmlParserInputRead:
100 * @in: an XML parser input
101 * @len: an indicative size for the lookahead
102 *
103 * This function refresh the input for the parser. It doesn't try to
104 * preserve pointers to the input buffer, and discard already read data
105 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000106 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000107 * end of this entity
108 */
109int
110xmlParserInputRead(xmlParserInputPtr in, int len) {
111 int ret;
112 int used;
113 int index;
114
115#ifdef DEBUG_INPUT
116 fprintf(stderr, "Read\n");
117#endif
118 if (in->buf == NULL) return(-1);
119 if (in->base == NULL) return(-1);
120 if (in->cur == NULL) return(-1);
121 if (in->buf->buffer == NULL) return(-1);
122
123 CHECK_BUFFER(in);
124
125 used = in->cur - in->buf->buffer->content;
126 ret = xmlBufferShrink(in->buf->buffer, used);
127 if (ret > 0) {
128 in->cur -= ret;
129 in->consumed += ret;
130 }
131 ret = xmlParserInputBufferRead(in->buf, len);
132 if (in->base != in->buf->buffer->content) {
133 /*
134 * the buffer has been realloced
135 */
136 index = in->cur - in->base;
137 in->base = in->buf->buffer->content;
138 in->cur = &in->buf->buffer->content[index];
139 }
140
141 CHECK_BUFFER(in);
142
143 return(ret);
144}
145
146/**
147 * xmlParserInputGrow:
148 * @in: an XML parser input
149 * @len: an indicative size for the lookahead
150 *
151 * This function increase the input for the parser. It tries to
152 * preserve pointers to the input buffer, and keep already read data
153 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000154 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000155 * end of this entity
156 */
157int
158xmlParserInputGrow(xmlParserInputPtr in, int len) {
159 int ret;
160 int index;
161
162#ifdef DEBUG_INPUT
163 fprintf(stderr, "Grow\n");
164#endif
165 if (in->buf == NULL) return(-1);
166 if (in->base == NULL) return(-1);
167 if (in->cur == NULL) return(-1);
168 if (in->buf->buffer == NULL) return(-1);
169
170 CHECK_BUFFER(in);
171
172 index = in->cur - in->base;
173 if (in->buf->buffer->use > index + INPUT_CHUNK) {
174
175 CHECK_BUFFER(in);
176
177 return(0);
178 }
Daniel Veillardda07c342000-01-25 18:31:22 +0000179 if ((in->buf->httpIO != NULL) || (in->buf->ftpIO != NULL) ||
180 (in->buf->file != NULL) ||
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000181#ifdef HAVE_ZLIB_H
182 (in->buf->gzfile != NULL) ||
183#endif
184 (in->buf->fd >= 0))
185 ret = xmlParserInputBufferGrow(in->buf, len);
186 else
187 return(0);
Daniel Veillard10a2c651999-12-12 13:03:50 +0000188
189 /*
190 * NOTE : in->base may be a "dandling" i.e. freed pointer in this
191 * block, but we use it really as an integer to do some
192 * pointer arithmetic. Insure will raise it as a bug but in
193 * that specific case, that's not !
194 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000195 if (in->base != in->buf->buffer->content) {
196 /*
197 * the buffer has been realloced
198 */
199 index = in->cur - in->base;
200 in->base = in->buf->buffer->content;
201 in->cur = &in->buf->buffer->content[index];
202 }
203
204 CHECK_BUFFER(in);
205
206 return(ret);
207}
208
209/**
210 * xmlParserInputShrink:
211 * @in: an XML parser input
212 *
213 * This function removes used input for the parser.
214 */
215void
216xmlParserInputShrink(xmlParserInputPtr in) {
217 int used;
218 int ret;
219 int index;
220
221#ifdef DEBUG_INPUT
222 fprintf(stderr, "Shrink\n");
223#endif
224 if (in->buf == NULL) return;
225 if (in->base == NULL) return;
226 if (in->cur == NULL) return;
227 if (in->buf->buffer == NULL) return;
228
229 CHECK_BUFFER(in);
230
231 used = in->cur - in->buf->buffer->content;
232 if (used > INPUT_CHUNK) {
Daniel Veillardb05deb71999-08-10 19:04:08 +0000233 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000234 if (ret > 0) {
235 in->cur -= ret;
236 in->consumed += ret;
237 }
238 }
239
240 CHECK_BUFFER(in);
241
242 if (in->buf->buffer->use > INPUT_CHUNK) {
243 return;
244 }
245 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
246 if (in->base != in->buf->buffer->content) {
247 /*
248 * the buffer has been realloced
249 */
250 index = in->cur - in->base;
251 in->base = in->buf->buffer->content;
252 in->cur = &in->buf->buffer->content[index];
253 }
254
255 CHECK_BUFFER(in);
256}
257
Daniel Veillard260a68f1998-08-13 03:39:55 +0000258/************************************************************************
259 * *
260 * Parser stacks related functions and macros *
261 * *
262 ************************************************************************/
Daniel Veillard011b63c1999-06-02 17:44:04 +0000263
264int xmlSubstituteEntitiesDefaultValue = 0;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000265int xmlDoValidityCheckingDefaultValue = 0;
Daniel Veillard10a2c651999-12-12 13:03:50 +0000266xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
267 const xmlChar ** str);
Daniel Veillard011b63c1999-06-02 17:44:04 +0000268
Daniel Veillard260a68f1998-08-13 03:39:55 +0000269/*
270 * Generic function for accessing stacks in the Parser Context
271 */
272
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000273#define PUSH_AND_POP(scope, type, name) \
274scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
Daniel Veillard260a68f1998-08-13 03:39:55 +0000275 if (ctxt->name##Nr >= ctxt->name##Max) { \
276 ctxt->name##Max *= 2; \
Daniel Veillard6454aec1999-09-02 22:04:43 +0000277 ctxt->name##Tab = (void *) xmlRealloc(ctxt->name##Tab, \
Daniel Veillard260a68f1998-08-13 03:39:55 +0000278 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
279 if (ctxt->name##Tab == NULL) { \
280 fprintf(stderr, "realloc failed !\n"); \
Daniel Veillard0142b842000-01-14 14:45:24 +0000281 return(0); \
Daniel Veillard260a68f1998-08-13 03:39:55 +0000282 } \
283 } \
284 ctxt->name##Tab[ctxt->name##Nr] = value; \
285 ctxt->name = value; \
286 return(ctxt->name##Nr++); \
287} \
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000288scope type name##Pop(xmlParserCtxtPtr ctxt) { \
Daniel Veillardd692aa41999-02-28 21:54:31 +0000289 type ret; \
Daniel Veillard260a68f1998-08-13 03:39:55 +0000290 if (ctxt->name##Nr <= 0) return(0); \
291 ctxt->name##Nr--; \
Daniel Veillardccb09631998-10-27 06:21:04 +0000292 if (ctxt->name##Nr > 0) \
293 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
294 else \
295 ctxt->name = NULL; \
Daniel Veillardd692aa41999-02-28 21:54:31 +0000296 ret = ctxt->name##Tab[ctxt->name##Nr]; \
297 ctxt->name##Tab[ctxt->name##Nr] = 0; \
298 return(ret); \
Daniel Veillard260a68f1998-08-13 03:39:55 +0000299} \
300
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000301PUSH_AND_POP(extern, xmlParserInputPtr, input)
302PUSH_AND_POP(extern, xmlNodePtr, node)
303PUSH_AND_POP(extern, xmlChar*, name)
Daniel Veillard260a68f1998-08-13 03:39:55 +0000304
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. They all expect a variable named ctxt (the parser
 * context) to be in scope.
 *
 * Dirty macros, i.e. one needs to make assumptions on the context to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *   CUR     returns the current xmlChar value, i.e. a 8 bit value if compiled
 *           in ISO-Latin or UTF-8, and the current 16 bit value if compiled
 *           in UNICODE mode. This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *   SHRINK  drop already consumed input, popping the input when it is
 *           exhausted and cannot be grown.
 *   GROW    ask for more input, popping the input when it is exhausted
 *           and cannot be grown.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   CURRENT Returns the current char value, with the full decoding of
 *           UTF-8 if we are using this mode. It returns an int.
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
 */

#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
/* Fully parenthesized so the expansion is a single expression. */
#define SKIP(val) (ctxt->nbChars += (val), ctxt->input->cur += (val))
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR ctxt->input->cur

/*
 * SHRINK and GROW expand to two statements; they are wrapped in
 * do { } while (0) so that both statements stay under the control of an
 * enclosing if/else or loop written without braces.
 */
#define SHRINK do {                                                     \
    xmlParserInputShrink(ctxt->input);                                  \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)

#define GROW do {                                                       \
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                       \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)

/*
 * NOTE: the expansion of SKIP_BLANKS ends with its own ';' and NEXT
 * expands to a bare { } block. Both forms are kept as they are because
 * uses elsewhere in the parser may rely on them.
 */
#define SKIP_BLANKS                                                     \
    do {                                                                \
        while (IS_BLANK(CUR)) NEXT;                                     \
        while ((CUR == 0) && (ctxt->inputNr > 1))                       \
            xmlPopInput(ctxt);                                          \
        if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
        if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);   \
    } while (IS_BLANK(CUR));

#define CURRENT (*ctxt->input->cur)
#define NEXT {                                                          \
    if (ctxt->token != 0) ctxt->token = 0;                              \
    else {                                                              \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {          \
            xmlPopInput(ctxt);                                          \
    } else {                                                            \
        if (*(ctxt->input->cur) == '\n') {                              \
            ctxt->input->line++; ctxt->input->col = 1;                  \
        } else ctxt->input->col++;                                      \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }                                                                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);       \
}}
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000373
Daniel Veillard260a68f1998-08-13 03:39:55 +0000374
Daniel Veillardb05deb71999-08-10 19:04:08 +0000375/************************************************************************
376 * *
377 * Commodity functions to handle entities processing *
378 * *
379 ************************************************************************/
Daniel Veillard260a68f1998-08-13 03:39:55 +0000380
Daniel Veillard11e00581998-10-24 18:27:49 +0000381/**
382 * xmlPopInput:
383 * @ctxt: an XML parser context
384 *
Daniel Veillard260a68f1998-08-13 03:39:55 +0000385 * xmlPopInput: the current input pointed by ctxt->input came to an end
386 * pop it and return the next char.
387 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000388 * Returns the current xmlChar in the parser context
Daniel Veillard260a68f1998-08-13 03:39:55 +0000389 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000390xmlChar
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000391xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000392 if (ctxt->inputNr == 1) return(0); /* End of main Input */
Daniel Veillardbc50b591999-03-01 12:28:53 +0000393 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillardb05deb71999-08-10 19:04:08 +0000394 if ((*ctxt->input->cur == 0) &&
395 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
396 return(xmlPopInput(ctxt));
Daniel Veillard260a68f1998-08-13 03:39:55 +0000397 return(CUR);
398}
399
Daniel Veillard11e00581998-10-24 18:27:49 +0000400/**
401 * xmlPushInput:
402 * @ctxt: an XML parser context
403 * @input: an XML parser input fragment (entity, XML fragment ...).
404 *
Daniel Veillard260a68f1998-08-13 03:39:55 +0000405 * xmlPushInput: switch to a new input stream which is stacked on top
406 * of the previous one(s).
407 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000408void
409xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000410 if (input == NULL) return;
411 inputPush(ctxt, input);
412}
413
Daniel Veillard11e00581998-10-24 18:27:49 +0000414/**
Daniel Veillardd692aa41999-02-28 21:54:31 +0000415 * xmlFreeInputStream:
Daniel Veillard51e3b151999-11-12 17:02:31 +0000416 * @input: an xmlParserInputPtr
Daniel Veillardd692aa41999-02-28 21:54:31 +0000417 *
418 * Free up an input stream.
419 */
420void
421xmlFreeInputStream(xmlParserInputPtr input) {
422 if (input == NULL) return;
423
Daniel Veillard6454aec1999-09-02 22:04:43 +0000424 if (input->filename != NULL) xmlFree((char *) input->filename);
425 if (input->directory != NULL) xmlFree((char *) input->directory);
Daniel Veillardd692aa41999-02-28 21:54:31 +0000426 if ((input->free != NULL) && (input->base != NULL))
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000427 input->free((xmlChar *) input->base);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000428 if (input->buf != NULL)
429 xmlFreeParserInputBuffer(input->buf);
Daniel Veillardd692aa41999-02-28 21:54:31 +0000430 memset(input, -1, sizeof(xmlParserInput));
Daniel Veillard6454aec1999-09-02 22:04:43 +0000431 xmlFree(input);
Daniel Veillardd692aa41999-02-28 21:54:31 +0000432}
433
434/**
Daniel Veillardb05deb71999-08-10 19:04:08 +0000435 * xmlNewInputStream:
436 * @ctxt: an XML parser context
437 *
438 * Create a new input stream structure
439 * Returns the new input stream or NULL
440 */
441xmlParserInputPtr
442xmlNewInputStream(xmlParserCtxtPtr ctxt) {
443 xmlParserInputPtr input;
444
Daniel Veillard6454aec1999-09-02 22:04:43 +0000445 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
Daniel Veillardb05deb71999-08-10 19:04:08 +0000446 if (input == NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000447 ctxt->errNo = XML_ERR_NO_MEMORY;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000448 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +0000449 ctxt->sax->error(ctxt->userData,
450 "malloc: couldn't allocate a new input stream\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000451 ctxt->errNo = XML_ERR_NO_MEMORY;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000452 return(NULL);
453 }
454 input->filename = NULL;
455 input->directory = NULL;
456 input->base = NULL;
457 input->cur = NULL;
458 input->buf = NULL;
459 input->line = 1;
460 input->col = 1;
461 input->buf = NULL;
462 input->free = NULL;
463 input->consumed = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000464 input->length = 0;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000465 return(input);
466}
467
468/**
Daniel Veillard11e00581998-10-24 18:27:49 +0000469 * xmlNewEntityInputStream:
470 * @ctxt: an XML parser context
471 * @entity: an Entity pointer
472 *
Daniel Veillard011b63c1999-06-02 17:44:04 +0000473 * Create a new input stream based on an xmlEntityPtr
Daniel Veillardb96e6431999-08-29 21:02:19 +0000474 *
475 * Returns the new input stream or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +0000476 */
Daniel Veillardccb09631998-10-27 06:21:04 +0000477xmlParserInputPtr
478xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000479 xmlParserInputPtr input;
480
481 if (entity == NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000482 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillarde3bffb91998-11-08 14:40:56 +0000483 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +0000484 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +0000485 "internal: xmlNewEntityInputStream entity = NULL\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000486 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillardccb09631998-10-27 06:21:04 +0000487 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000488 }
489 if (entity->content == NULL) {
Daniel Veillardb96e6431999-08-29 21:02:19 +0000490 switch (entity->type) {
491 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000492 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
Daniel Veillardb96e6431999-08-29 21:02:19 +0000493 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
494 ctxt->sax->error(ctxt->userData,
495 "xmlNewEntityInputStream unparsed entity !\n");
496 break;
497 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
498 case XML_EXTERNAL_PARAMETER_ENTITY:
499 return(xmlLoadExternalEntity((char *) entity->SystemID,
Daniel Veillard686d6b62000-01-03 11:08:02 +0000500 (char *) entity->ExternalID, ctxt));
Daniel Veillardb96e6431999-08-29 21:02:19 +0000501 case XML_INTERNAL_GENERAL_ENTITY:
502 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
503 ctxt->sax->error(ctxt->userData,
504 "Internal entity %s without content !\n", entity->name);
505 break;
506 case XML_INTERNAL_PARAMETER_ENTITY:
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000507 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillardb96e6431999-08-29 21:02:19 +0000508 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
509 ctxt->sax->error(ctxt->userData,
510 "Internal parameter entity %s without content !\n", entity->name);
511 break;
512 case XML_INTERNAL_PREDEFINED_ENTITY:
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000513 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillardb96e6431999-08-29 21:02:19 +0000514 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
515 ctxt->sax->error(ctxt->userData,
516 "Predefined entity %s without content !\n", entity->name);
517 break;
518 }
Daniel Veillardccb09631998-10-27 06:21:04 +0000519 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000520 }
Daniel Veillardb05deb71999-08-10 19:04:08 +0000521 input = xmlNewInputStream(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000522 if (input == NULL) {
Daniel Veillardccb09631998-10-27 06:21:04 +0000523 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000524 }
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000525 input->filename = (char *) entity->SystemID; /* TODO !!! char <- xmlChar */
Daniel Veillard260a68f1998-08-13 03:39:55 +0000526 input->base = entity->content;
527 input->cur = entity->content;
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000528 input->length = entity->length;
Daniel Veillardccb09631998-10-27 06:21:04 +0000529 return(input);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000530}
531
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000532/**
533 * xmlNewStringInputStream:
534 * @ctxt: an XML parser context
Daniel Veillardb05deb71999-08-10 19:04:08 +0000535 * @buffer: an memory buffer
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000536 *
537 * Create a new input stream based on a memory buffer.
Daniel Veillard1e346af1999-02-22 10:33:01 +0000538 * Returns the new input stream
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000539 */
540xmlParserInputPtr
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000541xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000542 xmlParserInputPtr input;
543
Daniel Veillardb05deb71999-08-10 19:04:08 +0000544 if (buffer == NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000545 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000546 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +0000547 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000548 "internal: xmlNewStringInputStream string = NULL\n");
549 return(NULL);
550 }
Daniel Veillardb05deb71999-08-10 19:04:08 +0000551 input = xmlNewInputStream(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000552 if (input == NULL) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000553 return(NULL);
554 }
Daniel Veillardb05deb71999-08-10 19:04:08 +0000555 input->base = buffer;
556 input->cur = buffer;
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000557 input->length = xmlStrlen(buffer);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000558 return(input);
559}
560
Daniel Veillard011b63c1999-06-02 17:44:04 +0000561/**
562 * xmlNewInputFromFile:
563 * @ctxt: an XML parser context
564 * @filename: the filename to use as entity
565 *
566 * Create a new input stream based on a file.
567 *
568 * Returns the new input stream or NULL in case of error
569 */
570xmlParserInputPtr
571xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000572 xmlParserInputBufferPtr buf;
Daniel Veillard011b63c1999-06-02 17:44:04 +0000573 xmlParserInputPtr inputStream;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000574 char *directory = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +0000575
Daniel Veillardb05deb71999-08-10 19:04:08 +0000576 if (ctxt == NULL) return(NULL);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000577 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
Daniel Veillardb05deb71999-08-10 19:04:08 +0000578 if (buf == NULL) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000579 char name[XML_PARSER_BIG_BUFFER_SIZE];
Daniel Veillard011b63c1999-06-02 17:44:04 +0000580
Daniel Veillardb05deb71999-08-10 19:04:08 +0000581 if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
582#ifdef WIN32
583 sprintf(name, "%s\\%s", ctxt->input->directory, filename);
584#else
585 sprintf(name, "%s/%s", ctxt->input->directory, filename);
586#endif
587 buf = xmlParserInputBufferCreateFilename(name,
588 XML_CHAR_ENCODING_NONE);
589 if (buf != NULL)
Daniel Veillard686d6b62000-01-03 11:08:02 +0000590 directory = xmlParserGetDirectory(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +0000591 }
592 if ((buf == NULL) && (ctxt->directory != NULL)) {
593#ifdef WIN32
594 sprintf(name, "%s\\%s", ctxt->directory, filename);
595#else
596 sprintf(name, "%s/%s", ctxt->directory, filename);
597#endif
598 buf = xmlParserInputBufferCreateFilename(name,
599 XML_CHAR_ENCODING_NONE);
600 if (buf != NULL)
Daniel Veillard686d6b62000-01-03 11:08:02 +0000601 directory = xmlParserGetDirectory(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +0000602 }
603 if (buf == NULL)
604 return(NULL);
605 }
606 if (directory == NULL)
607 directory = xmlParserGetDirectory(filename);
608
609 inputStream = xmlNewInputStream(ctxt);
Daniel Veillard011b63c1999-06-02 17:44:04 +0000610 if (inputStream == NULL) {
Daniel Veillard6454aec1999-09-02 22:04:43 +0000611 if (directory != NULL) xmlFree((char *) directory);
Daniel Veillard011b63c1999-06-02 17:44:04 +0000612 return(NULL);
613 }
614
Daniel Veillard6454aec1999-09-02 22:04:43 +0000615 inputStream->filename = xmlMemStrdup(filename);
Daniel Veillardb05deb71999-08-10 19:04:08 +0000616 inputStream->directory = directory;
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000617 inputStream->buf = buf;
Daniel Veillard011b63c1999-06-02 17:44:04 +0000618
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000619 inputStream->base = inputStream->buf->buffer->content;
620 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000621 if ((ctxt->directory == NULL) && (directory != NULL))
Daniel Veillard294cbca1999-12-03 13:19:09 +0000622 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
Daniel Veillard011b63c1999-06-02 17:44:04 +0000623 return(inputStream);
624}
625
626/************************************************************************
627 * *
Daniel Veillardb05deb71999-08-10 19:04:08 +0000628 * Commodity functions to handle parser contexts *
629 * *
630 ************************************************************************/
631
632/**
633 * xmlInitParserCtxt:
634 * @ctxt: an XML parser context
635 *
636 * Initialize a parser context
637 */
638
639void
640xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
641{
642 xmlSAXHandler *sax;
643
Daniel Veillard6454aec1999-09-02 22:04:43 +0000644 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
Daniel Veillardb05deb71999-08-10 19:04:08 +0000645 if (sax == NULL) {
646 fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
647 }
648
649 /* Allocate the Input stack */
Daniel Veillard6454aec1999-09-02 22:04:43 +0000650 ctxt->inputTab = (xmlParserInputPtr *) xmlMalloc(5 * sizeof(xmlParserInputPtr));
Daniel Veillardb05deb71999-08-10 19:04:08 +0000651 ctxt->inputNr = 0;
652 ctxt->inputMax = 5;
653 ctxt->input = NULL;
654 ctxt->version = NULL;
655 ctxt->encoding = NULL;
656 ctxt->standalone = -1;
657 ctxt->hasExternalSubset = 0;
658 ctxt->hasPErefs = 0;
659 ctxt->html = 0;
660 ctxt->external = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000661 ctxt->instate = XML_PARSER_START;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000662 ctxt->token = 0;
663 ctxt->directory = NULL;
664
665 /* Allocate the Node stack */
Daniel Veillard6454aec1999-09-02 22:04:43 +0000666 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
Daniel Veillardb05deb71999-08-10 19:04:08 +0000667 ctxt->nodeNr = 0;
668 ctxt->nodeMax = 10;
669 ctxt->node = NULL;
670
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000671 /* Allocate the Name stack */
672 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
673 ctxt->nameNr = 0;
674 ctxt->nameMax = 10;
675 ctxt->name = NULL;
676
Daniel Veillardb05deb71999-08-10 19:04:08 +0000677 if (sax == NULL) ctxt->sax = &xmlDefaultSAXHandler;
678 else {
679 ctxt->sax = sax;
680 memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
681 }
682 ctxt->userData = ctxt;
683 ctxt->myDoc = NULL;
684 ctxt->wellFormed = 1;
685 ctxt->valid = 1;
686 ctxt->validate = xmlDoValidityCheckingDefaultValue;
687 ctxt->vctxt.userData = ctxt;
Daniel Veillard5feb8492000-02-02 17:15:36 +0000688 if (ctxt->validate) {
689 ctxt->vctxt.error = xmlParserValidityError;
690 ctxt->vctxt.warning = xmlParserValidityWarning;
691 } else {
692 ctxt->vctxt.error = NULL;
693 ctxt->vctxt.warning = NULL;
694 }
Daniel Veillardb05deb71999-08-10 19:04:08 +0000695 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
696 ctxt->record_info = 0;
Daniel Veillard10a2c651999-12-12 13:03:50 +0000697 ctxt->nbChars = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000698 ctxt->checkIndex = 0;
699 ctxt->errNo = XML_ERR_OK;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000700 xmlInitNodeInfoSeq(&ctxt->node_seq);
701}
702
703/**
704 * xmlFreeParserCtxt:
705 * @ctxt: an XML parser context
706 *
707 * Free all the memory used by a parser context. However the parsed
708 * document in ctxt->myDoc is not freed.
709 */
710
711void
712xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
713{
714 xmlParserInputPtr input;
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000715 xmlChar *oldname;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000716
717 if (ctxt == NULL) return;
718
719 while ((input = inputPop(ctxt)) != NULL) {
720 xmlFreeInputStream(input);
721 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000722 while ((oldname = namePop(ctxt)) != NULL) {
723 xmlFree(oldname);
724 }
725 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000726 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
727 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
728 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
729 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
Daniel Veillardb05deb71999-08-10 19:04:08 +0000730 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
Daniel Veillard6454aec1999-09-02 22:04:43 +0000731 xmlFree(ctxt->sax);
732 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
733 xmlFree(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +0000734}
735
736/**
737 * xmlNewParserCtxt:
738 *
739 * Allocate and initialize a new parser context.
740 *
741 * Returns the xmlParserCtxtPtr or NULL
742 */
743
744xmlParserCtxtPtr
745xmlNewParserCtxt()
746{
747 xmlParserCtxtPtr ctxt;
748
Daniel Veillard6454aec1999-09-02 22:04:43 +0000749 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
Daniel Veillardb05deb71999-08-10 19:04:08 +0000750 if (ctxt == NULL) {
751 fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
752 perror("malloc");
753 return(NULL);
754 }
755 xmlInitParserCtxt(ctxt);
756 return(ctxt);
757}
758
759/**
760 * xmlClearParserCtxt:
761 * @ctxt: an XML parser context
762 *
763 * Clear (release owned resources) and reinitialize a parser context
764 */
765
766void
767xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
768{
769 xmlClearNodeInfoSeq(&ctxt->node_seq);
770 xmlInitParserCtxt(ctxt);
771}
772
773/************************************************************************
774 * *
Daniel Veillard011b63c1999-06-02 17:44:04 +0000775 * Commodity functions to handle entities *
776 * *
777 ************************************************************************/
778
Daniel Veillardb05deb71999-08-10 19:04:08 +0000779void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
780void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
Daniel Veillard10a2c651999-12-12 13:03:50 +0000781xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
782 const xmlChar **str);
Daniel Veillardb05deb71999-08-10 19:04:08 +0000783
784/**
785 * xmlParseCharRef:
786 * @ctxt: an XML parser context
787 *
788 * parse Reference declarations
789 *
790 * [66] CharRef ::= '&#' [0-9]+ ';' |
791 * '&#x' [0-9a-fA-F]+ ';'
792 *
793 * [ WFC: Legal Character ]
794 * Characters referred to using character references must match the
795 * production for Char.
796 *
Daniel Veillard10a2c651999-12-12 13:03:50 +0000797 * Returns the value parsed (as an int), 0 in case of error
Daniel Veillardb05deb71999-08-10 19:04:08 +0000798 */
799int
800xmlParseCharRef(xmlParserCtxtPtr ctxt) {
801 int val = 0;
802
803 if (ctxt->token != 0) {
804 val = ctxt->token;
805 ctxt->token = 0;
806 return(val);
807 }
808 if ((CUR == '&') && (NXT(1) == '#') &&
809 (NXT(2) == 'x')) {
810 SKIP(3);
811 while (CUR != ';') {
812 if ((CUR >= '0') && (CUR <= '9'))
813 val = val * 16 + (CUR - '0');
814 else if ((CUR >= 'a') && (CUR <= 'f'))
815 val = val * 16 + (CUR - 'a') + 10;
816 else if ((CUR >= 'A') && (CUR <= 'F'))
817 val = val * 16 + (CUR - 'A') + 10;
818 else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000819 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000820 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
821 ctxt->sax->error(ctxt->userData,
822 "xmlParseCharRef: invalid hexadecimal value\n");
823 ctxt->wellFormed = 0;
824 val = 0;
825 break;
826 }
827 NEXT;
828 }
829 if (CUR == ';')
Daniel Veillard35008381999-10-25 13:15:52 +0000830 SKIP(1); /* on purpose to avoid reentrancy problems with NEXT */
Daniel Veillardb05deb71999-08-10 19:04:08 +0000831 } else if ((CUR == '&') && (NXT(1) == '#')) {
832 SKIP(2);
833 while (CUR != ';') {
834 if ((CUR >= '0') && (CUR <= '9'))
835 val = val * 10 + (CUR - '0');
836 else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000837 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000838 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
839 ctxt->sax->error(ctxt->userData,
840 "xmlParseCharRef: invalid decimal value\n");
841 ctxt->wellFormed = 0;
842 val = 0;
843 break;
844 }
845 NEXT;
846 }
847 if (CUR == ';')
Daniel Veillard35008381999-10-25 13:15:52 +0000848 SKIP(1); /* on purpose to avoid reentrancy problems with NEXT */
Daniel Veillardb05deb71999-08-10 19:04:08 +0000849 } else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000850 ctxt->errNo = XML_ERR_INVALID_CHARREF;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000851 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
852 ctxt->sax->error(ctxt->userData,
853 "xmlParseCharRef: invalid value\n");
854 ctxt->wellFormed = 0;
855 }
856
857 /*
858 * [ WFC: Legal Character ]
859 * Characters referred to using character references must match the
860 * production for Char.
861 */
862 if (IS_CHAR(val)) {
863 return(val);
864 } else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000865 ctxt->errNo = XML_ERR_INVALID_CHAR;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000866 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000867 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
Daniel Veillardb05deb71999-08-10 19:04:08 +0000868 val);
869 ctxt->wellFormed = 0;
870 }
871 return(0);
872}
873
874/**
Daniel Veillard10a2c651999-12-12 13:03:50 +0000875 * xmlParseStringCharRef:
876 * @ctxt: an XML parser context
877 * @str: a pointer to an index in the string
878 *
879 * parse Reference declarations, variant parsing from a string rather
880 * than an an input flow.
881 *
882 * [66] CharRef ::= '&#' [0-9]+ ';' |
883 * '&#x' [0-9a-fA-F]+ ';'
884 *
885 * [ WFC: Legal Character ]
886 * Characters referred to using character references must match the
887 * production for Char.
888 *
889 * Returns the value parsed (as an int), 0 in case of error, str will be
890 * updated to the current value of the index
891 */
892int
893xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
894 const xmlChar *ptr;
895 xmlChar cur;
896 int val = 0;
897
898 if ((str == NULL) || (*str == NULL)) return(0);
899 ptr = *str;
900 cur = *ptr;
Daniel Veillard0caf07a1999-12-21 16:25:49 +0000901 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
Daniel Veillard10a2c651999-12-12 13:03:50 +0000902 ptr += 3;
903 cur = *ptr;
904 while (cur != ';') {
905 if ((cur >= '0') && (cur <= '9'))
906 val = val * 16 + (cur - '0');
907 else if ((cur >= 'a') && (cur <= 'f'))
908 val = val * 16 + (cur - 'a') + 10;
909 else if ((cur >= 'A') && (cur <= 'F'))
910 val = val * 16 + (cur - 'A') + 10;
911 else {
912 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
913 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
914 ctxt->sax->error(ctxt->userData,
915 "xmlParseCharRef: invalid hexadecimal value\n");
916 ctxt->wellFormed = 0;
917 val = 0;
918 break;
919 }
920 ptr++;
921 cur = *ptr;
922 }
923 if (cur == ';')
924 ptr++;
Daniel Veillard0142b842000-01-14 14:45:24 +0000925 } else if ((cur == '&') && (ptr[1] == '#')){
Daniel Veillard10a2c651999-12-12 13:03:50 +0000926 ptr += 2;
927 cur = *ptr;
928 while (cur != ';') {
929 if ((cur >= '0') && (cur <= '9'))
930 val = val * 10 + (cur - '0');
931 else {
932 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
933 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
934 ctxt->sax->error(ctxt->userData,
935 "xmlParseCharRef: invalid decimal value\n");
936 ctxt->wellFormed = 0;
937 val = 0;
938 break;
939 }
940 ptr++;
941 cur = *ptr;
942 }
943 if (cur == ';')
944 ptr++;
945 } else {
946 ctxt->errNo = XML_ERR_INVALID_CHARREF;
947 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
948 ctxt->sax->error(ctxt->userData,
949 "xmlParseCharRef: invalid value\n");
950 ctxt->wellFormed = 0;
951 return(0);
952 }
953 *str = ptr;
954
955 /*
956 * [ WFC: Legal Character ]
957 * Characters referred to using character references must match the
958 * production for Char.
959 */
960 if (IS_CHAR(val)) {
961 return(val);
962 } else {
963 ctxt->errNo = XML_ERR_INVALID_CHAR;
964 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
965 ctxt->sax->error(ctxt->userData,
966 "CharRef: invalid xmlChar value %d\n", val);
967 ctxt->wellFormed = 0;
968 }
969 return(0);
970}
971
972/**
Daniel Veillardb05deb71999-08-10 19:04:08 +0000973 * xmlParserHandleReference:
974 * @ctxt: the parser context
975 *
976 * [67] Reference ::= EntityRef | CharRef
977 *
978 * [68] EntityRef ::= '&' Name ';'
979 *
980 * [ WFC: Entity Declared ]
981 * the Name given in the entity reference must match that in an entity
982 * declaration, except that well-formed documents need not declare any
983 * of the following entities: amp, lt, gt, apos, quot.
984 *
985 * [ WFC: Parsed Entity ]
986 * An entity reference must not contain the name of an unparsed entity
987 *
988 * [66] CharRef ::= '&#' [0-9]+ ';' |
989 * '&#x' [0-9a-fA-F]+ ';'
990 *
991 * A PEReference may have been detectect in the current input stream
992 * the handling is done accordingly to
993 * http://www.w3.org/TR/REC-xml#entproc
994 */
995void
996xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
997 xmlParserInputPtr input;
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000998 xmlChar *name;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000999 xmlEntityPtr ent = NULL;
1000
Daniel Veillard35008381999-10-25 13:15:52 +00001001 if (ctxt->token != 0) {
Daniel Veillard35008381999-10-25 13:15:52 +00001002 return;
1003 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00001004 if (CUR != '&') return;
1005 GROW;
1006 if ((CUR == '&') && (NXT(1) == '#')) {
1007 switch(ctxt->instate) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001008 case XML_PARSER_ENTITY_DECL:
1009 case XML_PARSER_PI:
Daniel Veillardb05deb71999-08-10 19:04:08 +00001010 case XML_PARSER_CDATA_SECTION:
Daniel Veillardb05deb71999-08-10 19:04:08 +00001011 case XML_PARSER_COMMENT:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001012 /* we just ignore it there */
1013 return;
1014 case XML_PARSER_START_TAG:
1015 return;
1016 case XML_PARSER_END_TAG:
Daniel Veillardb05deb71999-08-10 19:04:08 +00001017 return;
1018 case XML_PARSER_EOF:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001019 ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001020 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1021 ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
1022 ctxt->wellFormed = 0;
1023 return;
1024 case XML_PARSER_PROLOG:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001025 case XML_PARSER_START:
1026 case XML_PARSER_MISC:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001027 ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001028 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1029 ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
1030 ctxt->wellFormed = 0;
1031 return;
1032 case XML_PARSER_EPILOG:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001033 ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001034 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1035 ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
1036 ctxt->wellFormed = 0;
1037 return;
1038 case XML_PARSER_DTD:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001039 ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001040 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1041 ctxt->sax->error(ctxt->userData,
1042 "CharRef are forbiden in DTDs!\n");
1043 ctxt->wellFormed = 0;
1044 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001045 case XML_PARSER_ENTITY_VALUE:
1046 /*
1047 * NOTE: in the case of entity values, we don't do the
Daniel Veillard51e3b151999-11-12 17:02:31 +00001048 * substitution here since we need the literal
Daniel Veillardb05deb71999-08-10 19:04:08 +00001049 * entity value to be able to save the internal
1050 * subset of the document.
1051 * This will be handled by xmlDecodeEntities
1052 */
1053 return;
1054 case XML_PARSER_CONTENT:
1055 case XML_PARSER_ATTRIBUTE_VALUE:
Daniel Veillardb96e6431999-08-29 21:02:19 +00001056 /* !!! this may not be Ok for UTF-8, multibyte sequence */
Daniel Veillardb05deb71999-08-10 19:04:08 +00001057 ctxt->token = xmlParseCharRef(ctxt);
1058 return;
1059 }
1060 return;
1061 }
1062
1063 switch(ctxt->instate) {
1064 case XML_PARSER_CDATA_SECTION:
1065 return;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001066 case XML_PARSER_PI:
Daniel Veillardb05deb71999-08-10 19:04:08 +00001067 case XML_PARSER_COMMENT:
1068 return;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001069 case XML_PARSER_START_TAG:
1070 return;
1071 case XML_PARSER_END_TAG:
1072 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001073 case XML_PARSER_EOF:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001074 ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001075 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1076 ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
1077 ctxt->wellFormed = 0;
1078 return;
1079 case XML_PARSER_PROLOG:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001080 case XML_PARSER_START:
1081 case XML_PARSER_MISC:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001082 ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001083 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1084 ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
1085 ctxt->wellFormed = 0;
1086 return;
1087 case XML_PARSER_EPILOG:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001088 ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001089 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1090 ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
1091 ctxt->wellFormed = 0;
1092 return;
1093 case XML_PARSER_ENTITY_VALUE:
1094 /*
1095 * NOTE: in the case of entity values, we don't do the
Daniel Veillard51e3b151999-11-12 17:02:31 +00001096 * substitution here since we need the literal
Daniel Veillardb05deb71999-08-10 19:04:08 +00001097 * entity value to be able to save the internal
1098 * subset of the document.
1099 * This will be handled by xmlDecodeEntities
1100 */
1101 return;
1102 case XML_PARSER_ATTRIBUTE_VALUE:
1103 /*
1104 * NOTE: in the case of attributes values, we don't do the
1105 * substitution here unless we are in a mode where
1106 * the parser is explicitely asked to substitute
1107 * entities. The SAX callback is called with values
1108 * without entity substitution.
1109 * This will then be handled by xmlDecodeEntities
1110 */
Daniel Veillardb96e6431999-08-29 21:02:19 +00001111 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001112 case XML_PARSER_ENTITY_DECL:
1113 /*
1114 * we just ignore it there
1115 * the substitution will be done once the entity is referenced
1116 */
1117 return;
1118 case XML_PARSER_DTD:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001119 ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001120 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1121 ctxt->sax->error(ctxt->userData,
1122 "Entity references are forbiden in DTDs!\n");
1123 ctxt->wellFormed = 0;
1124 return;
1125 case XML_PARSER_CONTENT:
Daniel Veillardb96e6431999-08-29 21:02:19 +00001126 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001127 }
1128
1129 NEXT;
1130 name = xmlScanName(ctxt);
1131 if (name == NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001132 ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001133 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1134 ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
1135 ctxt->wellFormed = 0;
1136 ctxt->token = '&';
1137 return;
1138 }
1139 if (NXT(xmlStrlen(name)) != ';') {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001140 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001141 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1142 ctxt->sax->error(ctxt->userData,
1143 "Entity reference: ';' expected\n");
1144 ctxt->wellFormed = 0;
1145 ctxt->token = '&';
Daniel Veillard6454aec1999-09-02 22:04:43 +00001146 xmlFree(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001147 return;
1148 }
1149 SKIP(xmlStrlen(name) + 1);
1150 if (ctxt->sax != NULL) {
1151 if (ctxt->sax->getEntity != NULL)
1152 ent = ctxt->sax->getEntity(ctxt->userData, name);
1153 }
1154
1155 /*
1156 * [ WFC: Entity Declared ]
1157 * the Name given in the entity reference must match that in an entity
1158 * declaration, except that well-formed documents need not declare any
1159 * of the following entities: amp, lt, gt, apos, quot.
1160 */
1161 if (ent == NULL)
1162 ent = xmlGetPredefinedEntity(name);
1163 if (ent == NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001164 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001165 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1166 ctxt->sax->error(ctxt->userData,
1167 "Entity reference: entity %s not declared\n",
1168 name);
1169 ctxt->wellFormed = 0;
Daniel Veillard6454aec1999-09-02 22:04:43 +00001170 xmlFree(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001171 return;
1172 }
1173
1174 /*
1175 * [ WFC: Parsed Entity ]
1176 * An entity reference must not contain the name of an unparsed entity
1177 */
1178 if (ent->type == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001179 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001180 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1181 ctxt->sax->error(ctxt->userData,
1182 "Entity reference to unparsed entity %s\n", name);
1183 ctxt->wellFormed = 0;
1184 }
1185
1186 if (ent->type == XML_INTERNAL_PREDEFINED_ENTITY) {
1187 ctxt->token = ent->content[0];
Daniel Veillard6454aec1999-09-02 22:04:43 +00001188 xmlFree(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001189 return;
1190 }
1191 input = xmlNewEntityInputStream(ctxt, ent);
1192 xmlPushInput(ctxt, input);
Daniel Veillard6454aec1999-09-02 22:04:43 +00001193 xmlFree(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001194 return;
1195}
1196
1197/**
1198 * xmlParserHandlePEReference:
1199 * @ctxt: the parser context
1200 *
1201 * [69] PEReference ::= '%' Name ';'
1202 *
1203 * [ WFC: No Recursion ]
1204 * TODO A parsed entity must not contain a recursive
1205 * reference to itself, either directly or indirectly.
1206 *
1207 * [ WFC: Entity Declared ]
1208 * In a document without any DTD, a document with only an internal DTD
1209 * subset which contains no parameter entity references, or a document
1210 * with "standalone='yes'", ... ... The declaration of a parameter
1211 * entity must precede any reference to it...
1212 *
1213 * [ VC: Entity Declared ]
1214 * In a document with an external subset or external parameter entities
1215 * with "standalone='no'", ... ... The declaration of a parameter entity
1216 * must precede any reference to it...
1217 *
1218 * [ WFC: In DTD ]
1219 * Parameter-entity references may only appear in the DTD.
1220 * NOTE: misleading but this is handled.
1221 *
1222 * A PEReference may have been detected in the current input stream
1223 * the handling is done accordingly to
1224 * http://www.w3.org/TR/REC-xml#entproc
1225 * i.e.
1226 * - Included in literal in entity values
1227 * - Included as Paraemeter Entity reference within DTDs
1228 */
1229void
1230xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001231 xmlChar *name;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001232 xmlEntityPtr entity = NULL;
1233 xmlParserInputPtr input;
1234
Daniel Veillard35008381999-10-25 13:15:52 +00001235 if (ctxt->token != 0) {
Daniel Veillard35008381999-10-25 13:15:52 +00001236 return;
1237 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00001238 if (CUR != '%') return;
1239 switch(ctxt->instate) {
1240 case XML_PARSER_CDATA_SECTION:
1241 return;
1242 case XML_PARSER_COMMENT:
1243 return;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001244 case XML_PARSER_START_TAG:
1245 return;
1246 case XML_PARSER_END_TAG:
1247 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001248 case XML_PARSER_EOF:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001249 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001250 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1251 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
1252 ctxt->wellFormed = 0;
1253 return;
1254 case XML_PARSER_PROLOG:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001255 case XML_PARSER_START:
1256 case XML_PARSER_MISC:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001257 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001258 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1259 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
1260 ctxt->wellFormed = 0;
1261 return;
1262 case XML_PARSER_ENTITY_DECL:
1263 case XML_PARSER_CONTENT:
1264 case XML_PARSER_ATTRIBUTE_VALUE:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001265 case XML_PARSER_PI:
Daniel Veillardb05deb71999-08-10 19:04:08 +00001266 /* we just ignore it there */
1267 return;
1268 case XML_PARSER_EPILOG:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001269 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001270 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1271 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1272 ctxt->wellFormed = 0;
1273 return;
1274 case XML_PARSER_ENTITY_VALUE:
1275 /*
1276 * NOTE: in the case of entity values, we don't do the
Daniel Veillard51e3b151999-11-12 17:02:31 +00001277 * substitution here since we need the literal
Daniel Veillardb05deb71999-08-10 19:04:08 +00001278 * entity value to be able to save the internal
1279 * subset of the document.
1280 * This will be handled by xmlDecodeEntities
1281 */
1282 return;
1283 case XML_PARSER_DTD:
1284 /*
1285 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1286 * In the internal DTD subset, parameter-entity references
1287 * can occur only where markup declarations can occur, not
1288 * within markup declarations.
1289 * In that case this is handled in xmlParseMarkupDecl
1290 */
1291 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1292 return;
1293 }
1294
1295 NEXT;
1296 name = xmlParseName(ctxt);
1297 if (name == NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001298 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001299 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1300 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
1301 ctxt->wellFormed = 0;
1302 } else {
1303 if (CUR == ';') {
1304 NEXT;
1305 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1306 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1307 if (entity == NULL) {
1308
1309 /*
1310 * [ WFC: Entity Declared ]
1311 * In a document without any DTD, a document with only an
1312 * internal DTD subset which contains no parameter entity
1313 * references, or a document with "standalone='yes'", ...
1314 * ... The declaration of a parameter entity must precede
1315 * any reference to it...
1316 */
1317 if ((ctxt->standalone == 1) ||
1318 ((ctxt->hasExternalSubset == 0) &&
1319 (ctxt->hasPErefs == 0))) {
1320 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1321 ctxt->sax->error(ctxt->userData,
1322 "PEReference: %%%s; not found\n", name);
1323 ctxt->wellFormed = 0;
1324 } else {
1325 /*
1326 * [ VC: Entity Declared ]
1327 * In a document with an external subset or external
1328 * parameter entities with "standalone='no'", ...
1329 * ... The declaration of a parameter entity must precede
1330 * any reference to it...
1331 */
1332 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1333 ctxt->sax->warning(ctxt->userData,
1334 "PEReference: %%%s; not found\n", name);
1335 ctxt->valid = 0;
1336 }
1337 } else {
1338 if ((entity->type == XML_INTERNAL_PARAMETER_ENTITY) ||
1339 (entity->type == XML_EXTERNAL_PARAMETER_ENTITY)) {
1340 /*
1341 * TODO !!!! handle the extra spaces added before and after
1342 * c.f. http://www.w3.org/TR/REC-xml#as-PE
1343 * TODO !!!! Avoid quote processing in parameters value
1344 * c.f. http://www.w3.org/TR/REC-xml#inliteral
1345 */
1346 input = xmlNewEntityInputStream(ctxt, entity);
1347 xmlPushInput(ctxt, input);
1348 } else {
1349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1350 ctxt->sax->error(ctxt->userData,
1351 "xmlHandlePEReference: %s is not a parameter entity\n",
1352 name);
1353 ctxt->wellFormed = 0;
1354 }
1355 }
1356 } else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001357 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001358 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1359 ctxt->sax->error(ctxt->userData,
1360 "xmlHandlePEReference: expecting ';'\n");
1361 ctxt->wellFormed = 0;
1362 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00001363 xmlFree(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001364 }
1365}
1366
/*
 * Macro used to grow the current buffer.
 *
 * Expects two variables in the enclosing function: `buffer` (an
 * xmlChar * obtained from xmlMalloc/xmlRealloc) and `buffer_size`
 * (its size in xmlChar units). The size is doubled and the buffer
 * reallocated. Pointers into the old buffer must be recomputed by the
 * caller afterwards.
 *
 * On allocation failure the old buffer is released (the result of
 * xmlRealloc goes through a temporary so the old block is not lost)
 * and the ENCLOSING FUNCTION returns NULL.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_tmp;						\
    buffer##_size *= 2;							\
    buffer##_tmp = (xmlChar *)						\
        xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));		\
    if (buffer##_tmp == NULL) {						\
        perror("realloc failed");					\
        xmlFree(buffer);						\
        return(NULL);							\
    }									\
    buffer = buffer##_tmp;						\
}
1379
Daniel Veillard011b63c1999-06-02 17:44:04 +00001380/**
1381 * xmlDecodeEntities:
1382 * @ctxt: the parser context
1383 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1384 * @len: the len to decode (in bytes !), -1 for no size limit
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001385 * @end: an end marker xmlChar, 0 if none
1386 * @end2: an end marker xmlChar, 0 if none
1387 * @end3: an end marker xmlChar, 0 if none
Daniel Veillard011b63c1999-06-02 17:44:04 +00001388 *
1389 * [67] Reference ::= EntityRef | CharRef
1390 *
1391 * [69] PEReference ::= '%' Name ';'
1392 *
1393 * Returns A newly allocated string with the substitution done. The caller
1394 * must deallocate it !
1395 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001396xmlChar *
Daniel Veillard011b63c1999-06-02 17:44:04 +00001397xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001398 xmlChar end, xmlChar end2, xmlChar end3) {
1399 xmlChar *buffer = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001400 int buffer_size = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001401 xmlChar *out = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001402
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001403 xmlChar *current = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001404 xmlEntityPtr ent;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001405 int nbchars = 0;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001406 unsigned int max = (unsigned int) len;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001407 xmlChar cur;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001408
1409 /*
1410 * allocate a translation buffer.
1411 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001412 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001413 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
Daniel Veillard011b63c1999-06-02 17:44:04 +00001414 if (buffer == NULL) {
1415 perror("xmlDecodeEntities: malloc failed");
1416 return(NULL);
1417 }
1418 out = buffer;
1419
1420 /*
1421 * Ok loop until we reach one of the ending char or a size limit.
1422 */
Daniel Veillardb05deb71999-08-10 19:04:08 +00001423 cur = CUR;
1424 while ((nbchars < max) && (cur != end) &&
1425 (cur != end2) && (cur != end3)) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00001426
Daniel Veillardb05deb71999-08-10 19:04:08 +00001427 if (cur == 0) break;
1428 if ((cur == '&') && (NXT(1) == '#')) {
1429 int val = xmlParseCharRef(ctxt);
1430 *out++ = val;
1431 nbchars += 3;
1432 } else if ((cur == '&') && (what & XML_SUBSTITUTE_REF)) {
1433 ent = xmlParseEntityRef(ctxt);
1434 if ((ent != NULL) &&
1435 (ctxt->replaceEntities != 0)) {
1436 current = ent->content;
1437 while (*current != 0) {
1438 *out++ = *current++;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001439 if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00001440 int index = out - buffer;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001441
Daniel Veillardb05deb71999-08-10 19:04:08 +00001442 growBuffer(buffer);
1443 out = &buffer[index];
Daniel Veillard011b63c1999-06-02 17:44:04 +00001444 }
1445 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00001446 nbchars += 3 + xmlStrlen(ent->name);
1447 } else if (ent != NULL) {
1448 int i = xmlStrlen(ent->name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001449 const xmlChar *cur = ent->name;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001450
1451 nbchars += i + 2;
1452 *out++ = '&';
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001453 if (out - buffer > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00001454 int index = out - buffer;
1455
1456 growBuffer(buffer);
1457 out = &buffer[index];
1458 }
1459 for (;i > 0;i--)
1460 *out++ = *cur++;
1461 *out++ = ';';
Daniel Veillard011b63c1999-06-02 17:44:04 +00001462 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00001463 } else if (cur == '%' && (what & XML_SUBSTITUTE_PEREF)) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00001464 /*
1465 * a PEReference induce to switch the entity flow,
1466 * we break here to flush the current set of chars
1467 * parsed if any. We will be called back later.
1468 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001469 if (nbchars != 0) break;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001470
1471 xmlParsePEReference(ctxt);
1472
1473 /*
1474 * Pop-up of finished entities.
1475 */
1476 while ((CUR == 0) && (ctxt->inputNr > 1))
1477 xmlPopInput(ctxt);
1478
Daniel Veillardb05deb71999-08-10 19:04:08 +00001479 break;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001480 } else {
Daniel Veillardb96e6431999-08-29 21:02:19 +00001481 /* invalid for UTF-8 , use COPY(out); !!!!!! */
Daniel Veillardb05deb71999-08-10 19:04:08 +00001482 *out++ = cur;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001483 nbchars++;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001484 if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) {
Raph Levien05240da1999-06-15 21:27:11 +00001485 int index = out - buffer;
1486
1487 growBuffer(buffer);
1488 out = &buffer[index];
1489 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00001490 NEXT;
1491 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00001492 cur = CUR;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001493 }
1494 *out++ = 0;
1495 return(buffer);
1496}
1497
Daniel Veillard10a2c651999-12-12 13:03:50 +00001498/**
1499 * xmlStringDecodeEntities:
1500 * @ctxt: the parser context
1501 * @str: the input string
1502 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1503 * @end: an end marker xmlChar, 0 if none
1504 * @end2: an end marker xmlChar, 0 if none
1505 * @end3: an end marker xmlChar, 0 if none
1506 *
1507 * [67] Reference ::= EntityRef | CharRef
1508 *
1509 * [69] PEReference ::= '%' Name ';'
1510 *
1511 * Returns A newly allocated string with the substitution done. The caller
1512 * must deallocate it !
1513 */
1514xmlChar *
1515xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1516 xmlChar end, xmlChar end2, xmlChar end3) {
1517 xmlChar *buffer = NULL;
1518 int buffer_size = 0;
1519 xmlChar *out = NULL;
1520
1521 xmlChar *current = NULL;
1522 xmlEntityPtr ent;
1523 xmlChar cur;
1524
1525 /*
1526 * allocate a translation buffer.
1527 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001528 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard10a2c651999-12-12 13:03:50 +00001529 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1530 if (buffer == NULL) {
1531 perror("xmlDecodeEntities: malloc failed");
1532 return(NULL);
1533 }
1534 out = buffer;
1535
1536 /*
1537 * Ok loop until we reach one of the ending char or a size limit.
1538 */
1539 cur = *str;
1540 while ((cur != 0) && (cur != end) &&
1541 (cur != end2) && (cur != end3)) {
1542
1543 if (cur == 0) break;
1544 if ((cur == '&') && (str[1] == '#')) {
1545 int val = xmlParseStringCharRef(ctxt, &str);
1546 if (val != 0)
1547 *out++ = val;
1548 } else if ((cur == '&') && (what & XML_SUBSTITUTE_REF)) {
1549 ent = xmlParseStringEntityRef(ctxt, &str);
1550 if ((ent != NULL) &&
1551 (ctxt->replaceEntities != 0)) {
1552 current = ent->content;
1553 while (*current != 0) {
1554 *out++ = *current++;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001555 if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00001556 int index = out - buffer;
1557
1558 growBuffer(buffer);
1559 out = &buffer[index];
1560 }
1561 }
1562 } else if (ent != NULL) {
1563 int i = xmlStrlen(ent->name);
1564 const xmlChar *cur = ent->name;
1565
1566 *out++ = '&';
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001567 if (out - buffer > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00001568 int index = out - buffer;
1569
1570 growBuffer(buffer);
1571 out = &buffer[index];
1572 }
1573 for (;i > 0;i--)
1574 *out++ = *cur++;
1575 *out++ = ';';
1576 }
1577 } else if (cur == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1578 ent = xmlParseStringPEReference(ctxt, &str);
1579 if (ent != NULL) {
1580 current = ent->content;
1581 while (*current != 0) {
1582 *out++ = *current++;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001583 if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00001584 int index = out - buffer;
1585
1586 growBuffer(buffer);
1587 out = &buffer[index];
1588 }
1589 }
1590 }
1591 } else {
1592 /* invalid for UTF-8 , use COPY(out); !!!!!! */
1593 *out++ = cur;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001594 if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00001595 int index = out - buffer;
1596
1597 growBuffer(buffer);
1598 out = &buffer[index];
1599 }
1600 str++;
1601 }
1602 cur = *str;
1603 }
1604 *out = 0;
1605 return(buffer);
1606}
1607
Daniel Veillard260a68f1998-08-13 03:39:55 +00001608
1609/************************************************************************
1610 * *
Daniel Veillard27d88741999-05-29 11:51:49 +00001611 * Commodity functions to handle encodings *
1612 * *
1613 ************************************************************************/
1614
1615/**
1616 * xmlSwitchEncoding:
1617 * @ctxt: the parser context
Daniel Veillard00fdf371999-10-08 09:40:39 +00001618 * @enc: the encoding value (number)
Daniel Veillard27d88741999-05-29 11:51:49 +00001619 *
1620 * change the input functions when discovering the character encoding
1621 * of a given entity.
Daniel Veillard27d88741999-05-29 11:51:49 +00001622 */
1623void
1624xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1625{
1626 switch (enc) {
1627 case XML_CHAR_ENCODING_ERROR:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001628 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00001629 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1630 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1631 ctxt->wellFormed = 0;
1632 break;
1633 case XML_CHAR_ENCODING_NONE:
1634 /* let's assume it's UTF-8 without the XML decl */
1635 return;
1636 case XML_CHAR_ENCODING_UTF8:
1637 /* default encoding, no conversion should be needed */
1638 return;
1639 case XML_CHAR_ENCODING_UTF16LE:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001640 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00001641 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1642 ctxt->sax->error(ctxt->userData,
1643 "char encoding UTF16 little endian not supported\n");
1644 break;
1645 case XML_CHAR_ENCODING_UTF16BE:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001646 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00001647 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1648 ctxt->sax->error(ctxt->userData,
1649 "char encoding UTF16 big endian not supported\n");
1650 break;
1651 case XML_CHAR_ENCODING_UCS4LE:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001652 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00001653 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1654 ctxt->sax->error(ctxt->userData,
1655 "char encoding USC4 little endian not supported\n");
1656 break;
1657 case XML_CHAR_ENCODING_UCS4BE:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001658 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00001659 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1660 ctxt->sax->error(ctxt->userData,
1661 "char encoding USC4 big endian not supported\n");
1662 break;
1663 case XML_CHAR_ENCODING_EBCDIC:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001664 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00001665 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1666 ctxt->sax->error(ctxt->userData,
1667 "char encoding EBCDIC not supported\n");
1668 break;
1669 case XML_CHAR_ENCODING_UCS4_2143:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001670 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00001671 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1672 ctxt->sax->error(ctxt->userData,
1673 "char encoding UCS4 2143 not supported\n");
1674 break;
1675 case XML_CHAR_ENCODING_UCS4_3412:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001676 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00001677 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1678 ctxt->sax->error(ctxt->userData,
1679 "char encoding UCS4 3412 not supported\n");
1680 break;
1681 case XML_CHAR_ENCODING_UCS2:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001682 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00001683 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1684 ctxt->sax->error(ctxt->userData,
1685 "char encoding UCS2 not supported\n");
1686 break;
1687 case XML_CHAR_ENCODING_8859_1:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001688 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00001689 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1690 ctxt->sax->error(ctxt->userData,
1691 "char encoding ISO_8859_1 ISO Latin 1 not supported\n");
1692 break;
1693 case XML_CHAR_ENCODING_8859_2:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001694 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00001695 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1696 ctxt->sax->error(ctxt->userData,
1697 "char encoding ISO_8859_2 ISO Latin 2 not supported\n");
1698 break;
1699 case XML_CHAR_ENCODING_8859_3:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001700 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00001701 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1702 ctxt->sax->error(ctxt->userData,
1703 "char encoding ISO_8859_3 not supported\n");
1704 break;
1705 case XML_CHAR_ENCODING_8859_4:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001706 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00001707 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1708 ctxt->sax->error(ctxt->userData,
1709 "char encoding ISO_8859_4 not supported\n");
1710 break;
1711 case XML_CHAR_ENCODING_8859_5:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001712 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00001713 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1714 ctxt->sax->error(ctxt->userData,
1715 "char encoding ISO_8859_5 not supported\n");
1716 break;
1717 case XML_CHAR_ENCODING_8859_6:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001718 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00001719 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1720 ctxt->sax->error(ctxt->userData,
1721 "char encoding ISO_8859_6 not supported\n");
1722 break;
1723 case XML_CHAR_ENCODING_8859_7:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001724 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00001725 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1726 ctxt->sax->error(ctxt->userData,
1727 "char encoding ISO_8859_7 not supported\n");
1728 break;
1729 case XML_CHAR_ENCODING_8859_8:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001730 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00001731 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1732 ctxt->sax->error(ctxt->userData,
1733 "char encoding ISO_8859_8 not supported\n");
1734 break;
1735 case XML_CHAR_ENCODING_8859_9:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001736 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00001737 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1738 ctxt->sax->error(ctxt->userData,
1739 "char encoding ISO_8859_9 not supported\n");
1740 break;
1741 case XML_CHAR_ENCODING_2022_JP:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001742 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00001743 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1744 ctxt->sax->error(ctxt->userData,
1745 "char encoding ISO-2022-JPnot supported\n");
1746 break;
1747 case XML_CHAR_ENCODING_SHIFT_JIS:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001748 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00001749 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1750 ctxt->sax->error(ctxt->userData,
1751 "char encoding Shift_JISnot supported\n");
1752 break;
1753 case XML_CHAR_ENCODING_EUC_JP:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001754 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00001755 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1756 ctxt->sax->error(ctxt->userData,
1757 "char encoding EUC-JPnot supported\n");
1758 break;
1759 }
1760}
1761
1762/************************************************************************
1763 * *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001764 * Commodity functions to handle xmlChars *
Daniel Veillard260a68f1998-08-13 03:39:55 +00001765 * *
1766 ************************************************************************/
1767
Daniel Veillard11e00581998-10-24 18:27:49 +00001768/**
1769 * xmlStrndup:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001770 * @cur: the input xmlChar *
Daniel Veillard11e00581998-10-24 18:27:49 +00001771 * @len: the len of @cur
1772 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001773 * a strndup for array of xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001774 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001775 * Returns a new xmlChar * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001776 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001777xmlChar *
1778xmlStrndup(const xmlChar *cur, int len) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00001779 xmlChar *ret;
1780
1781 if ((cur == NULL) || (len < 0)) return(NULL);
1782 ret = xmlMalloc((len + 1) * sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00001783 if (ret == NULL) {
Daniel Veillardbe70ff71999-07-05 16:50:46 +00001784 fprintf(stderr, "malloc of %ld byte failed\n",
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001785 (len + 1) * (long)sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00001786 return(NULL);
1787 }
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001788 memcpy(ret, cur, len * sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00001789 ret[len] = 0;
1790 return(ret);
1791}
1792
Daniel Veillard11e00581998-10-24 18:27:49 +00001793/**
1794 * xmlStrdup:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001795 * @cur: the input xmlChar *
Daniel Veillard11e00581998-10-24 18:27:49 +00001796 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001797 * a strdup for array of xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001798 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001799 * Returns a new xmlChar * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001800 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001801xmlChar *
1802xmlStrdup(const xmlChar *cur) {
1803 const xmlChar *p = cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001804
Daniel Veillard10a2c651999-12-12 13:03:50 +00001805 if (cur == NULL) return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001806 while (IS_CHAR(*p)) p++;
1807 return(xmlStrndup(cur, p - cur));
1808}
1809
Daniel Veillard11e00581998-10-24 18:27:49 +00001810/**
1811 * xmlCharStrndup:
1812 * @cur: the input char *
1813 * @len: the len of @cur
1814 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001815 * a strndup for char's to xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001816 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001817 * Returns a new xmlChar * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001818 */
1819
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001820xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001821xmlCharStrndup(const char *cur, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001822 int i;
Daniel Veillard10a2c651999-12-12 13:03:50 +00001823 xmlChar *ret;
1824
1825 if ((cur == NULL) || (len < 0)) return(NULL);
1826 ret = xmlMalloc((len + 1) * sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00001827 if (ret == NULL) {
Daniel Veillardbe70ff71999-07-05 16:50:46 +00001828 fprintf(stderr, "malloc of %ld byte failed\n",
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001829 (len + 1) * (long)sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00001830 return(NULL);
1831 }
1832 for (i = 0;i < len;i++)
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001833 ret[i] = (xmlChar) cur[i];
Daniel Veillard260a68f1998-08-13 03:39:55 +00001834 ret[len] = 0;
1835 return(ret);
1836}
1837
Daniel Veillard11e00581998-10-24 18:27:49 +00001838/**
1839 * xmlCharStrdup:
1840 * @cur: the input char *
1841 * @len: the len of @cur
1842 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001843 * a strdup for char's to xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001844 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001845 * Returns a new xmlChar * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001846 */
1847
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001848xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001849xmlCharStrdup(const char *cur) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001850 const char *p = cur;
1851
Daniel Veillard10a2c651999-12-12 13:03:50 +00001852 if (cur == NULL) return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001853 while (*p != '\0') p++;
1854 return(xmlCharStrndup(cur, p - cur));
1855}
1856
Daniel Veillard11e00581998-10-24 18:27:49 +00001857/**
1858 * xmlStrcmp:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001859 * @str1: the first xmlChar *
1860 * @str2: the second xmlChar *
Daniel Veillard11e00581998-10-24 18:27:49 +00001861 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001862 * a strcmp for xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001863 *
1864 * Returns the integer result of the comparison
Daniel Veillard260a68f1998-08-13 03:39:55 +00001865 */
1866
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001867int
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001868xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001869 register int tmp;
1870
Daniel Veillard10a2c651999-12-12 13:03:50 +00001871 if ((str1 == NULL) && (str2 == NULL)) return(0);
1872 if (str1 == NULL) return(-1);
1873 if (str2 == NULL) return(1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001874 do {
1875 tmp = *str1++ - *str2++;
1876 if (tmp != 0) return(tmp);
1877 } while ((*str1 != 0) && (*str2 != 0));
1878 return (*str1 - *str2);
1879}
1880
Daniel Veillard11e00581998-10-24 18:27:49 +00001881/**
1882 * xmlStrncmp:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001883 * @str1: the first xmlChar *
1884 * @str2: the second xmlChar *
Daniel Veillard11e00581998-10-24 18:27:49 +00001885 * @len: the max comparison length
1886 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001887 * a strncmp for xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001888 *
1889 * Returns the integer result of the comparison
Daniel Veillard260a68f1998-08-13 03:39:55 +00001890 */
1891
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001892int
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001893xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001894 register int tmp;
1895
1896 if (len <= 0) return(0);
Daniel Veillard10a2c651999-12-12 13:03:50 +00001897 if ((str1 == NULL) && (str2 == NULL)) return(0);
1898 if (str1 == NULL) return(-1);
1899 if (str2 == NULL) return(1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001900 do {
1901 tmp = *str1++ - *str2++;
1902 if (tmp != 0) return(tmp);
1903 len--;
1904 if (len <= 0) return(0);
1905 } while ((*str1 != 0) && (*str2 != 0));
1906 return (*str1 - *str2);
1907}
1908
Daniel Veillard11e00581998-10-24 18:27:49 +00001909/**
1910 * xmlStrchr:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001911 * @str: the xmlChar * array
1912 * @val: the xmlChar to search
Daniel Veillard11e00581998-10-24 18:27:49 +00001913 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001914 * a strchr for xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001915 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001916 * Returns the xmlChar * for the first occurence or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001917 */
1918
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001919const xmlChar *
1920xmlStrchr(const xmlChar *str, xmlChar val) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00001921 if (str == NULL) return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001922 while (*str != 0) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001923 if (*str == val) return((xmlChar *) str);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001924 str++;
1925 }
1926 return(NULL);
1927}
1928
Daniel Veillard11e00581998-10-24 18:27:49 +00001929/**
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001930 * xmlStrstr:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001931 * @str: the xmlChar * array (haystack)
1932 * @val: the xmlChar to search (needle)
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001933 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001934 * a strstr for xmlChar's
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001935 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001936 * Returns the xmlChar * for the first occurence or NULL.
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001937 */
1938
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001939const xmlChar *
1940xmlStrstr(const xmlChar *str, xmlChar *val) {
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001941 int n;
1942
1943 if (str == NULL) return(NULL);
1944 if (val == NULL) return(NULL);
1945 n = xmlStrlen(val);
1946
1947 if (n == 0) return(str);
1948 while (*str != 0) {
1949 if (*str == *val) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001950 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001951 }
1952 str++;
1953 }
1954 return(NULL);
1955}
1956
1957/**
1958 * xmlStrsub:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001959 * @str: the xmlChar * array (haystack)
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001960 * @start: the index of the first char (zero based)
1961 * @len: the length of the substring
1962 *
1963 * Extract a substring of a given string
1964 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001965 * Returns the xmlChar * for the first occurence or NULL.
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001966 */
1967
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001968xmlChar *
1969xmlStrsub(const xmlChar *str, int start, int len) {
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001970 int i;
1971
1972 if (str == NULL) return(NULL);
1973 if (start < 0) return(NULL);
1974 if (len < 0) return(NULL);
1975
1976 for (i = 0;i < start;i++) {
1977 if (*str == 0) return(NULL);
1978 str++;
1979 }
1980 if (*str == 0) return(NULL);
1981 return(xmlStrndup(str, len));
1982}
1983
1984/**
Daniel Veillard11e00581998-10-24 18:27:49 +00001985 * xmlStrlen:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001986 * @str: the xmlChar * array
Daniel Veillard11e00581998-10-24 18:27:49 +00001987 *
Daniel Veillard51e3b151999-11-12 17:02:31 +00001988 * length of a xmlChar's string
Daniel Veillard1e346af1999-02-22 10:33:01 +00001989 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001990 * Returns the number of xmlChar contained in the ARRAY.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001991 */
1992
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001993int
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001994xmlStrlen(const xmlChar *str) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001995 int len = 0;
1996
1997 if (str == NULL) return(0);
1998 while (*str != 0) {
1999 str++;
2000 len++;
2001 }
2002 return(len);
2003}
2004
Daniel Veillard11e00581998-10-24 18:27:49 +00002005/**
2006 * xmlStrncat:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002007 * @cur: the original xmlChar * array
2008 * @add: the xmlChar * array added
Daniel Veillard11e00581998-10-24 18:27:49 +00002009 * @len: the length of @add
2010 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002011 * a strncat for array of xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00002012 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002013 * Returns a new xmlChar * containing the concatenated string.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002014 */
2015
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002016xmlChar *
2017xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002018 int size;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002019 xmlChar *ret;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002020
2021 if ((add == NULL) || (len == 0))
2022 return(cur);
2023 if (cur == NULL)
2024 return(xmlStrndup(add, len));
2025
2026 size = xmlStrlen(cur);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002027 ret = xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00002028 if (ret == NULL) {
Daniel Veillardbe70ff71999-07-05 16:50:46 +00002029 fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002030 (size + len + 1) * (long)sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00002031 return(cur);
2032 }
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002033 memcpy(&ret[size], add, len * sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00002034 ret[size + len] = 0;
2035 return(ret);
2036}
2037
Daniel Veillard11e00581998-10-24 18:27:49 +00002038/**
2039 * xmlStrcat:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002040 * @cur: the original xmlChar * array
2041 * @add: the xmlChar * array added
Daniel Veillard11e00581998-10-24 18:27:49 +00002042 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002043 * a strcat for array of xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00002044 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002045 * Returns a new xmlChar * containing the concatenated string.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002046 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002047xmlChar *
2048xmlStrcat(xmlChar *cur, const xmlChar *add) {
2049 const xmlChar *p = add;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002050
2051 if (add == NULL) return(cur);
2052 if (cur == NULL)
2053 return(xmlStrdup(add));
2054
2055 while (IS_CHAR(*p)) p++;
2056 return(xmlStrncat(cur, add, p - add));
2057}
2058
2059/************************************************************************
2060 * *
2061 * Commodity functions, cleanup needed ? *
2062 * *
2063 ************************************************************************/
2064
Daniel Veillard11e00581998-10-24 18:27:49 +00002065/**
2066 * areBlanks:
2067 * @ctxt: an XML parser context
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002068 * @str: a xmlChar *
Daniel Veillard11e00581998-10-24 18:27:49 +00002069 * @len: the size of @str
2070 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00002071 * Is this a sequence of blank chars that one can ignore ?
Daniel Veillard11e00581998-10-24 18:27:49 +00002072 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00002073 * Returns 1 if ignorable 0 otherwise.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002074 */
2075
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002076static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002077 int i, ret;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002078 xmlNodePtr lastChild;
2079
2080 for (i = 0;i < len;i++)
2081 if (!(IS_BLANK(str[i]))) return(0);
2082
2083 if (CUR != '<') return(0);
Daniel Veillard517752b1999-04-05 12:20:10 +00002084 if (ctxt->node == NULL) return(0);
Daniel Veillardb05deb71999-08-10 19:04:08 +00002085 if (ctxt->myDoc != NULL) {
2086 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2087 if (ret == 0) return(1);
2088 if (ret == 1) return(0);
2089 }
2090 /*
2091 * heuristic
2092 */
Daniel Veillard260a68f1998-08-13 03:39:55 +00002093 lastChild = xmlGetLastChild(ctxt->node);
2094 if (lastChild == NULL) {
2095 if (ctxt->node->content != NULL) return(0);
2096 } else if (xmlNodeIsText(lastChild))
2097 return(0);
Daniel Veillardb05deb71999-08-10 19:04:08 +00002098 else if ((ctxt->node->childs != NULL) &&
2099 (xmlNodeIsText(ctxt->node->childs)))
2100 return(0);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002101 return(1);
2102}
2103
Daniel Veillard11e00581998-10-24 18:27:49 +00002104/**
2105 * xmlHandleEntity:
2106 * @ctxt: an XML parser context
2107 * @entity: an XML entity pointer.
2108 *
2109 * Default handling of defined entities, when should we define a new input
Daniel Veillard260a68f1998-08-13 03:39:55 +00002110 * stream ? When do we just handle that as a set of chars ?
Daniel Veillardb05deb71999-08-10 19:04:08 +00002111 *
2112 * OBSOLETE: to be removed at some point.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002113 */
2114
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002115void
2116xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002117 int len;
Daniel Veillardccb09631998-10-27 06:21:04 +00002118 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002119
2120 if (entity->content == NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002121 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002122 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002123 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
Daniel Veillard260a68f1998-08-13 03:39:55 +00002124 entity->name);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002125 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002126 return;
2127 }
2128 len = xmlStrlen(entity->content);
2129 if (len <= 2) goto handle_as_char;
2130
2131 /*
2132 * Redefine its content as an input stream.
2133 */
Daniel Veillardccb09631998-10-27 06:21:04 +00002134 input = xmlNewEntityInputStream(ctxt, entity);
2135 xmlPushInput(ctxt, input);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002136 return;
2137
2138handle_as_char:
2139 /*
2140 * Just handle the content as a set of chars.
2141 */
Daniel Veillard517752b1999-04-05 12:20:10 +00002142 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002143 ctxt->sax->characters(ctxt->userData, entity->content, len);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002144
2145}
2146
/*
 * Forward declarations for recursive behaviour.
 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00002150void xmlParsePEReference(xmlParserCtxtPtr ctxt);
2151void xmlParseReference(xmlParserCtxtPtr ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002152
2153/************************************************************************
2154 * *
2155 * Extra stuff for namespace support *
2156 * Relates to http://www.w3.org/TR/WD-xml-names *
2157 * *
2158 ************************************************************************/
2159
Daniel Veillard11e00581998-10-24 18:27:49 +00002160/**
2161 * xmlNamespaceParseNCName:
2162 * @ctxt: an XML parser context
2163 *
2164 * parse an XML namespace name.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002165 *
2166 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2167 *
2168 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2169 * CombiningChar | Extender
Daniel Veillard1e346af1999-02-22 10:33:01 +00002170 *
2171 * Returns the namespace name or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00002172 */
2173
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002174xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002175xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002176 xmlChar buf[XML_MAX_NAMELEN];
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002177 int len = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002178
2179 if (!IS_LETTER(CUR) && (CUR != '_')) return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002180
2181 while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
2182 (CUR == '.') || (CUR == '-') ||
2183 (CUR == '_') ||
2184 (IS_COMBINING(CUR)) ||
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002185 (IS_EXTENDER(CUR))) {
2186 buf[len++] = CUR;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002187 NEXT;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002188 if (len >= XML_MAX_NAMELEN) {
2189 fprintf(stderr,
2190 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
2191 while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
2192 (CUR == '.') || (CUR == '-') ||
2193 (CUR == '_') ||
2194 (IS_COMBINING(CUR)) ||
2195 (IS_EXTENDER(CUR)))
2196 NEXT;
2197 break;
2198 }
2199 }
2200 return(xmlStrndup(buf, len));
Daniel Veillard260a68f1998-08-13 03:39:55 +00002201}
2202
Daniel Veillard11e00581998-10-24 18:27:49 +00002203/**
2204 * xmlNamespaceParseQName:
2205 * @ctxt: an XML parser context
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002206 * @prefix: a xmlChar **
Daniel Veillard11e00581998-10-24 18:27:49 +00002207 *
2208 * parse an XML qualified name
Daniel Veillard260a68f1998-08-13 03:39:55 +00002209 *
2210 * [NS 5] QName ::= (Prefix ':')? LocalPart
2211 *
2212 * [NS 6] Prefix ::= NCName
2213 *
2214 * [NS 7] LocalPart ::= NCName
Daniel Veillard1e346af1999-02-22 10:33:01 +00002215 *
Daniel Veillard51e3b151999-11-12 17:02:31 +00002216 * Returns the local part, and prefix is updated
Daniel Veillard11e00581998-10-24 18:27:49 +00002217 * to get the Prefix if any.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002218 */
2219
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002220xmlChar *
2221xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
2222 xmlChar *ret = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002223
2224 *prefix = NULL;
2225 ret = xmlNamespaceParseNCName(ctxt);
2226 if (CUR == ':') {
2227 *prefix = ret;
2228 NEXT;
2229 ret = xmlNamespaceParseNCName(ctxt);
2230 }
2231
2232 return(ret);
2233}
2234
Daniel Veillard11e00581998-10-24 18:27:49 +00002235/**
Daniel Veillard517752b1999-04-05 12:20:10 +00002236 * xmlSplitQName:
2237 * @name: an XML parser context
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002238 * @prefix: a xmlChar **
Daniel Veillard517752b1999-04-05 12:20:10 +00002239 *
2240 * parse an XML qualified name string
2241 *
2242 * [NS 5] QName ::= (Prefix ':')? LocalPart
2243 *
2244 * [NS 6] Prefix ::= NCName
2245 *
2246 * [NS 7] LocalPart ::= NCName
2247 *
Daniel Veillard51e3b151999-11-12 17:02:31 +00002248 * Returns the local part, and prefix is updated
Daniel Veillard517752b1999-04-05 12:20:10 +00002249 * to get the Prefix if any.
2250 */
2251
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002252xmlChar *
2253xmlSplitQName(const xmlChar *name, xmlChar **prefix) {
2254 xmlChar *ret = NULL;
2255 const xmlChar *q;
2256 const xmlChar *cur = name;
Daniel Veillard517752b1999-04-05 12:20:10 +00002257
2258 *prefix = NULL;
Daniel Veillardb96e6431999-08-29 21:02:19 +00002259
2260 /* xml: prefix is not really a namespace */
2261 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2262 (cur[2] == 'l') && (cur[3] == ':'))
2263 return(xmlStrdup(name));
2264
Daniel Veillard517752b1999-04-05 12:20:10 +00002265 if (!IS_LETTER(*cur) && (*cur != '_')) return(NULL);
2266 q = cur++;
2267
2268 while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
2269 (*cur == '.') || (*cur == '-') ||
2270 (*cur == '_') ||
2271 (IS_COMBINING(*cur)) ||
2272 (IS_EXTENDER(*cur)))
2273 cur++;
2274
2275 ret = xmlStrndup(q, cur - q);
2276
2277 if (*cur == ':') {
2278 cur++;
2279 if (!IS_LETTER(*cur) && (*cur != '_')) return(ret);
2280 *prefix = ret;
2281
2282 q = cur++;
2283
2284 while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
2285 (*cur == '.') || (*cur == '-') ||
2286 (*cur == '_') ||
2287 (IS_COMBINING(*cur)) ||
2288 (IS_EXTENDER(*cur)))
2289 cur++;
2290
2291 ret = xmlStrndup(q, cur - q);
2292 }
2293
2294 return(ret);
2295}
2296/**
Daniel Veillard11e00581998-10-24 18:27:49 +00002297 * xmlNamespaceParseNSDef:
2298 * @ctxt: an XML parser context
2299 *
2300 * parse a namespace prefix declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00002301 *
2302 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
2303 *
2304 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
Daniel Veillard1e346af1999-02-22 10:33:01 +00002305 *
2306 * Returns the namespace name
Daniel Veillard260a68f1998-08-13 03:39:55 +00002307 */
2308
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002309xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002310xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002311 xmlChar *name = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002312
2313 if ((CUR == 'x') && (NXT(1) == 'm') &&
2314 (NXT(2) == 'l') && (NXT(3) == 'n') &&
2315 (NXT(4) == 's')) {
2316 SKIP(5);
2317 if (CUR == ':') {
2318 NEXT;
2319 name = xmlNamespaceParseNCName(ctxt);
2320 }
2321 }
2322 return(name);
2323}
2324
Daniel Veillard11e00581998-10-24 18:27:49 +00002325/**
2326 * xmlParseQuotedString:
2327 * @ctxt: an XML parser context
2328 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00002329 * [OLD] Parse and return a string between quotes or doublequotes
Daniel Veillardb05deb71999-08-10 19:04:08 +00002330 * To be removed at next drop of binary compatibility
Daniel Veillard1e346af1999-02-22 10:33:01 +00002331 *
2332 * Returns the string parser or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002333 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002334xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002335xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002336 xmlChar *buf = NULL;
2337 int len = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002338 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002339 xmlChar c;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002340
Daniel Veillard10a2c651999-12-12 13:03:50 +00002341 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2342 if (buf == NULL) {
2343 fprintf(stderr, "malloc of %d byte failed\n", size);
2344 return(NULL);
2345 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002346 if (CUR == '"') {
2347 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002348 c = CUR;
2349 while (IS_CHAR(c) && (c != '"')) {
2350 if (len + 1 >= size) {
2351 size *= 2;
2352 buf = xmlRealloc(buf, size * sizeof(xmlChar));
2353 if (buf == NULL) {
2354 fprintf(stderr, "realloc of %d byte failed\n", size);
2355 return(NULL);
2356 }
2357 }
2358 buf[len++] = c;
2359 NEXT;
2360 c = CUR;
2361 }
2362 if (c != '"') {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002363 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002364 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard10a2c651999-12-12 13:03:50 +00002365 ctxt->sax->error(ctxt->userData,
2366 "String not closed \"%.50s\"\n", buf);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002367 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002368 } else {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002369 NEXT;
2370 }
2371 } else if (CUR == '\''){
2372 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002373 c = CUR;
2374 while (IS_CHAR(c) && (c != '\'')) {
2375 if (len + 1 >= size) {
2376 size *= 2;
2377 buf = xmlRealloc(buf, size * sizeof(xmlChar));
2378 if (buf == NULL) {
2379 fprintf(stderr, "realloc of %d byte failed\n", size);
2380 return(NULL);
2381 }
2382 }
2383 buf[len++] = c;
2384 NEXT;
2385 c = CUR;
2386 }
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002387 if (CUR != '\'') {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002388 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002389 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard10a2c651999-12-12 13:03:50 +00002390 ctxt->sax->error(ctxt->userData,
2391 "String not closed \"%.50s\"\n", buf);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002392 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002393 } else {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002394 NEXT;
2395 }
2396 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00002397 return(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002398}
2399
Daniel Veillard11e00581998-10-24 18:27:49 +00002400/**
2401 * xmlParseNamespace:
2402 * @ctxt: an XML parser context
2403 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00002404 * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
2405 *
2406 * This is what the older xml-name Working Draft specified, a bunch of
2407 * other stuff may still rely on it, so support is still here as
Daniel Veillard51e3b151999-11-12 17:02:31 +00002408 * if it was declared on the root of the Tree:-(
Daniel Veillardb05deb71999-08-10 19:04:08 +00002409 *
2410 * To be removed at next drop of binary compatibility
Daniel Veillard260a68f1998-08-13 03:39:55 +00002411 */
2412
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002413void
2414xmlParseNamespace(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002415 xmlChar *href = NULL;
2416 xmlChar *prefix = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002417 int garbage = 0;
2418
2419 /*
2420 * We just skipped "namespace" or "xml:namespace"
2421 */
2422 SKIP_BLANKS;
2423
2424 while (IS_CHAR(CUR) && (CUR != '>')) {
2425 /*
2426 * We can have "ns" or "prefix" attributes
2427 * Old encoding as 'href' or 'AS' attributes is still supported
2428 */
2429 if ((CUR == 'n') && (NXT(1) == 's')) {
2430 garbage = 0;
2431 SKIP(2);
2432 SKIP_BLANKS;
2433
2434 if (CUR != '=') continue;
2435 NEXT;
2436 SKIP_BLANKS;
2437
2438 href = xmlParseQuotedString(ctxt);
2439 SKIP_BLANKS;
2440 } else if ((CUR == 'h') && (NXT(1) == 'r') &&
2441 (NXT(2) == 'e') && (NXT(3) == 'f')) {
2442 garbage = 0;
2443 SKIP(4);
2444 SKIP_BLANKS;
2445
2446 if (CUR != '=') continue;
2447 NEXT;
2448 SKIP_BLANKS;
2449
2450 href = xmlParseQuotedString(ctxt);
2451 SKIP_BLANKS;
2452 } else if ((CUR == 'p') && (NXT(1) == 'r') &&
2453 (NXT(2) == 'e') && (NXT(3) == 'f') &&
2454 (NXT(4) == 'i') && (NXT(5) == 'x')) {
2455 garbage = 0;
2456 SKIP(6);
2457 SKIP_BLANKS;
2458
2459 if (CUR != '=') continue;
2460 NEXT;
2461 SKIP_BLANKS;
2462
2463 prefix = xmlParseQuotedString(ctxt);
2464 SKIP_BLANKS;
2465 } else if ((CUR == 'A') && (NXT(1) == 'S')) {
2466 garbage = 0;
2467 SKIP(2);
2468 SKIP_BLANKS;
2469
2470 if (CUR != '=') continue;
2471 NEXT;
2472 SKIP_BLANKS;
2473
2474 prefix = xmlParseQuotedString(ctxt);
2475 SKIP_BLANKS;
2476 } else if ((CUR == '?') && (NXT(1) == '>')) {
2477 garbage = 0;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002478 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002479 } else {
2480 /*
2481 * Found garbage when parsing the namespace
2482 */
Daniel Veillard7f7d1111999-09-22 09:46:25 +00002483 if (!garbage) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002484 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00002485 ctxt->sax->error(ctxt->userData,
2486 "xmlParseNamespace found garbage\n");
2487 }
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002488 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002489 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002490 NEXT;
2491 }
2492 }
2493
2494 MOVETO_ENDTAG(CUR_PTR);
2495 NEXT;
2496
2497 /*
2498 * Register the DTD.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002499 if (href != NULL)
Daniel Veillard517752b1999-04-05 12:20:10 +00002500 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002501 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
Daniel Veillard517752b1999-04-05 12:20:10 +00002502 */
Daniel Veillard260a68f1998-08-13 03:39:55 +00002503
Daniel Veillard6454aec1999-09-02 22:04:43 +00002504 if (prefix != NULL) xmlFree(prefix);
2505 if (href != NULL) xmlFree(href);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002506}
2507
2508/************************************************************************
2509 * *
2510 * The parser itself *
2511 * Relates to http://www.w3.org/TR/REC-xml *
2512 * *
2513 ************************************************************************/
2514
Daniel Veillard11e00581998-10-24 18:27:49 +00002515/**
Daniel Veillardb05deb71999-08-10 19:04:08 +00002516 * xmlScanName:
2517 * @ctxt: an XML parser context
2518 *
2519 * Trickery: parse an XML name but without consuming the input flow
2520 * Needed for rollback cases.
2521 *
2522 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2523 * CombiningChar | Extender
2524 *
2525 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2526 *
2527 * [6] Names ::= Name (S Name)*
2528 *
2529 * Returns the Name parsed or NULL
2530 */
2531
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002532xmlChar *
Daniel Veillardb05deb71999-08-10 19:04:08 +00002533xmlScanName(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002534 xmlChar buf[XML_MAX_NAMELEN];
Daniel Veillardb05deb71999-08-10 19:04:08 +00002535 int len = 0;
2536
2537 GROW;
2538 if (!IS_LETTER(CUR) && (CUR != '_') &&
2539 (CUR != ':')) {
2540 return(NULL);
2541 }
2542
2543 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
2544 (NXT(len) == '.') || (NXT(len) == '-') ||
2545 (NXT(len) == '_') || (NXT(len) == ':') ||
2546 (IS_COMBINING(NXT(len))) ||
2547 (IS_EXTENDER(NXT(len)))) {
2548 buf[len] = NXT(len);
2549 len++;
2550 if (len >= XML_MAX_NAMELEN) {
2551 fprintf(stderr,
2552 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
2553 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
2554 (NXT(len) == '.') || (NXT(len) == '-') ||
2555 (NXT(len) == '_') || (NXT(len) == ':') ||
2556 (IS_COMBINING(NXT(len))) ||
2557 (IS_EXTENDER(NXT(len))))
2558 len++;
2559 break;
2560 }
2561 }
2562 return(xmlStrndup(buf, len));
2563}
2564
2565/**
Daniel Veillard11e00581998-10-24 18:27:49 +00002566 * xmlParseName:
2567 * @ctxt: an XML parser context
2568 *
2569 * parse an XML name.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002570 *
2571 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2572 * CombiningChar | Extender
2573 *
2574 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2575 *
2576 * [6] Names ::= Name (S Name)*
Daniel Veillard1e346af1999-02-22 10:33:01 +00002577 *
2578 * Returns the Name parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00002579 */
2580
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002581xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002582xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002583 xmlChar buf[XML_MAX_NAMELEN];
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002584 int len = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002585 xmlChar cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002586
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002587 GROW;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002588 cur = CUR;
2589 if (!IS_LETTER(cur) && (cur != '_') &&
2590 (cur != ':')) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002591 return(NULL);
2592 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002593
Daniel Veillardb05deb71999-08-10 19:04:08 +00002594 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2595 (cur == '.') || (cur == '-') ||
2596 (cur == '_') || (cur == ':') ||
2597 (IS_COMBINING(cur)) ||
2598 (IS_EXTENDER(cur))) {
2599 buf[len++] = cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002600 NEXT;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002601 cur = CUR;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002602 if (len >= XML_MAX_NAMELEN) {
2603 fprintf(stderr,
2604 "xmlParseName: reached XML_MAX_NAMELEN limit\n");
Daniel Veillardb05deb71999-08-10 19:04:08 +00002605 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2606 (cur == '.') || (cur == '-') ||
2607 (cur == '_') || (cur == ':') ||
2608 (IS_COMBINING(cur)) ||
2609 (IS_EXTENDER(cur))) {
2610 NEXT;
2611 cur = CUR;
2612 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002613 break;
2614 }
2615 }
2616 return(xmlStrndup(buf, len));
Daniel Veillard260a68f1998-08-13 03:39:55 +00002617}
2618
Daniel Veillard11e00581998-10-24 18:27:49 +00002619/**
Daniel Veillard10a2c651999-12-12 13:03:50 +00002620 * xmlParseStringName:
2621 * @ctxt: an XML parser context
2622 * @str: a pointer to an index in the string
2623 *
2624 * parse an XML name.
2625 *
2626 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2627 * CombiningChar | Extender
2628 *
2629 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2630 *
2631 * [6] Names ::= Name (S Name)*
2632 *
2633 * Returns the Name parsed or NULL. The str pointer
2634 * is updated to the current location in the string.
2635 */
2636
2637xmlChar *
2638xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2639 const xmlChar *ptr;
2640 const xmlChar *start;
2641 xmlChar cur;
2642
2643 if ((str == NULL) || (*str == NULL)) return(NULL);
2644
2645 start = ptr = *str;
2646 cur = *ptr;
2647 if (!IS_LETTER(cur) && (cur != '_') &&
2648 (cur != ':')) {
2649 return(NULL);
2650 }
2651
2652 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2653 (cur == '.') || (cur == '-') ||
2654 (cur == '_') || (cur == ':') ||
2655 (IS_COMBINING(cur)) ||
2656 (IS_EXTENDER(cur))) {
2657 ptr++;
2658 cur = *ptr;
2659 }
2660 *str = ptr;
2661 return(xmlStrndup(start, ptr - start ));
2662}
2663
2664/**
Daniel Veillard11e00581998-10-24 18:27:49 +00002665 * xmlParseNmtoken:
2666 * @ctxt: an XML parser context
2667 *
2668 * parse an XML Nmtoken.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002669 *
2670 * [7] Nmtoken ::= (NameChar)+
2671 *
2672 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
Daniel Veillard1e346af1999-02-22 10:33:01 +00002673 *
2674 * Returns the Nmtoken parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00002675 */
2676
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002677xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002678xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002679 xmlChar buf[XML_MAX_NAMELEN];
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002680 int len = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002681
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002682 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002683 while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
2684 (CUR == '.') || (CUR == '-') ||
2685 (CUR == '_') || (CUR == ':') ||
2686 (IS_COMBINING(CUR)) ||
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002687 (IS_EXTENDER(CUR))) {
2688 buf[len++] = CUR;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002689 NEXT;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002690 if (len >= XML_MAX_NAMELEN) {
2691 fprintf(stderr,
2692 "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
2693 while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
2694 (CUR == '.') || (CUR == '-') ||
2695 (CUR == '_') || (CUR == ':') ||
2696 (IS_COMBINING(CUR)) ||
2697 (IS_EXTENDER(CUR)))
2698 NEXT;
2699 break;
2700 }
2701 }
2702 return(xmlStrndup(buf, len));
Daniel Veillard260a68f1998-08-13 03:39:55 +00002703}
2704
Daniel Veillard11e00581998-10-24 18:27:49 +00002705/**
2706 * xmlParseEntityValue:
2707 * @ctxt: an XML parser context
Daniel Veillard011b63c1999-06-02 17:44:04 +00002708 * @orig: if non-NULL store a copy of the original entity value
Daniel Veillard11e00581998-10-24 18:27:49 +00002709 *
2710 * parse a value for ENTITY decl.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002711 *
2712 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2713 * "'" ([^%&'] | PEReference | Reference)* "'"
Daniel Veillard1e346af1999-02-22 10:33:01 +00002714 *
Daniel Veillard011b63c1999-06-02 17:44:04 +00002715 * Returns the EntityValue parsed with reference substitued or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00002716 */
2717
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002718xmlChar *
2719xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002720 xmlChar *buf = NULL;
2721 int len = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002722 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002723 xmlChar c;
2724 xmlChar stop;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002725 xmlChar *ret = NULL;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002726 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002727
Daniel Veillard10a2c651999-12-12 13:03:50 +00002728 if (CUR == '"') stop = '"';
2729 else if (CUR == '\'') stop = '\'';
2730 else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002731 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002732 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard011b63c1999-06-02 17:44:04 +00002733 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002734 ctxt->wellFormed = 0;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002735 return(NULL);
2736 }
2737 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2738 if (buf == NULL) {
2739 fprintf(stderr, "malloc of %d byte failed\n", size);
2740 return(NULL);
2741 }
2742
2743 /*
2744 * The content of the entity definition is copied in a buffer.
2745 */
2746
2747 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2748 input = ctxt->input;
2749 GROW;
2750 NEXT;
2751 c = CUR;
2752 /*
2753 * NOTE: 4.4.5 Included in Literal
2754 * When a parameter entity reference appears in a literal entity
2755 * value, ... a single or double quote character in the replacement
2756 * text is always treated as a normal data character and will not
2757 * terminate the literal.
2758 * In practice it means we stop the loop only when back at parsing
2759 * the initial entity and the quote is found
2760 */
2761 while (IS_CHAR(c) && ((c != stop) || (ctxt->input != input))) {
2762 if (len + 1 >= size) {
2763 size *= 2;
2764 buf = xmlRealloc(buf, size * sizeof(xmlChar));
2765 if (buf == NULL) {
2766 fprintf(stderr, "realloc of %d byte failed\n", size);
2767 return(NULL);
2768 }
2769 }
2770 buf[len++] = c;
2771 NEXT;
2772 /*
2773 * Pop-up of finished entities.
2774 */
2775 while ((CUR == 0) && (ctxt->inputNr > 1))
2776 xmlPopInput(ctxt);
2777 c = CUR;
2778 if (c == 0) {
2779 GROW;
2780 c = CUR;
2781 }
2782 }
2783 buf[len] = 0;
2784
2785 /*
2786 * Then PEReference entities are substituted.
2787 */
2788 if (c != stop) {
2789 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2790 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2791 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2792 ctxt->wellFormed = 0;
2793 } else {
2794 NEXT;
2795 /*
2796 * NOTE: 4.4.7 Bypassed
2797 * When a general entity reference appears in the EntityValue in
2798 * an entity declaration, it is bypassed and left as is.
2799 * so XML_SUBSTITUTE_REF is not set.
2800 */
2801 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2802 0, 0, 0);
2803 if (orig != NULL)
2804 *orig = buf;
2805 else
2806 xmlFree(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002807 }
2808
2809 return(ret);
2810}
2811
Daniel Veillard11e00581998-10-24 18:27:49 +00002812/**
2813 * xmlParseAttValue:
2814 * @ctxt: an XML parser context
2815 *
2816 * parse a value for an attribute
Daniel Veillard011b63c1999-06-02 17:44:04 +00002817 * Note: the parser won't do substitution of entities here, this
Daniel Veillardb96e6431999-08-29 21:02:19 +00002818 * will be handled later in xmlStringGetNodeList
Daniel Veillard260a68f1998-08-13 03:39:55 +00002819 *
2820 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2821 * "'" ([^<&'] | Reference)* "'"
Daniel Veillard1e346af1999-02-22 10:33:01 +00002822 *
Daniel Veillard7f858501999-11-17 17:32:38 +00002823 * 3.3.3 Attribute-Value Normalization:
2824 * Before the value of an attribute is passed to the application or
2825 * checked for validity, the XML processor must normalize it as follows:
2826 * - a character reference is processed by appending the referenced
2827 * character to the attribute value
2828 * - an entity reference is processed by recursively processing the
2829 * replacement text of the entity
2830 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2831 * appending #x20 to the normalized value, except that only a single
2832 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2833 * parsed entity or the literal entity value of an internal parsed entity
2834 * - other characters are processed by appending them to the normalized value
Daniel Veillard07136651999-11-18 14:02:05 +00002835 * If the declared value is not CDATA, then the XML processor must further
2836 * process the normalized attribute value by discarding any leading and
2837 * trailing space (#x20) characters, and by replacing sequences of space
2838 * (#x20) characters by a single space (#x20) character.
2839 * All attributes for which no declaration has been read should be treated
2840 * by a non-validating parser as if declared CDATA.
Daniel Veillard7f858501999-11-17 17:32:38 +00002841 *
2842 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002843 */
2844
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002845xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002846xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard7f858501999-11-17 17:32:38 +00002847 xmlChar limit = 0;
2848 xmlChar *buffer = NULL;
2849 int buffer_size = 0;
2850 xmlChar *out = NULL;
2851
2852 xmlChar *current = NULL;
2853 xmlEntityPtr ent;
2854 xmlChar cur;
Daniel Veillard7f858501999-11-17 17:32:38 +00002855
Daniel Veillard260a68f1998-08-13 03:39:55 +00002856
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002857 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002858 if (CUR == '"') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002859 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillard7f858501999-11-17 17:32:38 +00002860 limit = '"';
Daniel Veillard260a68f1998-08-13 03:39:55 +00002861 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002862 } else if (CUR == '\'') {
Daniel Veillard7f858501999-11-17 17:32:38 +00002863 limit = '\'';
Daniel Veillardb05deb71999-08-10 19:04:08 +00002864 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002865 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002866 } else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002867 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002868 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002869 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002870 ctxt->wellFormed = 0;
Daniel Veillard7f858501999-11-17 17:32:38 +00002871 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002872 }
2873
Daniel Veillard7f858501999-11-17 17:32:38 +00002874 /*
2875 * allocate a translation buffer.
2876 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002877 buffer_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard7f858501999-11-17 17:32:38 +00002878 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2879 if (buffer == NULL) {
2880 perror("xmlParseAttValue: malloc failed");
2881 return(NULL);
2882 }
2883 out = buffer;
2884
2885 /*
2886 * Ok loop until we reach one of the ending char or a size limit.
2887 */
2888 cur = CUR;
2889 while ((cur != limit) && (cur != '<')) {
Daniel Veillard7f858501999-11-17 17:32:38 +00002890 if (cur == 0) break;
2891 if ((cur == '&') && (NXT(1) == '#')) {
2892 int val = xmlParseCharRef(ctxt);
2893 *out++ = val;
Daniel Veillard7f858501999-11-17 17:32:38 +00002894 } else if (cur == '&') {
2895 ent = xmlParseEntityRef(ctxt);
2896 if ((ent != NULL) &&
2897 (ctxt->replaceEntities != 0)) {
2898 current = ent->content;
2899 while (*current != 0) {
2900 *out++ = *current++;
2901 if (out - buffer > buffer_size - 10) {
2902 int index = out - buffer;
2903
2904 growBuffer(buffer);
2905 out = &buffer[index];
2906 }
2907 }
2908 } else if (ent != NULL) {
2909 int i = xmlStrlen(ent->name);
2910 const xmlChar *cur = ent->name;
2911
2912 *out++ = '&';
2913 if (out - buffer > buffer_size - i - 10) {
2914 int index = out - buffer;
2915
2916 growBuffer(buffer);
2917 out = &buffer[index];
2918 }
2919 for (;i > 0;i--)
2920 *out++ = *cur++;
2921 *out++ = ';';
2922 }
Daniel Veillard7f858501999-11-17 17:32:38 +00002923 } else {
2924 /* invalid for UTF-8 , use COPY(out); !!!!!! */
2925 if ((cur == 0x20) || (cur == 0xD) || (cur == 0xA) || (cur == 0x9)) {
Daniel Veillard07136651999-11-18 14:02:05 +00002926 *out++ = 0x20;
2927 if (out - buffer > buffer_size - 10) {
2928 int index = out - buffer;
2929
2930 growBuffer(buffer);
2931 out = &buffer[index];
Daniel Veillard7f858501999-11-17 17:32:38 +00002932 }
Daniel Veillard7f858501999-11-17 17:32:38 +00002933 } else {
2934 *out++ = cur;
2935 if (out - buffer > buffer_size - 10) {
2936 int index = out - buffer;
2937
2938 growBuffer(buffer);
2939 out = &buffer[index];
2940 }
Daniel Veillard7f858501999-11-17 17:32:38 +00002941 }
2942 NEXT;
2943 }
2944 cur = CUR;
2945 }
2946 *out++ = 0;
2947 if (CUR == '<') {
2948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2949 ctxt->sax->error(ctxt->userData,
2950 "Unescaped '<' not allowed in attributes values\n");
2951 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2952 ctxt->wellFormed = 0;
2953 } else if (CUR != limit) {
2954 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2955 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2956 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2957 ctxt->wellFormed = 0;
2958 } else
2959 NEXT;
2960 return(buffer);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002961}
2962
Daniel Veillard11e00581998-10-24 18:27:49 +00002963/**
2964 * xmlParseSystemLiteral:
2965 * @ctxt: an XML parser context
2966 *
2967 * parse an XML Literal
Daniel Veillard260a68f1998-08-13 03:39:55 +00002968 *
2969 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
Daniel Veillard1e346af1999-02-22 10:33:01 +00002970 *
2971 * Returns the SystemLiteral parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00002972 */
2973
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002974xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002975xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002976 xmlChar *buf = NULL;
2977 int len = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002978 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002979 xmlChar cur;
2980 xmlChar stop;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002981
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002982 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002983 if (CUR == '"') {
2984 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002985 stop = '"';
Daniel Veillard260a68f1998-08-13 03:39:55 +00002986 } else if (CUR == '\'') {
2987 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002988 stop = '\'';
Daniel Veillard260a68f1998-08-13 03:39:55 +00002989 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002990 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00002991 ctxt->sax->error(ctxt->userData,
2992 "SystemLiteral \" or ' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002993 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002994 ctxt->wellFormed = 0;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002995 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002996 }
2997
Daniel Veillard10a2c651999-12-12 13:03:50 +00002998 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2999 if (buf == NULL) {
3000 fprintf(stderr, "malloc of %d byte failed\n", size);
3001 return(NULL);
3002 }
3003 cur = CUR;
3004 while ((IS_CHAR(cur)) && (cur != stop)) {
3005 if (len + 1 >= size) {
3006 size *= 2;
3007 buf = xmlRealloc(buf, size * sizeof(xmlChar));
3008 if (buf == NULL) {
3009 fprintf(stderr, "realloc of %d byte failed\n", size);
3010 return(NULL);
3011 }
3012 }
3013 buf[len++] = cur;
3014 NEXT;
3015 cur = CUR;
3016 if (cur == 0) {
3017 GROW;
3018 SHRINK;
3019 cur = CUR;
3020 }
3021 }
3022 buf[len] = 0;
3023 if (!IS_CHAR(cur)) {
3024 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3025 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
3026 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
3027 ctxt->wellFormed = 0;
3028 } else {
3029 NEXT;
3030 }
3031 return(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003032}
3033
Daniel Veillard11e00581998-10-24 18:27:49 +00003034/**
3035 * xmlParsePubidLiteral:
3036 * @ctxt: an XML parser context
Daniel Veillard260a68f1998-08-13 03:39:55 +00003037 *
Daniel Veillard11e00581998-10-24 18:27:49 +00003038 * parse an XML public literal
Daniel Veillard1e346af1999-02-22 10:33:01 +00003039 *
3040 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3041 *
3042 * Returns the PubidLiteral parsed or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003043 */
3044
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003045xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003046xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00003047 xmlChar *buf = NULL;
3048 int len = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00003049 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003050 xmlChar cur;
3051 xmlChar stop;
Daniel Veillard6077d031999-10-09 09:11:45 +00003052
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003053 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003054 if (CUR == '"') {
3055 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003056 stop = '"';
Daniel Veillard260a68f1998-08-13 03:39:55 +00003057 } else if (CUR == '\'') {
3058 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003059 stop = '\'';
Daniel Veillard260a68f1998-08-13 03:39:55 +00003060 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003061 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003062 ctxt->sax->error(ctxt->userData,
3063 "SystemLiteral \" or ' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003064 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003065 ctxt->wellFormed = 0;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003066 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003067 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00003068 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3069 if (buf == NULL) {
3070 fprintf(stderr, "malloc of %d byte failed\n", size);
3071 return(NULL);
3072 }
3073 cur = CUR;
3074 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) {
3075 if (len + 1 >= size) {
3076 size *= 2;
3077 buf = xmlRealloc(buf, size * sizeof(xmlChar));
3078 if (buf == NULL) {
3079 fprintf(stderr, "realloc of %d byte failed\n", size);
3080 return(NULL);
3081 }
3082 }
3083 buf[len++] = cur;
3084 NEXT;
3085 cur = CUR;
3086 if (cur == 0) {
3087 GROW;
3088 SHRINK;
3089 cur = CUR;
3090 }
3091 }
3092 buf[len] = 0;
3093 if (cur != stop) {
3094 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3095 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
3096 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
3097 ctxt->wellFormed = 0;
3098 } else {
3099 NEXT;
3100 }
3101 return(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003102}
3103
Daniel Veillard11e00581998-10-24 18:27:49 +00003104/**
3105 * xmlParseCharData:
3106 * @ctxt: an XML parser context
3107 * @cdata: int indicating whether we are within a CDATA section
3108 *
3109 * parse a CharData section.
3110 * if we are within a CDATA section ']]>' marks an end of section.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003111 *
3112 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3113 */
3114
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003115void
3116xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00003117 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE];
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003118 int nbchar = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003119 xmlChar cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003120
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003121 SHRINK;
Daniel Veillardb05deb71999-08-10 19:04:08 +00003122 cur = CUR;
3123 while ((IS_CHAR(cur)) && (cur != '<') &&
3124 (cur != '&')) {
3125 if ((cur == ']') && (NXT(1) == ']') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003126 (NXT(2) == '>')) {
3127 if (cdata) break;
3128 else {
3129 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00003130 ctxt->sax->warning(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003131 "Sequence ']]>' not allowed in content\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003132 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003133 }
3134 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003135 buf[nbchar++] = CUR;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00003136 if (nbchar == XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003137 /*
3138 * Ok the segment is to be consumed as chars.
3139 */
3140 if (ctxt->sax != NULL) {
3141 if (areBlanks(ctxt, buf, nbchar)) {
3142 if (ctxt->sax->ignorableWhitespace != NULL)
3143 ctxt->sax->ignorableWhitespace(ctxt->userData,
3144 buf, nbchar);
3145 } else {
3146 if (ctxt->sax->characters != NULL)
3147 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3148 }
3149 }
3150 nbchar = 0;
3151 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003152 NEXT;
Daniel Veillardb05deb71999-08-10 19:04:08 +00003153 cur = CUR;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003154 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003155 if (nbchar != 0) {
3156 /*
3157 * Ok the segment is to be consumed as chars.
3158 */
3159 if (ctxt->sax != NULL) {
3160 if (areBlanks(ctxt, buf, nbchar)) {
3161 if (ctxt->sax->ignorableWhitespace != NULL)
3162 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3163 } else {
3164 if (ctxt->sax->characters != NULL)
3165 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3166 }
3167 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003168 }
3169}
3170
Daniel Veillard11e00581998-10-24 18:27:49 +00003171/**
3172 * xmlParseExternalID:
3173 * @ctxt: an XML parser context
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003174 * @publicID: a xmlChar** receiving PubidLiteral
Daniel Veillard1e346af1999-02-22 10:33:01 +00003175 * @strict: indicate whether we should restrict parsing to only
3176 * production [75], see NOTE below
Daniel Veillard11e00581998-10-24 18:27:49 +00003177 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003178 * Parse an External ID or a Public ID
3179 *
3180 * NOTE: Productions [75] and [83] interract badly since [75] can generate
3181 * 'PUBLIC' S PubidLiteral S SystemLiteral
Daniel Veillard260a68f1998-08-13 03:39:55 +00003182 *
3183 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3184 * | 'PUBLIC' S PubidLiteral S SystemLiteral
Daniel Veillard1e346af1999-02-22 10:33:01 +00003185 *
3186 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3187 *
3188 * Returns the function returns SystemLiteral and in the second
3189 * case publicID receives PubidLiteral, is strict is off
3190 * it is possible to return NULL and have publicID set.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003191 */
3192
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003193xmlChar *
3194xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3195 xmlChar *URI = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003196
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003197 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003198 if ((CUR == 'S') && (NXT(1) == 'Y') &&
3199 (NXT(2) == 'S') && (NXT(3) == 'T') &&
3200 (NXT(4) == 'E') && (NXT(5) == 'M')) {
3201 SKIP(6);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003202 if (!IS_BLANK(CUR)) {
3203 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003204 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003205 "Space required after 'SYSTEM'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003206 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003207 ctxt->wellFormed = 0;
3208 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003209 SKIP_BLANKS;
3210 URI = xmlParseSystemLiteral(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003211 if (URI == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003212 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003213 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003214 "xmlParseExternalID: SYSTEM, no URI\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003215 ctxt->errNo = XML_ERR_URI_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003216 ctxt->wellFormed = 0;
3217 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003218 } else if ((CUR == 'P') && (NXT(1) == 'U') &&
3219 (NXT(2) == 'B') && (NXT(3) == 'L') &&
3220 (NXT(4) == 'I') && (NXT(5) == 'C')) {
3221 SKIP(6);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003222 if (!IS_BLANK(CUR)) {
3223 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003224 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003225 "Space required after 'PUBLIC'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003226 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003227 ctxt->wellFormed = 0;
3228 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003229 SKIP_BLANKS;
3230 *publicID = xmlParsePubidLiteral(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003231 if (*publicID == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003232 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003233 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003234 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003235 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003236 ctxt->wellFormed = 0;
3237 }
Daniel Veillard1e346af1999-02-22 10:33:01 +00003238 if (strict) {
3239 /*
3240 * We don't handle [83] so "S SystemLiteral" is required.
3241 */
3242 if (!IS_BLANK(CUR)) {
3243 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003244 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00003245 "Space required after the Public Identifier\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003246 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003247 ctxt->wellFormed = 0;
3248 }
3249 } else {
3250 /*
3251 * We handle [83] so we return immediately, if
3252 * "S SystemLiteral" is not detected. From a purely parsing
3253 * point of view that's a nice mess.
3254 */
Daniel Veillard10a2c651999-12-12 13:03:50 +00003255 const xmlChar *ptr;
3256 GROW;
3257
3258 ptr = CUR_PTR;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003259 if (!IS_BLANK(*ptr)) return(NULL);
3260
3261 while (IS_BLANK(*ptr)) ptr++;
3262 if ((*ptr != '\'') || (*ptr != '"')) return(NULL);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003263 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003264 SKIP_BLANKS;
3265 URI = xmlParseSystemLiteral(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003266 if (URI == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003267 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003268 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003269 "xmlParseExternalID: PUBLIC, no URI\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003270 ctxt->errNo = XML_ERR_URI_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003271 ctxt->wellFormed = 0;
3272 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003273 }
3274 return(URI);
3275}
3276
Daniel Veillard11e00581998-10-24 18:27:49 +00003277/**
3278 * xmlParseComment:
Daniel Veillard1e346af1999-02-22 10:33:01 +00003279 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00003280 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00003281 * Skip an XML (SGML) comment <!-- .... -->
Daniel Veillard260a68f1998-08-13 03:39:55 +00003282 * The spec says that "For compatibility, the string "--" (double-hyphen)
3283 * must not occur within comments. "
3284 *
3285 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3286 */
Daniel Veillard517752b1999-04-05 12:20:10 +00003287void
Daniel Veillardb96e6431999-08-29 21:02:19 +00003288xmlParseComment(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00003289 xmlChar *buf = NULL;
3290 int len = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00003291 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003292 xmlChar q;
3293 xmlChar r;
3294 xmlChar cur;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00003295 xmlParserInputState state;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003296
3297 /*
3298 * Check that there is a comment right here.
3299 */
3300 if ((CUR != '<') || (NXT(1) != '!') ||
Daniel Veillard517752b1999-04-05 12:20:10 +00003301 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003302
Daniel Veillarddbfd6411999-12-28 16:35:14 +00003303 state = ctxt->instate;
Daniel Veillardb05deb71999-08-10 19:04:08 +00003304 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003305 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003306 SKIP(4);
Daniel Veillard10a2c651999-12-12 13:03:50 +00003307 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3308 if (buf == NULL) {
3309 fprintf(stderr, "malloc of %d byte failed\n", size);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00003310 ctxt->instate = state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003311 return;
3312 }
3313 q = CUR;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003314 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003315 r = CUR;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003316 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003317 cur = CUR;
3318 while (IS_CHAR(cur) &&
3319 ((cur != '>') ||
3320 (r != '-') || (q != '-'))) {
3321 if ((r == '-') && (q == '-')) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003322 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003323 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003324 "Comment must not contain '--' (double-hyphen)`\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003325 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003326 ctxt->wellFormed = 0;
3327 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00003328 if (len + 1 >= size) {
3329 size *= 2;
3330 buf = xmlRealloc(buf, size * sizeof(xmlChar));
3331 if (buf == NULL) {
3332 fprintf(stderr, "realloc of %d byte failed\n", size);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00003333 ctxt->instate = state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003334 return;
3335 }
3336 }
3337 buf[len++] = q;
3338 q = r;
3339 r = cur;
3340 NEXT;
3341 cur = CUR;
3342 if (cur == 0) {
3343 SHRINK;
3344 GROW;
3345 cur = CUR;
3346 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003347 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00003348 buf[len] = 0;
3349 if (!IS_CHAR(cur)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003350 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003351 ctxt->sax->error(ctxt->userData,
Daniel Veillard10a2c651999-12-12 13:03:50 +00003352 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003353 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003354 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003355 } else {
3356 NEXT;
Daniel Veillardb96e6431999-08-29 21:02:19 +00003357 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL))
Daniel Veillard10a2c651999-12-12 13:03:50 +00003358 ctxt->sax->comment(ctxt->userData, buf);
3359 xmlFree(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003360 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +00003361 ctxt->instate = state;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003362}
3363
Daniel Veillard11e00581998-10-24 18:27:49 +00003364/**
3365 * xmlParsePITarget:
3366 * @ctxt: an XML parser context
3367 *
3368 * parse the name of a PI
Daniel Veillard260a68f1998-08-13 03:39:55 +00003369 *
3370 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
Daniel Veillard1e346af1999-02-22 10:33:01 +00003371 *
3372 * Returns the PITarget name or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00003373 */
3374
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003375xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003376xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003377 xmlChar *name;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003378
3379 name = xmlParseName(ctxt);
Daniel Veillard3c558c31999-12-22 11:30:41 +00003380 if ((name != NULL) &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00003381 ((name[0] == 'x') || (name[0] == 'X')) &&
3382 ((name[1] == 'm') || (name[1] == 'M')) &&
3383 ((name[2] == 'l') || (name[2] == 'L'))) {
Daniel Veillard3c558c31999-12-22 11:30:41 +00003384 int i;
3385 for (i = 0;;i++) {
3386 if (xmlW3CPIs[i] == NULL) break;
3387 if (!xmlStrcmp(name, (const xmlChar *)xmlW3CPIs[i]))
3388 return(name);
3389 }
3390 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3391 ctxt->sax->warning(ctxt->userData,
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003392 "xmlParsePItarget: invalid name prefix 'xml'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003393 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003394 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003395 }
3396 return(name);
3397}
3398
Daniel Veillard11e00581998-10-24 18:27:49 +00003399/**
3400 * xmlParsePI:
3401 * @ctxt: an XML parser context
3402 *
3403 * parse an XML Processing Instruction.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003404 *
3405 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
Daniel Veillard1e346af1999-02-22 10:33:01 +00003406 *
3407 * The processing is transfered to SAX once parsed.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003408 */
3409
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003410void
3411xmlParsePI(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00003412 xmlChar *buf = NULL;
3413 int len = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00003414 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003415 xmlChar cur;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003416 xmlChar *target;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00003417 xmlParserInputState state;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003418
3419 if ((CUR == '<') && (NXT(1) == '?')) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00003420 state = ctxt->instate;
3421 ctxt->instate = XML_PARSER_PI;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003422 /*
3423 * this is a Processing Instruction.
3424 */
3425 SKIP(2);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003426 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003427
3428 /*
3429 * Parse the target name and check for special support like
3430 * namespace.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003431 */
3432 target = xmlParsePITarget(ctxt);
3433 if (target != NULL) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00003434 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3435 if (buf == NULL) {
3436 fprintf(stderr, "malloc of %d byte failed\n", size);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00003437 ctxt->instate = state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003438 return;
3439 }
3440 cur = CUR;
3441 if (!IS_BLANK(cur)) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00003442 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3443 ctxt->sax->error(ctxt->userData,
3444 "xmlParsePI: PI %s space expected\n", target);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003445 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillardb96e6431999-08-29 21:02:19 +00003446 ctxt->wellFormed = 0;
3447 }
3448 SKIP_BLANKS;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003449 cur = CUR;
3450 while (IS_CHAR(cur) &&
3451 ((cur != '?') || (NXT(1) != '>'))) {
3452 if (len + 1 >= size) {
3453 size *= 2;
3454 buf = xmlRealloc(buf, size * sizeof(xmlChar));
3455 if (buf == NULL) {
3456 fprintf(stderr, "realloc of %d byte failed\n", size);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00003457 ctxt->instate = state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003458 return;
3459 }
3460 }
3461 buf[len++] = cur;
Daniel Veillard517752b1999-04-05 12:20:10 +00003462 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003463 cur = CUR;
3464 if (cur == 0) {
3465 SHRINK;
3466 GROW;
3467 cur = CUR;
3468 }
3469 }
3470 buf[len] = 0;
3471 if (!IS_CHAR(cur)) {
Daniel Veillard517752b1999-04-05 12:20:10 +00003472 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003473 ctxt->sax->error(ctxt->userData,
Daniel Veillard517752b1999-04-05 12:20:10 +00003474 "xmlParsePI: PI %s never end ...\n", target);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003475 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
Daniel Veillard517752b1999-04-05 12:20:10 +00003476 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003477 } else {
Daniel Veillard517752b1999-04-05 12:20:10 +00003478 SKIP(2);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003479
Daniel Veillard517752b1999-04-05 12:20:10 +00003480 /*
3481 * SAX: PI detected.
3482 */
3483 if ((ctxt->sax) &&
3484 (ctxt->sax->processingInstruction != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00003485 ctxt->sax->processingInstruction(ctxt->userData,
Daniel Veillard10a2c651999-12-12 13:03:50 +00003486 target, buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003487 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00003488 xmlFree(buf);
Daniel Veillard6454aec1999-09-02 22:04:43 +00003489 xmlFree(target);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003490 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003491 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00003492 ctxt->sax->error(ctxt->userData,
3493 "xmlParsePI : no target name\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003494 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003495 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003496 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +00003497 ctxt->instate = state;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003498 }
3499}
3500
Daniel Veillard11e00581998-10-24 18:27:49 +00003501/**
3502 * xmlParseNotationDecl:
3503 * @ctxt: an XML parser context
3504 *
3505 * parse a notation declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00003506 *
3507 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3508 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00003509 * Hence there is actually 3 choices:
3510 * 'PUBLIC' S PubidLiteral
3511 * 'PUBLIC' S PubidLiteral S SystemLiteral
3512 * and 'SYSTEM' S SystemLiteral
Daniel Veillard11e00581998-10-24 18:27:49 +00003513 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003514 * See the NOTE on xmlParseExternalID().
Daniel Veillard260a68f1998-08-13 03:39:55 +00003515 */
3516
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003517void
3518xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003519 xmlChar *name;
3520 xmlChar *Pubid;
3521 xmlChar *Systemid;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003522
3523 if ((CUR == '<') && (NXT(1) == '!') &&
3524 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3525 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3526 (NXT(6) == 'T') && (NXT(7) == 'I') &&
Daniel Veillard1e346af1999-02-22 10:33:01 +00003527 (NXT(8) == 'O') && (NXT(9) == 'N')) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003528 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003529 SKIP(10);
Daniel Veillard1e346af1999-02-22 10:33:01 +00003530 if (!IS_BLANK(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003531 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003532 ctxt->sax->error(ctxt->userData,
3533 "Space required after '<!NOTATION'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003534 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003535 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003536 return;
3537 }
3538 SKIP_BLANKS;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003539
3540 name = xmlParseName(ctxt);
3541 if (name == NULL) {
3542 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003543 ctxt->sax->error(ctxt->userData,
3544 "NOTATION: Name expected here\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003545 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003546 ctxt->wellFormed = 0;
3547 return;
3548 }
3549 if (!IS_BLANK(CUR)) {
3550 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003551 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00003552 "Space required after the NOTATION name'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003553 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003554 ctxt->wellFormed = 0;
3555 return;
3556 }
3557 SKIP_BLANKS;
3558
Daniel Veillard260a68f1998-08-13 03:39:55 +00003559 /*
Daniel Veillard1e346af1999-02-22 10:33:01 +00003560 * Parse the IDs.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003561 */
Daniel Veillard1e346af1999-02-22 10:33:01 +00003562 Systemid = xmlParseExternalID(ctxt, &Pubid, 1);
3563 SKIP_BLANKS;
3564
3565 if (CUR == '>') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003566 NEXT;
Daniel Veillard517752b1999-04-05 12:20:10 +00003567 if ((ctxt->sax != NULL) && (ctxt->sax->notationDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003568 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
Daniel Veillard1e346af1999-02-22 10:33:01 +00003569 } else {
3570 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003571 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00003572 "'>' required to close NOTATION declaration\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003573 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003574 ctxt->wellFormed = 0;
3575 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00003576 xmlFree(name);
3577 if (Systemid != NULL) xmlFree(Systemid);
3578 if (Pubid != NULL) xmlFree(Pubid);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003579 }
3580}
3581
Daniel Veillard11e00581998-10-24 18:27:49 +00003582/**
3583 * xmlParseEntityDecl:
3584 * @ctxt: an XML parser context
3585 *
3586 * parse <!ENTITY declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00003587 *
3588 * [70] EntityDecl ::= GEDecl | PEDecl
3589 *
3590 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3591 *
3592 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3593 *
3594 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3595 *
3596 * [74] PEDef ::= EntityValue | ExternalID
3597 *
3598 * [76] NDataDecl ::= S 'NDATA' S Name
Daniel Veillardb05deb71999-08-10 19:04:08 +00003599 *
3600 * [ VC: Notation Declared ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00003601 * The Name must match the declared name of a notation.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003602 */
3603
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003604void
3605xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003606 xmlChar *name = NULL;
3607 xmlChar *value = NULL;
3608 xmlChar *URI = NULL, *literal = NULL;
3609 xmlChar *ndata = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003610 int isParameter = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003611 xmlChar *orig = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003612
Daniel Veillardb05deb71999-08-10 19:04:08 +00003613 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003614 if ((CUR == '<') && (NXT(1) == '!') &&
3615 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3616 (NXT(4) == 'T') && (NXT(5) == 'I') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003617 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00003618 ctxt->instate = XML_PARSER_ENTITY_DECL;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003619 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003620 SKIP(8);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003621 if (!IS_BLANK(CUR)) {
3622 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003623 ctxt->sax->error(ctxt->userData,
3624 "Space required after '<!ENTITY'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003625 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003626 ctxt->wellFormed = 0;
3627 }
3628 SKIP_BLANKS;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003629
3630 if (CUR == '%') {
3631 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003632 if (!IS_BLANK(CUR)) {
3633 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003634 ctxt->sax->error(ctxt->userData,
3635 "Space required after '%'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003636 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003637 ctxt->wellFormed = 0;
3638 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003639 SKIP_BLANKS;
3640 isParameter = 1;
3641 }
3642
3643 name = xmlParseName(ctxt);
3644 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003645 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003646 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003647 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003648 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003649 return;
3650 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003651 if (!IS_BLANK(CUR)) {
3652 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003653 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003654 "Space required after the entity name\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003655 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003656 ctxt->wellFormed = 0;
3657 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003658 SKIP_BLANKS;
3659
3660 /*
Daniel Veillard1e346af1999-02-22 10:33:01 +00003661 * handle the various case of definitions...
Daniel Veillard260a68f1998-08-13 03:39:55 +00003662 */
3663 if (isParameter) {
3664 if ((CUR == '"') || (CUR == '\''))
Daniel Veillard011b63c1999-06-02 17:44:04 +00003665 value = xmlParseEntityValue(ctxt, &orig);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003666 if (value) {
Daniel Veillard517752b1999-04-05 12:20:10 +00003667 if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003668 ctxt->sax->entityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003669 XML_INTERNAL_PARAMETER_ENTITY,
3670 NULL, NULL, value);
3671 }
3672 else {
Daniel Veillard1e346af1999-02-22 10:33:01 +00003673 URI = xmlParseExternalID(ctxt, &literal, 1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003674 if (URI) {
Daniel Veillard517752b1999-04-05 12:20:10 +00003675 if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003676 ctxt->sax->entityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003677 XML_EXTERNAL_PARAMETER_ENTITY,
3678 literal, URI, NULL);
3679 }
3680 }
3681 } else {
3682 if ((CUR == '"') || (CUR == '\'')) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00003683 value = xmlParseEntityValue(ctxt, &orig);
Daniel Veillard517752b1999-04-05 12:20:10 +00003684 if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003685 ctxt->sax->entityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003686 XML_INTERNAL_GENERAL_ENTITY,
3687 NULL, NULL, value);
3688 } else {
Daniel Veillard1e346af1999-02-22 10:33:01 +00003689 URI = xmlParseExternalID(ctxt, &literal, 1);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003690 if ((CUR != '>') && (!IS_BLANK(CUR))) {
3691 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003692 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003693 "Space required before 'NDATA'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003694 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003695 ctxt->wellFormed = 0;
3696 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003697 SKIP_BLANKS;
3698 if ((CUR == 'N') && (NXT(1) == 'D') &&
3699 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3700 (NXT(4) == 'A')) {
3701 SKIP(5);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003702 if (!IS_BLANK(CUR)) {
3703 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003704 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003705 "Space required after 'NDATA'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003706 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003707 ctxt->wellFormed = 0;
3708 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003709 SKIP_BLANKS;
3710 ndata = xmlParseName(ctxt);
Daniel Veillardb96e6431999-08-29 21:02:19 +00003711 if ((ctxt->sax != NULL) &&
3712 (ctxt->sax->unparsedEntityDecl != NULL))
3713 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003714 literal, URI, ndata);
3715 } else {
Daniel Veillard517752b1999-04-05 12:20:10 +00003716 if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003717 ctxt->sax->entityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003718 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3719 literal, URI, NULL);
3720 }
3721 }
3722 }
3723 SKIP_BLANKS;
3724 if (CUR != '>') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003725 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003726 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003727 "xmlParseEntityDecl: entity %s not terminated\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003728 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003729 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003730 } else
3731 NEXT;
Daniel Veillard011b63c1999-06-02 17:44:04 +00003732 if (orig != NULL) {
3733 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00003734 * Ugly mechanism to save the raw entity value.
Daniel Veillard011b63c1999-06-02 17:44:04 +00003735 */
3736 xmlEntityPtr cur = NULL;
3737
Daniel Veillardb05deb71999-08-10 19:04:08 +00003738 if (isParameter) {
3739 if ((ctxt->sax != NULL) &&
3740 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillardc08a2c61999-09-08 21:35:25 +00003741 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003742 } else {
3743 if ((ctxt->sax != NULL) &&
3744 (ctxt->sax->getEntity != NULL))
Daniel Veillardc08a2c61999-09-08 21:35:25 +00003745 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003746 }
3747 if (cur != NULL) {
3748 if (cur->orig != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00003749 xmlFree(orig);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003750 else
3751 cur->orig = orig;
3752 } else
Daniel Veillard6454aec1999-09-02 22:04:43 +00003753 xmlFree(orig);
Daniel Veillard011b63c1999-06-02 17:44:04 +00003754 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00003755 if (name != NULL) xmlFree(name);
3756 if (value != NULL) xmlFree(value);
3757 if (URI != NULL) xmlFree(URI);
3758 if (literal != NULL) xmlFree(literal);
3759 if (ndata != NULL) xmlFree(ndata);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003760 }
3761}
3762
Daniel Veillard11e00581998-10-24 18:27:49 +00003763/**
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003764 * xmlParseDefaultDecl:
3765 * @ctxt: an XML parser context
3766 * @value: Receive a possible fixed default value for the attribute
3767 *
3768 * Parse an attribute default declaration
3769 *
3770 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3771 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003772 * [ VC: Required Attribute ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00003773 * if the default declaration is the keyword #REQUIRED, then the
3774 * attribute must be specified for all elements of the type in the
3775 * attribute-list declaration.
Daniel Veillardb05deb71999-08-10 19:04:08 +00003776 *
3777 * [ VC: Attribute Default Legal ]
3778 * The declared default value must meet the lexical constraints of
3779 * the declared attribute type c.f. xmlValidateAttributeDecl()
3780 *
3781 * [ VC: Fixed Attribute Default ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00003782 * if an attribute has a default value declared with the #FIXED
3783 * keyword, instances of that attribute must match the default value.
Daniel Veillardb05deb71999-08-10 19:04:08 +00003784 *
3785 * [ WFC: No < in Attribute Values ]
3786 * handled in xmlParseAttValue()
3787 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003788 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3789 * or XML_ATTRIBUTE_FIXED.
3790 */
3791
3792int
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003793xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003794 int val;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003795 xmlChar *ret;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003796
3797 *value = NULL;
3798 if ((CUR == '#') && (NXT(1) == 'R') &&
3799 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3800 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3801 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3802 (NXT(8) == 'D')) {
3803 SKIP(9);
3804 return(XML_ATTRIBUTE_REQUIRED);
3805 }
3806 if ((CUR == '#') && (NXT(1) == 'I') &&
3807 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3808 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3809 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3810 SKIP(8);
3811 return(XML_ATTRIBUTE_IMPLIED);
3812 }
3813 val = XML_ATTRIBUTE_NONE;
3814 if ((CUR == '#') && (NXT(1) == 'F') &&
3815 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3816 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3817 SKIP(6);
3818 val = XML_ATTRIBUTE_FIXED;
3819 if (!IS_BLANK(CUR)) {
3820 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003821 ctxt->sax->error(ctxt->userData,
3822 "Space required after '#FIXED'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003823 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003824 ctxt->wellFormed = 0;
3825 }
3826 SKIP_BLANKS;
3827 }
3828 ret = xmlParseAttValue(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003829 ctxt->instate = XML_PARSER_DTD;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003830 if (ret == NULL) {
3831 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003832 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003833 "Attribute default value declaration error\n");
3834 ctxt->wellFormed = 0;
3835 } else
3836 *value = ret;
3837 return(val);
3838}
3839
3840/**
Daniel Veillard1e346af1999-02-22 10:33:01 +00003841 * xmlParseNotationType:
3842 * @ctxt: an XML parser context
3843 *
3844 * parse an Notation attribute type.
3845 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003846 * Note: the leading 'NOTATION' S part has already being parsed...
3847 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003848 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3849 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003850 * [ VC: Notation Attributes ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00003851 * Values of this type must match one of the notation names included
Daniel Veillardb05deb71999-08-10 19:04:08 +00003852 * in the declaration; all notation names in the declaration must be declared.
Daniel Veillard1e346af1999-02-22 10:33:01 +00003853 *
3854 * Returns: the notation attribute tree built while parsing
3855 */
3856
3857xmlEnumerationPtr
3858xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003859 xmlChar *name;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003860 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3861
3862 if (CUR != '(') {
3863 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003864 ctxt->sax->error(ctxt->userData,
3865 "'(' required to start 'NOTATION'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003866 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003867 ctxt->wellFormed = 0;
3868 return(NULL);
3869 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003870 SHRINK;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003871 do {
3872 NEXT;
3873 SKIP_BLANKS;
3874 name = xmlParseName(ctxt);
3875 if (name == NULL) {
3876 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003877 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00003878 "Name expected in NOTATION declaration\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003879 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003880 ctxt->wellFormed = 0;
3881 return(ret);
3882 }
3883 cur = xmlCreateEnumeration(name);
Daniel Veillard6454aec1999-09-02 22:04:43 +00003884 xmlFree(name);
Daniel Veillard1e346af1999-02-22 10:33:01 +00003885 if (cur == NULL) return(ret);
3886 if (last == NULL) ret = last = cur;
3887 else {
3888 last->next = cur;
3889 last = cur;
3890 }
3891 SKIP_BLANKS;
3892 } while (CUR == '|');
3893 if (CUR != ')') {
3894 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003895 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00003896 "')' required to finish NOTATION declaration\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003897 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003898 ctxt->wellFormed = 0;
3899 return(ret);
3900 }
3901 NEXT;
3902 return(ret);
3903}
3904
3905/**
3906 * xmlParseEnumerationType:
3907 * @ctxt: an XML parser context
3908 *
3909 * parse an Enumeration attribute type.
3910 *
3911 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3912 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003913 * [ VC: Enumeration ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00003914 * Values of this type must match one of the Nmtoken tokens in
Daniel Veillardb05deb71999-08-10 19:04:08 +00003915 * the declaration
3916 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003917 * Returns: the enumeration attribute tree built while parsing
3918 */
3919
3920xmlEnumerationPtr
3921xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003922 xmlChar *name;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003923 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3924
3925 if (CUR != '(') {
3926 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003927 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00003928 "'(' required to start ATTLIST enumeration\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003929 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003930 ctxt->wellFormed = 0;
3931 return(NULL);
3932 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003933 SHRINK;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003934 do {
3935 NEXT;
3936 SKIP_BLANKS;
3937 name = xmlParseNmtoken(ctxt);
3938 if (name == NULL) {
3939 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003940 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00003941 "NmToken expected in ATTLIST enumeration\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003942 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003943 ctxt->wellFormed = 0;
3944 return(ret);
3945 }
3946 cur = xmlCreateEnumeration(name);
Daniel Veillard6454aec1999-09-02 22:04:43 +00003947 xmlFree(name);
Daniel Veillard1e346af1999-02-22 10:33:01 +00003948 if (cur == NULL) return(ret);
3949 if (last == NULL) ret = last = cur;
3950 else {
3951 last->next = cur;
3952 last = cur;
3953 }
3954 SKIP_BLANKS;
3955 } while (CUR == '|');
3956 if (CUR != ')') {
3957 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003958 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00003959 "')' required to finish ATTLIST enumeration\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003960 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003961 ctxt->wellFormed = 0;
3962 return(ret);
3963 }
3964 NEXT;
3965 return(ret);
3966}
3967
3968/**
Daniel Veillard11e00581998-10-24 18:27:49 +00003969 * xmlParseEnumeratedType:
3970 * @ctxt: an XML parser context
Daniel Veillard1e346af1999-02-22 10:33:01 +00003971 * @tree: the enumeration tree built while parsing
Daniel Veillard11e00581998-10-24 18:27:49 +00003972 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003973 * parse an Enumerated attribute type.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003974 *
3975 * [57] EnumeratedType ::= NotationType | Enumeration
3976 *
3977 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3978 *
Daniel Veillard11e00581998-10-24 18:27:49 +00003979 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003980 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
Daniel Veillard260a68f1998-08-13 03:39:55 +00003981 */
3982
Daniel Veillard1e346af1999-02-22 10:33:01 +00003983int
3984xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3985 if ((CUR == 'N') && (NXT(1) == 'O') &&
3986 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3987 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3988 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3989 SKIP(8);
3990 if (!IS_BLANK(CUR)) {
3991 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003992 ctxt->sax->error(ctxt->userData,
3993 "Space required after 'NOTATION'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003994 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003995 ctxt->wellFormed = 0;
3996 return(0);
3997 }
3998 SKIP_BLANKS;
3999 *tree = xmlParseNotationType(ctxt);
4000 if (*tree == NULL) return(0);
4001 return(XML_ATTRIBUTE_NOTATION);
4002 }
4003 *tree = xmlParseEnumerationType(ctxt);
4004 if (*tree == NULL) return(0);
4005 return(XML_ATTRIBUTE_ENUMERATION);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004006}
4007
Daniel Veillard11e00581998-10-24 18:27:49 +00004008/**
4009 * xmlParseAttributeType:
4010 * @ctxt: an XML parser context
Daniel Veillard1e346af1999-02-22 10:33:01 +00004011 * @tree: the enumeration tree built while parsing
Daniel Veillard11e00581998-10-24 18:27:49 +00004012 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004013 * parse the Attribute list def for an element
Daniel Veillard260a68f1998-08-13 03:39:55 +00004014 *
4015 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4016 *
4017 * [55] StringType ::= 'CDATA'
4018 *
4019 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4020 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
Daniel Veillard11e00581998-10-24 18:27:49 +00004021 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00004022 * Validity constraints for attribute values syntax are checked in
4023 * xmlValidateAttributeValue()
4024 *
4025 * [ VC: ID ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00004026 * Values of type ID must match the Name production. A name must not
Daniel Veillardb05deb71999-08-10 19:04:08 +00004027 * appear more than once in an XML document as a value of this type;
4028 * i.e., ID values must uniquely identify the elements which bear them.
4029 *
4030 * [ VC: One ID per Element Type ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00004031 * No element type may have more than one ID attribute specified.
Daniel Veillardb05deb71999-08-10 19:04:08 +00004032 *
4033 * [ VC: ID Attribute Default ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00004034 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
Daniel Veillardb05deb71999-08-10 19:04:08 +00004035 *
4036 * [ VC: IDREF ]
4037 * Values of type IDREF must match the Name production, and values
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004038 * of type IDREFS must match Names; each IDREF Name must match the value
Daniel Veillardb96e6431999-08-29 21:02:19 +00004039 * of an ID attribute on some element in the XML document; i.e. IDREF
Daniel Veillardb05deb71999-08-10 19:04:08 +00004040 * values must match the value of some ID attribute.
4041 *
4042 * [ VC: Entity Name ]
4043 * Values of type ENTITY must match the Name production, values
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004044 * of type ENTITIES must match Names; each Entity Name must match the
Daniel Veillardb96e6431999-08-29 21:02:19 +00004045 * name of an unparsed entity declared in the DTD.
Daniel Veillardb05deb71999-08-10 19:04:08 +00004046 *
4047 * [ VC: Name Token ]
4048 * Values of type NMTOKEN must match the Nmtoken production; values
4049 * of type NMTOKENS must match Nmtokens.
4050 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00004051 * Returns the attribute type
Daniel Veillard260a68f1998-08-13 03:39:55 +00004052 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004053int
Daniel Veillard1e346af1999-02-22 10:33:01 +00004054xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004055 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004056 if ((CUR == 'C') && (NXT(1) == 'D') &&
4057 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4058 (NXT(4) == 'A')) {
4059 SKIP(5);
Daniel Veillard1e346af1999-02-22 10:33:01 +00004060 return(XML_ATTRIBUTE_CDATA);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004061 } else if ((CUR == 'I') && (NXT(1) == 'D') &&
4062 (NXT(2) == 'R') && (NXT(3) == 'E') &&
Daniel Veillardb05deb71999-08-10 19:04:08 +00004063 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4064 SKIP(6);
4065 return(XML_ATTRIBUTE_IDREFS);
4066 } else if ((CUR == 'I') && (NXT(1) == 'D') &&
4067 (NXT(2) == 'R') && (NXT(3) == 'E') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00004068 (NXT(4) == 'F')) {
4069 SKIP(5);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004070 return(XML_ATTRIBUTE_IDREF);
Daniel Veillard1e346af1999-02-22 10:33:01 +00004071 } else if ((CUR == 'I') && (NXT(1) == 'D')) {
4072 SKIP(2);
4073 return(XML_ATTRIBUTE_ID);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004074 } else if ((CUR == 'E') && (NXT(1) == 'N') &&
4075 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4076 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4077 SKIP(6);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004078 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004079 } else if ((CUR == 'E') && (NXT(1) == 'N') &&
4080 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4081 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4082 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4083 SKIP(8);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004084 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004085 } else if ((CUR == 'N') && (NXT(1) == 'M') &&
4086 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4087 (NXT(4) == 'K') && (NXT(5) == 'E') &&
Daniel Veillard1e346af1999-02-22 10:33:01 +00004088 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4089 SKIP(8);
4090 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004091 } else if ((CUR == 'N') && (NXT(1) == 'M') &&
4092 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4093 (NXT(4) == 'K') && (NXT(5) == 'E') &&
Daniel Veillard1e346af1999-02-22 10:33:01 +00004094 (NXT(6) == 'N')) {
4095 SKIP(7);
4096 return(XML_ATTRIBUTE_NMTOKEN);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004097 }
Daniel Veillard1e346af1999-02-22 10:33:01 +00004098 return(xmlParseEnumeratedType(ctxt, tree));
Daniel Veillard260a68f1998-08-13 03:39:55 +00004099}
4100
Daniel Veillard11e00581998-10-24 18:27:49 +00004101/**
4102 * xmlParseAttributeListDecl:
4103 * @ctxt: an XML parser context
4104 *
4105 * : parse the Attribute list def for an element
Daniel Veillard260a68f1998-08-13 03:39:55 +00004106 *
4107 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4108 *
4109 * [53] AttDef ::= S Name S AttType S DefaultDecl
Daniel Veillard11e00581998-10-24 18:27:49 +00004110 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00004111 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004112void
4113xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004114 xmlChar *elemName;
4115 xmlChar *attrName;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004116 xmlEnumerationPtr tree;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004117
Daniel Veillard260a68f1998-08-13 03:39:55 +00004118 if ((CUR == '<') && (NXT(1) == '!') &&
4119 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4120 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4121 (NXT(6) == 'I') && (NXT(7) == 'S') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004122 (NXT(8) == 'T')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004123 SKIP(9);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004124 if (!IS_BLANK(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004125 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004126 ctxt->sax->error(ctxt->userData,
4127 "Space required after '<!ATTLIST'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004128 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004129 ctxt->wellFormed = 0;
4130 }
4131 SKIP_BLANKS;
4132 elemName = xmlParseName(ctxt);
4133 if (elemName == NULL) {
4134 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004135 ctxt->sax->error(ctxt->userData,
4136 "ATTLIST: no name for Element\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004137 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004138 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004139 return;
4140 }
4141 SKIP_BLANKS;
4142 while (CUR != '>') {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004143 const xmlChar *check = CUR_PTR;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004144 int type;
4145 int def;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004146 xmlChar *defaultValue = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004147
Daniel Veillardb05deb71999-08-10 19:04:08 +00004148 tree = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004149 attrName = xmlParseName(ctxt);
4150 if (attrName == NULL) {
4151 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004152 ctxt->sax->error(ctxt->userData,
4153 "ATTLIST: no name for Attribute\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004154 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004155 ctxt->wellFormed = 0;
4156 break;
4157 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00004158 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004159 if (!IS_BLANK(CUR)) {
4160 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004161 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004162 "Space required after the attribute name\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004163 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004164 ctxt->wellFormed = 0;
4165 break;
4166 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004167 SKIP_BLANKS;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004168
Daniel Veillard1e346af1999-02-22 10:33:01 +00004169 type = xmlParseAttributeType(ctxt, &tree);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004170 if (type <= 0) break;
4171
Daniel Veillardb05deb71999-08-10 19:04:08 +00004172 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004173 if (!IS_BLANK(CUR)) {
4174 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004175 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004176 "Space required after the attribute type\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004177 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004178 ctxt->wellFormed = 0;
4179 break;
4180 }
4181 SKIP_BLANKS;
4182
4183 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4184 if (def <= 0) break;
4185
Daniel Veillardb05deb71999-08-10 19:04:08 +00004186 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004187 if (CUR != '>') {
4188 if (!IS_BLANK(CUR)) {
4189 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004190 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004191 "Space required after the attribute default value\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004192 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004193 ctxt->wellFormed = 0;
4194 break;
4195 }
4196 SKIP_BLANKS;
4197 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004198 if (check == CUR_PTR) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004199 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004200 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004201 "xmlParseAttributeListDecl: detected internal error\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004202 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004203 break;
4204 }
Daniel Veillard517752b1999-04-05 12:20:10 +00004205 if ((ctxt->sax != NULL) && (ctxt->sax->attributeDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004206 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
Daniel Veillard1e346af1999-02-22 10:33:01 +00004207 type, def, defaultValue, tree);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004208 if (attrName != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00004209 xmlFree(attrName);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004210 if (defaultValue != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00004211 xmlFree(defaultValue);
Daniel Veillardb05deb71999-08-10 19:04:08 +00004212 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004213 }
4214 if (CUR == '>')
4215 NEXT;
4216
Daniel Veillard6454aec1999-09-02 22:04:43 +00004217 xmlFree(elemName);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004218 }
4219}
4220
Daniel Veillard11e00581998-10-24 18:27:49 +00004221/**
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004222 * xmlParseElementMixedContentDecl:
Daniel Veillard11e00581998-10-24 18:27:49 +00004223 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00004224 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004225 * parse the declaration for a Mixed Element content
4226 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillard260a68f1998-08-13 03:39:55 +00004227 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004228 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4229 * '(' S? '#PCDATA' S? ')'
4230 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00004231 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4232 *
4233 * [ VC: No Duplicate Types ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00004234 * The same name must not appear more than once in a single
4235 * mixed-content declaration.
Daniel Veillardb05deb71999-08-10 19:04:08 +00004236 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004237 * returns: the list of the xmlElementContentPtr describing the element choices
4238 */
4239xmlElementContentPtr
4240xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard1899e851999-02-01 12:18:54 +00004241 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004242 xmlChar *elem = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004243
Daniel Veillardb05deb71999-08-10 19:04:08 +00004244 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004245 if ((CUR == '#') && (NXT(1) == 'P') &&
4246 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4247 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4248 (NXT(6) == 'A')) {
4249 SKIP(7);
4250 SKIP_BLANKS;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004251 SHRINK;
Daniel Veillard3b9def11999-01-31 22:15:06 +00004252 if (CUR == ')') {
4253 NEXT;
4254 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillardf600e251999-12-18 15:32:46 +00004255 if (CUR == '*') {
4256 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4257 NEXT;
4258 }
Daniel Veillard3b9def11999-01-31 22:15:06 +00004259 return(ret);
4260 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004261 if ((CUR == '(') || (CUR == '|')) {
4262 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4263 if (ret == NULL) return(NULL);
Daniel Veillardb05deb71999-08-10 19:04:08 +00004264 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004265 while (CUR == '|') {
Daniel Veillard1899e851999-02-01 12:18:54 +00004266 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004267 if (elem == NULL) {
4268 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4269 if (ret == NULL) return(NULL);
4270 ret->c1 = cur;
Daniel Veillard1899e851999-02-01 12:18:54 +00004271 cur = ret;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004272 } else {
Daniel Veillard1899e851999-02-01 12:18:54 +00004273 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4274 if (n == NULL) return(NULL);
4275 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4276 cur->c2 = n;
4277 cur = n;
Daniel Veillard6454aec1999-09-02 22:04:43 +00004278 xmlFree(elem);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004279 }
4280 SKIP_BLANKS;
4281 elem = xmlParseName(ctxt);
4282 if (elem == NULL) {
4283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004284 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004285 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004286 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004287 ctxt->wellFormed = 0;
4288 xmlFreeElementContent(cur);
4289 return(NULL);
4290 }
4291 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004292 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004293 }
Daniel Veillard3b9def11999-01-31 22:15:06 +00004294 if ((CUR == ')') && (NXT(1) == '*')) {
Daniel Veillard1e346af1999-02-22 10:33:01 +00004295 if (elem != NULL) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004296 cur->c2 = xmlNewElementContent(elem,
4297 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard6454aec1999-09-02 22:04:43 +00004298 xmlFree(elem);
Daniel Veillard1e346af1999-02-22 10:33:01 +00004299 }
Daniel Veillard1899e851999-02-01 12:18:54 +00004300 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4301 SKIP(2);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004302 } else {
Daniel Veillard6454aec1999-09-02 22:04:43 +00004303 if (elem != NULL) xmlFree(elem);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004304 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004305 ctxt->sax->error(ctxt->userData,
Daniel Veillard3b9def11999-01-31 22:15:06 +00004306 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004307 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004308 ctxt->wellFormed = 0;
4309 xmlFreeElementContent(ret);
4310 return(NULL);
4311 }
4312
4313 } else {
4314 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004315 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004316 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004317 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004318 ctxt->wellFormed = 0;
4319 }
4320 return(ret);
4321}
4322
4323/**
4324 * xmlParseElementChildrenContentDecl:
4325 * @ctxt: an XML parser context
4326 *
4327 * parse the declaration for a Mixed Element content
4328 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4329 *
4330 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00004331 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4332 *
4333 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4334 *
4335 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4336 *
4337 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4338 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00004339 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4340 * TODO Parameter-entity replacement text must be properly nested
4341 * with parenthetized groups. That is to say, if either of the
4342 * opening or closing parentheses in a choice, seq, or Mixed
4343 * construct is contained in the replacement text for a parameter
4344 * entity, both must be contained in the same replacement text. For
4345 * interoperability, if a parameter-entity reference appears in a
4346 * choice, seq, or Mixed construct, its replacement text should not
4347 * be empty, and neither the first nor last non-blank character of
4348 * the replacement text should be a connector (| or ,).
4349 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004350 * returns: the tree of xmlElementContentPtr describing the element
4351 * hierarchy.
4352 */
4353xmlElementContentPtr
4354xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
4355 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004356 xmlChar *elem;
4357 xmlChar type = 0;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004358
4359 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004360 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004361 if (CUR == '(') {
4362 /* Recurse on first child */
4363 NEXT;
4364 SKIP_BLANKS;
4365 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4366 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004367 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004368 } else {
4369 elem = xmlParseName(ctxt);
4370 if (elem == NULL) {
4371 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004372 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004373 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004374 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004375 ctxt->wellFormed = 0;
4376 return(NULL);
4377 }
4378 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillardb05deb71999-08-10 19:04:08 +00004379 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004380 if (CUR == '?') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004381 cur->ocur = XML_ELEMENT_CONTENT_OPT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004382 NEXT;
4383 } else if (CUR == '*') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004384 cur->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004385 NEXT;
4386 } else if (CUR == '+') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004387 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004388 NEXT;
4389 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004390 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004391 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00004392 xmlFree(elem);
Daniel Veillardb05deb71999-08-10 19:04:08 +00004393 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004394 }
4395 SKIP_BLANKS;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004396 SHRINK;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004397 while (CUR != ')') {
4398 /*
4399 * Each loop we parse one separator and one element.
4400 */
4401 if (CUR == ',') {
4402 if (type == 0) type = CUR;
4403
4404 /*
4405 * Detect "Name | Name , Name" error
4406 */
4407 else if (type != CUR) {
4408 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004409 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004410 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4411 type);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004412 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004413 ctxt->wellFormed = 0;
4414 xmlFreeElementContent(ret);
4415 return(NULL);
4416 }
Daniel Veillard1899e851999-02-01 12:18:54 +00004417 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004418
4419 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4420 if (op == NULL) {
4421 xmlFreeElementContent(ret);
4422 return(NULL);
4423 }
4424 if (last == NULL) {
4425 op->c1 = ret;
4426 ret = cur = op;
4427 } else {
4428 cur->c2 = op;
4429 op->c1 = last;
4430 cur =op;
Daniel Veillard1899e851999-02-01 12:18:54 +00004431 last = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004432 }
4433 } else if (CUR == '|') {
4434 if (type == 0) type = CUR;
4435
4436 /*
4437 * Detect "Name , Name | Name" error
4438 */
4439 else if (type != CUR) {
4440 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004441 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004442 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4443 type);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004444 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004445 ctxt->wellFormed = 0;
4446 xmlFreeElementContent(ret);
4447 return(NULL);
4448 }
Daniel Veillard1899e851999-02-01 12:18:54 +00004449 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004450
4451 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4452 if (op == NULL) {
4453 xmlFreeElementContent(ret);
4454 return(NULL);
4455 }
4456 if (last == NULL) {
4457 op->c1 = ret;
4458 ret = cur = op;
4459 } else {
4460 cur->c2 = op;
4461 op->c1 = last;
4462 cur =op;
Daniel Veillard1899e851999-02-01 12:18:54 +00004463 last = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004464 }
4465 } else {
4466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004467 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004468 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4469 ctxt->wellFormed = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004470 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004471 xmlFreeElementContent(ret);
4472 return(NULL);
4473 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00004474 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004475 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004476 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004477 if (CUR == '(') {
4478 /* Recurse on second child */
4479 NEXT;
4480 SKIP_BLANKS;
Daniel Veillard1899e851999-02-01 12:18:54 +00004481 last = xmlParseElementChildrenContentDecl(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004482 SKIP_BLANKS;
4483 } else {
4484 elem = xmlParseName(ctxt);
4485 if (elem == NULL) {
4486 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004487 ctxt->sax->error(ctxt->userData,
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004488 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004489 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004490 ctxt->wellFormed = 0;
4491 return(NULL);
4492 }
Daniel Veillard1899e851999-02-01 12:18:54 +00004493 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard6454aec1999-09-02 22:04:43 +00004494 xmlFree(elem);
Daniel Veillardb05deb71999-08-10 19:04:08 +00004495 if (CUR == '?') {
4496 last->ocur = XML_ELEMENT_CONTENT_OPT;
4497 NEXT;
4498 } else if (CUR == '*') {
4499 last->ocur = XML_ELEMENT_CONTENT_MULT;
4500 NEXT;
4501 } else if (CUR == '+') {
4502 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4503 NEXT;
4504 } else {
4505 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4506 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004507 }
4508 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004509 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004510 }
Daniel Veillard1899e851999-02-01 12:18:54 +00004511 if ((cur != NULL) && (last != NULL)) {
4512 cur->c2 = last;
4513 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004514 NEXT;
4515 if (CUR == '?') {
4516 ret->ocur = XML_ELEMENT_CONTENT_OPT;
4517 NEXT;
4518 } else if (CUR == '*') {
4519 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4520 NEXT;
4521 } else if (CUR == '+') {
4522 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
4523 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004524 }
4525 return(ret);
4526}
4527
4528/**
4529 * xmlParseElementContentDecl:
4530 * @ctxt: an XML parser context
4531 * @name: the name of the element being defined.
4532 * @result: the Element Content pointer will be stored here if any
Daniel Veillard260a68f1998-08-13 03:39:55 +00004533 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004534 * parse the declaration for an Element content either Mixed or Children,
4535 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4536 *
4537 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
Daniel Veillard11e00581998-10-24 18:27:49 +00004538 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004539 * returns: the type of element content XML_ELEMENT_TYPE_xxx
Daniel Veillard260a68f1998-08-13 03:39:55 +00004540 */
4541
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004542int
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004543xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004544 xmlElementContentPtr *result) {
4545
4546 xmlElementContentPtr tree = NULL;
4547 int res;
4548
4549 *result = NULL;
4550
4551 if (CUR != '(') {
4552 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004553 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004554 "xmlParseElementContentDecl : '(' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004555 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004556 ctxt->wellFormed = 0;
4557 return(-1);
4558 }
4559 NEXT;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004560 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004561 SKIP_BLANKS;
4562 if ((CUR == '#') && (NXT(1) == 'P') &&
4563 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4564 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4565 (NXT(6) == 'A')) {
4566 tree = xmlParseElementMixedContentDecl(ctxt);
4567 res = XML_ELEMENT_TYPE_MIXED;
4568 } else {
4569 tree = xmlParseElementChildrenContentDecl(ctxt);
4570 res = XML_ELEMENT_TYPE_ELEMENT;
4571 }
4572 SKIP_BLANKS;
4573 /****************************
4574 if (CUR != ')') {
4575 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004576 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004577 "xmlParseElementContentDecl : ')' expected\n");
4578 ctxt->wellFormed = 0;
4579 return(-1);
4580 }
4581 ****************************/
Daniel Veillard3b9def11999-01-31 22:15:06 +00004582 *result = tree;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004583 return(res);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004584}
4585
Daniel Veillard11e00581998-10-24 18:27:49 +00004586/**
4587 * xmlParseElementDecl:
4588 * @ctxt: an XML parser context
4589 *
4590 * parse an Element declaration.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004591 *
4592 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4593 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00004594 * [ VC: Unique Element Type Declaration ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00004595 * No element type may be declared more than once
Daniel Veillard1e346af1999-02-22 10:33:01 +00004596 *
4597 * Returns the type of the element, or -1 in case of error
Daniel Veillard260a68f1998-08-13 03:39:55 +00004598 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004599int
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004600xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004601 xmlChar *name;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004602 int ret = -1;
4603 xmlElementContentPtr content = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004604
Daniel Veillardb05deb71999-08-10 19:04:08 +00004605 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004606 if ((CUR == '<') && (NXT(1) == '!') &&
4607 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4608 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4609 (NXT(6) == 'E') && (NXT(7) == 'N') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004610 (NXT(8) == 'T')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004611 SKIP(9);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004612 if (!IS_BLANK(CUR)) {
4613 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004614 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004615 "Space required after 'ELEMENT'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004616 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004617 ctxt->wellFormed = 0;
4618 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004619 SKIP_BLANKS;
4620 name = xmlParseName(ctxt);
4621 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004622 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004623 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004624 "xmlParseElementDecl: no name for Element\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004625 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004626 ctxt->wellFormed = 0;
4627 return(-1);
4628 }
4629 if (!IS_BLANK(CUR)) {
4630 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004631 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004632 "Space required after the element name\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004633 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004634 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004635 }
4636 SKIP_BLANKS;
4637 if ((CUR == 'E') && (NXT(1) == 'M') &&
4638 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4639 (NXT(4) == 'Y')) {
4640 SKIP(5);
4641 /*
4642 * Element must always be empty.
4643 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004644 ret = XML_ELEMENT_TYPE_EMPTY;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004645 } else if ((CUR == 'A') && (NXT(1) == 'N') &&
4646 (NXT(2) == 'Y')) {
4647 SKIP(3);
4648 /*
4649 * Element is a generic container.
4650 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004651 ret = XML_ELEMENT_TYPE_ANY;
4652 } else if (CUR == '(') {
4653 ret = xmlParseElementContentDecl(ctxt, name, &content);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004654 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004655 /*
4656 * [ WFC: PEs in Internal Subset ] error handling.
4657 */
4658 if ((CUR == '%') && (ctxt->external == 0) &&
4659 (ctxt->inputNr == 1)) {
4660 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4661 ctxt->sax->error(ctxt->userData,
4662 "PEReference: forbidden within markup decl in internal subset\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004663 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004664 } else {
4665 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4666 ctxt->sax->error(ctxt->userData,
4667 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004668 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004669 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004670 ctxt->wellFormed = 0;
Daniel Veillard6454aec1999-09-02 22:04:43 +00004671 if (name != NULL) xmlFree(name);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004672 return(-1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004673 }
Daniel Veillard686d6b62000-01-03 11:08:02 +00004674
Daniel Veillard260a68f1998-08-13 03:39:55 +00004675 SKIP_BLANKS;
Daniel Veillard686d6b62000-01-03 11:08:02 +00004676 /*
4677 * Pop-up of finished entities.
4678 */
4679 while ((CUR == 0) && (ctxt->inputNr > 1))
4680 xmlPopInput(ctxt);
4681 SKIP_BLANKS;
4682
Daniel Veillard260a68f1998-08-13 03:39:55 +00004683 if (CUR != '>') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004684 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004685 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00004686 "xmlParseElementDecl: expected '>' at the end\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004687 ctxt->errNo = XML_ERR_GT_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004688 ctxt->wellFormed = 0;
4689 } else {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004690 NEXT;
Daniel Veillard517752b1999-04-05 12:20:10 +00004691 if ((ctxt->sax != NULL) && (ctxt->sax->elementDecl != NULL))
Daniel Veillard011b63c1999-06-02 17:44:04 +00004692 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4693 content);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004694 }
Daniel Veillard14fff061999-06-22 21:49:07 +00004695 if (content != NULL) {
4696 xmlFreeElementContent(content);
4697 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004698 if (name != NULL) {
Daniel Veillard6454aec1999-09-02 22:04:43 +00004699 xmlFree(name);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004700 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004701 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004702 return(ret);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004703}
4704
Daniel Veillard11e00581998-10-24 18:27:49 +00004705/**
4706 * xmlParseMarkupDecl:
4707 * @ctxt: an XML parser context
4708 *
4709 * parse Markup declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00004710 *
4711 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4712 * NotationDecl | PI | Comment
4713 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00004714 * [ VC: Proper Declaration/PE Nesting ]
4715 * TODO Parameter-entity replacement text must be properly nested with
4716 * markup declarations. That is to say, if either the first character
4717 * or the last character of a markup declaration (markupdecl above) is
4718 * contained in the replacement text for a parameter-entity reference,
4719 * both must be contained in the same replacement text.
4720 *
4721 * [ WFC: PEs in Internal Subset ]
4722 * In the internal DTD subset, parameter-entity references can occur
4723 * only where markup declarations can occur, not within markup declarations.
4724 * (This does not apply to references that occur in external parameter
4725 * entities or to the external subset.)
Daniel Veillard260a68f1998-08-13 03:39:55 +00004726 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004727void
4728xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004729 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004730 xmlParseElementDecl(ctxt);
4731 xmlParseAttributeListDecl(ctxt);
4732 xmlParseEntityDecl(ctxt);
4733 xmlParseNotationDecl(ctxt);
4734 xmlParsePI(ctxt);
Daniel Veillardb96e6431999-08-29 21:02:19 +00004735 xmlParseComment(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00004736 /*
4737 * This is only for internal subset. On external entities,
4738 * the replacement is done before parsing stage
4739 */
4740 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4741 xmlParsePEReference(ctxt);
4742 ctxt->instate = XML_PARSER_DTD;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004743}
4744
Daniel Veillard11e00581998-10-24 18:27:49 +00004745/**
Daniel Veillard011b63c1999-06-02 17:44:04 +00004746 * xmlParseTextDecl:
4747 * @ctxt: an XML parser context
4748 *
4749 * parse an XML declaration header for external entities
4750 *
4751 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4752 *
4753 * Returns the only valuable info for an external parsed entity, the encoding
4754 */
4755
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004756xmlChar *
Daniel Veillard011b63c1999-06-02 17:44:04 +00004757xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004758 xmlChar *version;
4759 xmlChar *encoding = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004760
4761 /*
4762 * We know that '<?xml' is here.
4763 */
4764 SKIP(5);
4765
4766 if (!IS_BLANK(CUR)) {
4767 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004768 ctxt->sax->error(ctxt->userData,
4769 "Space needed after '<?xml'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004770 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004771 ctxt->wellFormed = 0;
4772 }
4773 SKIP_BLANKS;
4774
4775 /*
4776 * We may have the VersionInfo here.
4777 */
4778 version = xmlParseVersionInfo(ctxt);
Daniel Veillard011b63c1999-06-02 17:44:04 +00004779 if (version == NULL)
4780 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4781 ctxt->version = xmlStrdup(version);
Daniel Veillard6454aec1999-09-02 22:04:43 +00004782 xmlFree(version);
Daniel Veillard011b63c1999-06-02 17:44:04 +00004783
4784 /*
4785 * We must have the encoding declaration
4786 */
4787 if (!IS_BLANK(CUR)) {
4788 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004789 ctxt->sax->error(ctxt->userData, "Space needed here\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004790 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004791 ctxt->wellFormed = 0;
4792 }
4793 encoding = xmlParseEncodingDecl(ctxt);
4794
4795 SKIP_BLANKS;
4796 if ((CUR == '?') && (NXT(1) == '>')) {
4797 SKIP(2);
4798 } else if (CUR == '>') {
4799 /* Deprecated old WD ... */
4800 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004801 ctxt->sax->error(ctxt->userData,
4802 "XML declaration must end-up with '?>'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004803 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004804 ctxt->wellFormed = 0;
4805 NEXT;
4806 } else {
4807 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004808 ctxt->sax->error(ctxt->userData,
4809 "parsing XML declaration: '?>' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004810 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004811 ctxt->wellFormed = 0;
4812 MOVETO_ENDTAG(CUR_PTR);
4813 NEXT;
4814 }
4815 return(encoding);
4816}
4817
4818/*
4819 * xmlParseConditionalSections
4820 * @ctxt: an XML parser context
4821 *
4822 * TODO : Conditionnal section are not yet supported !
4823 *
4824 * [61] conditionalSect ::= includeSect | ignoreSect
4825 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4826 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4827 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4828 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4829 */
4830
4831void
4832xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4833 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4834 ctxt->sax->warning(ctxt->userData,
4835 "XML conditional section not supported\n");
4836 /*
4837 * Skip up to the end of the conditionnal section.
4838 */
Daniel Veillard71b656e2000-01-05 14:46:17 +00004839 while ((CUR != 0) && ((CUR != ']') || (NXT(1) != ']') || (NXT(2) != '>'))) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00004840 NEXT;
Daniel Veillard71b656e2000-01-05 14:46:17 +00004841 /*
4842 * Pop-up of finished entities.
4843 */
4844 while ((CUR == 0) && (ctxt->inputNr > 1))
4845 xmlPopInput(ctxt);
4846
4847 if (CUR == 0)
4848 GROW;
4849 }
4850
4851 if (CUR == 0)
4852 SHRINK;
4853
Daniel Veillard011b63c1999-06-02 17:44:04 +00004854 if (CUR == 0) {
4855 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4856 ctxt->sax->error(ctxt->userData,
4857 "XML conditional section not closed\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004858 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004859 ctxt->wellFormed = 0;
Daniel Veillard71b656e2000-01-05 14:46:17 +00004860 } else {
4861 SKIP(3);
Daniel Veillard011b63c1999-06-02 17:44:04 +00004862 }
4863}
4864
4865/**
Daniel Veillard00fdf371999-10-08 09:40:39 +00004866 * xmlParseExternalSubset:
Daniel Veillard011b63c1999-06-02 17:44:04 +00004867 * @ctxt: an XML parser context
Daniel Veillard00fdf371999-10-08 09:40:39 +00004868 * @ExternalID: the external identifier
4869 * @SystemID: the system identifier (or URL)
Daniel Veillard011b63c1999-06-02 17:44:04 +00004870 *
4871 * parse Markup declarations from an external subset
4872 *
4873 * [30] extSubset ::= textDecl? extSubsetDecl
4874 *
4875 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
Daniel Veillard011b63c1999-06-02 17:44:04 +00004876 */
4877void
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004878xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4879 const xmlChar *SystemID) {
Daniel Veillarda819dac1999-11-24 18:04:22 +00004880 GROW;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004881 if ((CUR == '<') && (NXT(1) == '?') &&
4882 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4883 (NXT(4) == 'l')) {
Daniel Veillard294cbca1999-12-03 13:19:09 +00004884 xmlChar *decl;
4885
4886 decl = xmlParseTextDecl(ctxt);
4887 if (decl != NULL)
4888 xmlFree(decl);
Daniel Veillard011b63c1999-06-02 17:44:04 +00004889 }
4890 if (ctxt->myDoc == NULL) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00004891 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillard011b63c1999-06-02 17:44:04 +00004892 }
4893 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4894 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4895
Daniel Veillardb05deb71999-08-10 19:04:08 +00004896 ctxt->instate = XML_PARSER_DTD;
4897 ctxt->external = 1;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004898 while (((CUR == '<') && (NXT(1) == '?')) ||
4899 ((CUR == '<') && (NXT(1) == '!')) ||
4900 IS_BLANK(CUR)) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004901 const xmlChar *check = CUR_PTR;
Daniel Veillardb96e6431999-08-29 21:02:19 +00004902 int cons = ctxt->input->consumed;
4903
Daniel Veillard011b63c1999-06-02 17:44:04 +00004904 if ((CUR == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4905 xmlParseConditionalSections(ctxt);
4906 } else if (IS_BLANK(CUR)) {
4907 NEXT;
4908 } else if (CUR == '%') {
4909 xmlParsePEReference(ctxt);
4910 } else
4911 xmlParseMarkupDecl(ctxt);
4912
4913 /*
4914 * Pop-up of finished entities.
4915 */
4916 while ((CUR == 0) && (ctxt->inputNr > 1))
4917 xmlPopInput(ctxt);
4918
Daniel Veillardb96e6431999-08-29 21:02:19 +00004919 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
4920 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4921 ctxt->sax->error(ctxt->userData,
4922 "Content error in the external subset\n");
4923 ctxt->wellFormed = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004924 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
Daniel Veillardb96e6431999-08-29 21:02:19 +00004925 break;
4926 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00004927 }
4928
4929 if (CUR != 0) {
4930 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4931 ctxt->sax->error(ctxt->userData,
4932 "Extra content at the end of the document\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004933 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004934 ctxt->wellFormed = 0;
4935 }
4936
4937}
4938
4939/**
Daniel Veillard011b63c1999-06-02 17:44:04 +00004940 * xmlParseReference:
4941 * @ctxt: an XML parser context
4942 *
4943 * parse and handle entity references in content, depending on the SAX
4944 * interface, this may end-up in a call to character() if this is a
4945 * CharRef, a predefined entity, if there is no reference() callback.
4946 * or if the parser was asked to switch to that mode.
4947 *
4948 * [67] Reference ::= EntityRef | CharRef
4949 */
4950void
4951xmlParseReference(xmlParserCtxtPtr ctxt) {
4952 xmlEntityPtr ent;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004953 xmlChar *val;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004954 if (CUR != '&') return;
4955
Daniel Veillardb96e6431999-08-29 21:02:19 +00004956 if (ctxt->inputNr > 1) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004957 xmlChar cur[2] = { '&' , 0 } ;
Daniel Veillardb96e6431999-08-29 21:02:19 +00004958
4959 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4960 ctxt->sax->characters(ctxt->userData, cur, 1);
4961 if (ctxt->token == '&')
4962 ctxt->token = 0;
4963 else {
4964 SKIP(1);
4965 }
4966 return;
4967 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00004968 if (NXT(1) == '#') {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004969 xmlChar out[2];
Daniel Veillard011b63c1999-06-02 17:44:04 +00004970 int val = xmlParseCharRef(ctxt);
Daniel Veillardb96e6431999-08-29 21:02:19 +00004971 /* invalid for UTF-8 variable encoding !!!!! */
Daniel Veillard011b63c1999-06-02 17:44:04 +00004972 out[0] = val;
4973 out[1] = 0;
4974 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4975 ctxt->sax->characters(ctxt->userData, out, 1);
4976 } else {
4977 ent = xmlParseEntityRef(ctxt);
4978 if (ent == NULL) return;
4979 if ((ent->name != NULL) &&
Daniel Veillardb96e6431999-08-29 21:02:19 +00004980 (ent->type != XML_INTERNAL_PREDEFINED_ENTITY)) {
4981 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4982 (ctxt->replaceEntities == 0)) {
4983 /*
4984 * Create a node.
4985 */
4986 ctxt->sax->reference(ctxt->userData, ent->name);
4987 return;
4988 } else if (ctxt->replaceEntities) {
4989 xmlParserInputPtr input;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004990
Daniel Veillardb96e6431999-08-29 21:02:19 +00004991 input = xmlNewEntityInputStream(ctxt, ent);
4992 xmlPushInput(ctxt, input);
4993 return;
4994 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00004995 }
4996 val = ent->content;
4997 if (val == NULL) return;
4998 /*
4999 * inline the entity.
5000 */
5001 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
5002 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5003 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005004}
5005
Daniel Veillard11e00581998-10-24 18:27:49 +00005006/**
5007 * xmlParseEntityRef:
5008 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00005009 *
5010 * parse ENTITY references declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00005011 *
5012 * [68] EntityRef ::= '&' Name ';'
Daniel Veillard1e346af1999-02-22 10:33:01 +00005013 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005014 * [ WFC: Entity Declared ]
5015 * In a document without any DTD, a document with only an internal DTD
5016 * subset which contains no parameter entity references, or a document
5017 * with "standalone='yes'", the Name given in the entity reference
5018 * must match that in an entity declaration, except that well-formed
5019 * documents need not declare any of the following entities: amp, lt,
5020 * gt, apos, quot. The declaration of a parameter entity must precede
5021 * any reference to it. Similarly, the declaration of a general entity
5022 * must precede any reference to it which appears in a default value in an
5023 * attribute-list declaration. Note that if entities are declared in the
5024 * external subset or in external parameter entities, a non-validating
5025 * processor is not obligated to read and process their declarations;
5026 * for such documents, the rule that an entity must be declared is a
5027 * well-formedness constraint only if standalone='yes'.
5028 *
5029 * [ WFC: Parsed Entity ]
5030 * An entity reference must not contain the name of an unparsed entity
5031 *
Daniel Veillard011b63c1999-06-02 17:44:04 +00005032 * Returns the xmlEntityPtr if found, or NULL otherwise.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005033 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00005034xmlEntityPtr
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005035xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005036 xmlChar *name;
Daniel Veillard517752b1999-04-05 12:20:10 +00005037 xmlEntityPtr ent = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005038
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005039 GROW;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005040
Daniel Veillard260a68f1998-08-13 03:39:55 +00005041 if (CUR == '&') {
5042 NEXT;
5043 name = xmlParseName(ctxt);
5044 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005045 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00005046 ctxt->sax->error(ctxt->userData,
5047 "xmlParseEntityRef: no name\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005048 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005049 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005050 } else {
5051 if (CUR == ';') {
5052 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005053 /*
Daniel Veillard011b63c1999-06-02 17:44:04 +00005054 * Ask first SAX for entity resolution, otherwise try the
5055 * predefined set.
5056 */
5057 if (ctxt->sax != NULL) {
5058 if (ctxt->sax->getEntity != NULL)
5059 ent = ctxt->sax->getEntity(ctxt->userData, name);
5060 if (ent == NULL)
5061 ent = xmlGetPredefinedEntity(name);
5062 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00005063 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00005064 * [ WFC: Entity Declared ]
5065 * In a document without any DTD, a document with only an
5066 * internal DTD subset which contains no parameter entity
5067 * references, or a document with "standalone='yes'", the
5068 * Name given in the entity reference must match that in an
5069 * entity declaration, except that well-formed documents
5070 * need not declare any of the following entities: amp, lt,
5071 * gt, apos, quot.
5072 * The declaration of a parameter entity must precede any
5073 * reference to it.
5074 * Similarly, the declaration of a general entity must
5075 * precede any reference to it which appears in a default
5076 * value in an attribute-list declaration. Note that if
5077 * entities are declared in the external subset or in
5078 * external parameter entities, a non-validating processor
5079 * is not obligated to read and process their declarations;
5080 * for such documents, the rule that an entity must be
5081 * declared is a well-formedness constraint only if
5082 * standalone='yes'.
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005083 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00005084 if (ent == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00005085 if ((ctxt->standalone == 1) ||
5086 ((ctxt->hasExternalSubset == 0) &&
5087 (ctxt->hasPErefs == 0))) {
5088 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard011b63c1999-06-02 17:44:04 +00005089 ctxt->sax->error(ctxt->userData,
5090 "Entity '%s' not defined\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005091 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
Daniel Veillard011b63c1999-06-02 17:44:04 +00005092 ctxt->wellFormed = 0;
Daniel Veillard011b63c1999-06-02 17:44:04 +00005093 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00005094 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5095 ctxt->sax->warning(ctxt->userData,
5096 "Entity '%s' not defined\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005097 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard011b63c1999-06-02 17:44:04 +00005098 }
5099 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005100
5101 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00005102 * [ WFC: Parsed Entity ]
5103 * An entity reference must not contain the name of an
5104 * unparsed entity
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005105 */
Daniel Veillardb05deb71999-08-10 19:04:08 +00005106 else if (ent->type == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5107 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5108 ctxt->sax->error(ctxt->userData,
5109 "Entity reference to unparsed entity %s\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005110 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005111 ctxt->wellFormed = 0;
5112 }
5113
5114 /*
5115 * [ WFC: No External Entity References ]
5116 * Attribute values cannot contain direct or indirect
5117 * entity references to external entities.
5118 */
5119 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5120 (ent->type == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5121 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5122 ctxt->sax->error(ctxt->userData,
5123 "Attribute references external entity '%s'\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005124 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005125 ctxt->wellFormed = 0;
5126 }
5127 /*
5128 * [ WFC: No < in Attribute Values ]
5129 * The replacement text of any entity referred to directly or
5130 * indirectly in an attribute value (other than "&lt;") must
5131 * not contain a <.
5132 */
5133 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardb96e6431999-08-29 21:02:19 +00005134 (ent != NULL) &&
5135 (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
Daniel Veillardb05deb71999-08-10 19:04:08 +00005136 (ent->content != NULL) &&
5137 (xmlStrchr(ent->content, '<'))) {
5138 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5139 ctxt->sax->error(ctxt->userData,
5140 "'<' in entity '%s' is not allowed in attributes values\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005141 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005142 ctxt->wellFormed = 0;
5143 }
5144
5145 /*
5146 * Internal check, no parameter entities here ...
5147 */
5148 else {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005149 switch (ent->type) {
5150 case XML_INTERNAL_PARAMETER_ENTITY:
5151 case XML_EXTERNAL_PARAMETER_ENTITY:
5152 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005153 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005154 "Attempt to reference the parameter entity '%s'\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005155 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005156 ctxt->wellFormed = 0;
5157 break;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005158 }
5159 }
5160
5161 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00005162 * [ WFC: No Recursion ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00005163 * TODO A parsed entity must not contain a recursive reference
5164 * to itself, either directly or indirectly.
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005165 */
Daniel Veillard260a68f1998-08-13 03:39:55 +00005166
Daniel Veillard011b63c1999-06-02 17:44:04 +00005167 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005168 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005169 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005170 "xmlParseEntityRef: expecting ';'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005171 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005172 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005173 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00005174 xmlFree(name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005175 }
5176 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00005177 return(ent);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005178}
Daniel Veillard10a2c651999-12-12 13:03:50 +00005179/**
5180 * xmlParseStringEntityRef:
5181 * @ctxt: an XML parser context
5182 * @str: a pointer to an index in the string
5183 *
5184 * parse ENTITY references declarations, but this version parses it from
5185 * a string value.
5186 *
5187 * [68] EntityRef ::= '&' Name ';'
5188 *
5189 * [ WFC: Entity Declared ]
5190 * In a document without any DTD, a document with only an internal DTD
5191 * subset which contains no parameter entity references, or a document
5192 * with "standalone='yes'", the Name given in the entity reference
5193 * must match that in an entity declaration, except that well-formed
5194 * documents need not declare any of the following entities: amp, lt,
5195 * gt, apos, quot. The declaration of a parameter entity must precede
5196 * any reference to it. Similarly, the declaration of a general entity
5197 * must precede any reference to it which appears in a default value in an
5198 * attribute-list declaration. Note that if entities are declared in the
5199 * external subset or in external parameter entities, a non-validating
5200 * processor is not obligated to read and process their declarations;
5201 * for such documents, the rule that an entity must be declared is a
5202 * well-formedness constraint only if standalone='yes'.
5203 *
5204 * [ WFC: Parsed Entity ]
5205 * An entity reference must not contain the name of an unparsed entity
5206 *
5207 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5208 * is updated to the current location in the string.
5209 */
5210xmlEntityPtr
5211xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5212 xmlChar *name;
5213 const xmlChar *ptr;
5214 xmlChar cur;
5215 xmlEntityPtr ent = NULL;
5216
5217 GROW;
5218
5219 if ((str == NULL) || (*str == NULL)) return(NULL); /* !!! */
5220 ptr = *str;
5221 cur = *ptr;
5222 if (cur == '&') {
5223 ptr++;
5224 cur = *ptr;
5225 name = xmlParseStringName(ctxt, &ptr);
5226 if (name == NULL) {
5227 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5228 ctxt->sax->error(ctxt->userData,
5229 "xmlParseEntityRef: no name\n");
5230 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5231 ctxt->wellFormed = 0;
5232 } else {
5233 if (CUR == ';') {
5234 NEXT;
5235 /*
5236 * Ask first SAX for entity resolution, otherwise try the
5237 * predefined set.
5238 */
5239 if (ctxt->sax != NULL) {
5240 if (ctxt->sax->getEntity != NULL)
5241 ent = ctxt->sax->getEntity(ctxt->userData, name);
5242 if (ent == NULL)
5243 ent = xmlGetPredefinedEntity(name);
5244 }
5245 /*
5246 * [ WFC: Entity Declared ]
5247 * In a document without any DTD, a document with only an
5248 * internal DTD subset which contains no parameter entity
5249 * references, or a document with "standalone='yes'", the
5250 * Name given in the entity reference must match that in an
5251 * entity declaration, except that well-formed documents
5252 * need not declare any of the following entities: amp, lt,
5253 * gt, apos, quot.
5254 * The declaration of a parameter entity must precede any
5255 * reference to it.
5256 * Similarly, the declaration of a general entity must
5257 * precede any reference to it which appears in a default
5258 * value in an attribute-list declaration. Note that if
5259 * entities are declared in the external subset or in
5260 * external parameter entities, a non-validating processor
5261 * is not obligated to read and process their declarations;
5262 * for such documents, the rule that an entity must be
5263 * declared is a well-formedness constraint only if
5264 * standalone='yes'.
5265 */
5266 if (ent == NULL) {
5267 if ((ctxt->standalone == 1) ||
5268 ((ctxt->hasExternalSubset == 0) &&
5269 (ctxt->hasPErefs == 0))) {
5270 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5271 ctxt->sax->error(ctxt->userData,
5272 "Entity '%s' not defined\n", name);
5273 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5274 ctxt->wellFormed = 0;
5275 } else {
5276 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5277 ctxt->sax->warning(ctxt->userData,
5278 "Entity '%s' not defined\n", name);
5279 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5280 }
5281 }
5282
5283 /*
5284 * [ WFC: Parsed Entity ]
5285 * An entity reference must not contain the name of an
5286 * unparsed entity
5287 */
5288 else if (ent->type == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5289 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5290 ctxt->sax->error(ctxt->userData,
5291 "Entity reference to unparsed entity %s\n", name);
5292 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5293 ctxt->wellFormed = 0;
5294 }
5295
5296 /*
5297 * [ WFC: No External Entity References ]
5298 * Attribute values cannot contain direct or indirect
5299 * entity references to external entities.
5300 */
5301 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5302 (ent->type == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5304 ctxt->sax->error(ctxt->userData,
5305 "Attribute references external entity '%s'\n", name);
5306 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5307 ctxt->wellFormed = 0;
5308 }
5309 /*
5310 * [ WFC: No < in Attribute Values ]
5311 * The replacement text of any entity referred to directly or
5312 * indirectly in an attribute value (other than "&lt;") must
5313 * not contain a <.
5314 */
5315 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5316 (ent != NULL) &&
5317 (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
5318 (ent->content != NULL) &&
5319 (xmlStrchr(ent->content, '<'))) {
5320 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5321 ctxt->sax->error(ctxt->userData,
5322 "'<' in entity '%s' is not allowed in attributes values\n", name);
5323 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5324 ctxt->wellFormed = 0;
5325 }
5326
5327 /*
5328 * Internal check, no parameter entities here ...
5329 */
5330 else {
5331 switch (ent->type) {
5332 case XML_INTERNAL_PARAMETER_ENTITY:
5333 case XML_EXTERNAL_PARAMETER_ENTITY:
5334 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5335 ctxt->sax->error(ctxt->userData,
5336 "Attempt to reference the parameter entity '%s'\n", name);
5337 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5338 ctxt->wellFormed = 0;
5339 break;
5340 }
5341 }
5342
5343 /*
5344 * [ WFC: No Recursion ]
5345 * TODO A parsed entity must not contain a recursive reference
5346 * to itself, either directly or indirectly.
5347 */
5348
5349 } else {
5350 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5351 ctxt->sax->error(ctxt->userData,
5352 "xmlParseEntityRef: expecting ';'\n");
5353 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5354 ctxt->wellFormed = 0;
5355 }
5356 xmlFree(name);
5357 }
5358 }
5359 return(ent);
5360}
Daniel Veillard260a68f1998-08-13 03:39:55 +00005361
Daniel Veillard11e00581998-10-24 18:27:49 +00005362/**
5363 * xmlParsePEReference:
5364 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00005365 *
5366 * parse PEReference declarations
Daniel Veillard011b63c1999-06-02 17:44:04 +00005367 * The entity content is handled directly by pushing it's content as
5368 * a new input stream.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005369 *
5370 * [69] PEReference ::= '%' Name ';'
Daniel Veillard1e346af1999-02-22 10:33:01 +00005371 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005372 * [ WFC: No Recursion ]
5373 * TODO A parsed entity must not contain a recursive
5374 * reference to itself, either directly or indirectly.
5375 *
5376 * [ WFC: Entity Declared ]
5377 * In a document without any DTD, a document with only an internal DTD
5378 * subset which contains no parameter entity references, or a document
5379 * with "standalone='yes'", ... ... The declaration of a parameter
5380 * entity must precede any reference to it...
5381 *
5382 * [ VC: Entity Declared ]
5383 * In a document with an external subset or external parameter entities
5384 * with "standalone='no'", ... ... The declaration of a parameter entity
5385 * must precede any reference to it...
5386 *
5387 * [ WFC: In DTD ]
5388 * Parameter-entity references may only appear in the DTD.
5389 * NOTE: misleading but this is handled.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005390 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00005391void
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005392xmlParsePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005393 xmlChar *name;
Daniel Veillard517752b1999-04-05 12:20:10 +00005394 xmlEntityPtr entity = NULL;
Daniel Veillardccb09631998-10-27 06:21:04 +00005395 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005396
5397 if (CUR == '%') {
5398 NEXT;
5399 name = xmlParseName(ctxt);
5400 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005401 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005402 ctxt->sax->error(ctxt->userData,
5403 "xmlParsePEReference: no name\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005404 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005405 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005406 } else {
5407 if (CUR == ';') {
5408 NEXT;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005409 if ((ctxt->sax != NULL) &&
5410 (ctxt->sax->getParameterEntity != NULL))
5411 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5412 name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005413 if (entity == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00005414 /*
5415 * [ WFC: Entity Declared ]
5416 * In a document without any DTD, a document with only an
5417 * internal DTD subset which contains no parameter entity
5418 * references, or a document with "standalone='yes'", ...
5419 * ... The declaration of a parameter entity must precede
5420 * any reference to it...
5421 */
5422 if ((ctxt->standalone == 1) ||
5423 ((ctxt->hasExternalSubset == 0) &&
5424 (ctxt->hasPErefs == 0))) {
5425 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5426 ctxt->sax->error(ctxt->userData,
5427 "PEReference: %%%s; not found\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005428 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005429 ctxt->wellFormed = 0;
5430 } else {
5431 /*
5432 * [ VC: Entity Declared ]
5433 * In a document with an external subset or external
5434 * parameter entities with "standalone='no'", ...
5435 * ... The declaration of a parameter entity must precede
5436 * any reference to it...
5437 */
5438 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5439 ctxt->sax->warning(ctxt->userData,
5440 "PEReference: %%%s; not found\n", name);
5441 ctxt->valid = 0;
5442 }
Daniel Veillardccb09631998-10-27 06:21:04 +00005443 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00005444 /*
5445 * Internal checking in case the entity quest barfed
5446 */
5447 if ((entity->type != XML_INTERNAL_PARAMETER_ENTITY) &&
5448 (entity->type != XML_EXTERNAL_PARAMETER_ENTITY)) {
5449 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5450 ctxt->sax->warning(ctxt->userData,
5451 "Internal: %%%s; is not a parameter entity\n", name);
5452 } else {
5453 input = xmlNewEntityInputStream(ctxt, entity);
5454 xmlPushInput(ctxt, input);
5455 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005456 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00005457 ctxt->hasPErefs = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005458 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005459 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005460 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005461 "xmlParsePEReference: expecting ';'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005462 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005463 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005464 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00005465 xmlFree(name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005466 }
5467 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005468}
5469
Daniel Veillard11e00581998-10-24 18:27:49 +00005470/**
Daniel Veillard10a2c651999-12-12 13:03:50 +00005471 * xmlParseStringPEReference:
5472 * @ctxt: an XML parser context
5473 * @str: a pointer to an index in the string
5474 *
5475 * parse PEReference declarations
5476 *
5477 * [69] PEReference ::= '%' Name ';'
5478 *
5479 * [ WFC: No Recursion ]
5480 * TODO A parsed entity must not contain a recursive
5481 * reference to itself, either directly or indirectly.
5482 *
5483 * [ WFC: Entity Declared ]
5484 * In a document without any DTD, a document with only an internal DTD
5485 * subset which contains no parameter entity references, or a document
5486 * with "standalone='yes'", ... ... The declaration of a parameter
5487 * entity must precede any reference to it...
5488 *
5489 * [ VC: Entity Declared ]
5490 * In a document with an external subset or external parameter entities
5491 * with "standalone='no'", ... ... The declaration of a parameter entity
5492 * must precede any reference to it...
5493 *
5494 * [ WFC: In DTD ]
5495 * Parameter-entity references may only appear in the DTD.
5496 * NOTE: misleading but this is handled.
5497 *
5498 * Returns the string of the entity content.
5499 * str is updated to the current value of the index
5500 */
5501xmlEntityPtr
5502xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5503 const xmlChar *ptr;
5504 xmlChar cur;
5505 xmlChar *name;
5506 xmlEntityPtr entity = NULL;
5507
5508 if ((str == NULL) || (*str == NULL)) return(NULL);
5509 ptr = *str;
5510 cur = *ptr;
5511 if (cur == '%') {
5512 ptr++;
5513 cur = *ptr;
5514 name = xmlParseStringName(ctxt, &ptr);
5515 if (name == NULL) {
5516 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5517 ctxt->sax->error(ctxt->userData,
5518 "xmlParseStringPEReference: no name\n");
5519 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5520 ctxt->wellFormed = 0;
5521 } else {
5522 cur = *ptr;
5523 if (cur == ';') {
5524 ptr++;
5525 cur = *ptr;
5526 if ((ctxt->sax != NULL) &&
5527 (ctxt->sax->getParameterEntity != NULL))
5528 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5529 name);
5530 if (entity == NULL) {
5531 /*
5532 * [ WFC: Entity Declared ]
5533 * In a document without any DTD, a document with only an
5534 * internal DTD subset which contains no parameter entity
5535 * references, or a document with "standalone='yes'", ...
5536 * ... The declaration of a parameter entity must precede
5537 * any reference to it...
5538 */
5539 if ((ctxt->standalone == 1) ||
5540 ((ctxt->hasExternalSubset == 0) &&
5541 (ctxt->hasPErefs == 0))) {
5542 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5543 ctxt->sax->error(ctxt->userData,
5544 "PEReference: %%%s; not found\n", name);
5545 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5546 ctxt->wellFormed = 0;
5547 } else {
5548 /*
5549 * [ VC: Entity Declared ]
5550 * In a document with an external subset or external
5551 * parameter entities with "standalone='no'", ...
5552 * ... The declaration of a parameter entity must
5553 * precede any reference to it...
5554 */
5555 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5556 ctxt->sax->warning(ctxt->userData,
5557 "PEReference: %%%s; not found\n", name);
5558 ctxt->valid = 0;
5559 }
5560 } else {
5561 /*
5562 * Internal checking in case the entity quest barfed
5563 */
5564 if ((entity->type != XML_INTERNAL_PARAMETER_ENTITY) &&
5565 (entity->type != XML_EXTERNAL_PARAMETER_ENTITY)) {
5566 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5567 ctxt->sax->warning(ctxt->userData,
5568 "Internal: %%%s; is not a parameter entity\n", name);
5569 }
5570 }
5571 ctxt->hasPErefs = 1;
5572 } else {
5573 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5574 ctxt->sax->error(ctxt->userData,
5575 "xmlParseStringPEReference: expecting ';'\n");
5576 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5577 ctxt->wellFormed = 0;
5578 }
5579 xmlFree(name);
5580 }
5581 }
5582 *str = ptr;
5583 return(entity);
5584}
5585
5586/**
Daniel Veillard11e00581998-10-24 18:27:49 +00005587 * xmlParseDocTypeDecl :
5588 * @ctxt: an XML parser context
5589 *
5590 * parse a DOCTYPE declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00005591 *
5592 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5593 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
Daniel Veillardb05deb71999-08-10 19:04:08 +00005594 *
5595 * [ VC: Root Element Type ]
5596 * The Name in the document type declaration must match the element
5597 * type of the root element.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005598 */
5599
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005600void
5601xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005602 xmlChar *name;
5603 xmlChar *ExternalID = NULL;
5604 xmlChar *URI = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005605
5606 /*
5607 * We know that '<!DOCTYPE' has been detected.
5608 */
5609 SKIP(9);
5610
5611 SKIP_BLANKS;
5612
5613 /*
5614 * Parse the DOCTYPE name.
5615 */
5616 name = xmlParseName(ctxt);
5617 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005619 ctxt->sax->error(ctxt->userData,
5620 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005621 ctxt->wellFormed = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005622 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005623 }
5624
5625 SKIP_BLANKS;
5626
5627 /*
5628 * Check for SystemID and ExternalID
5629 */
Daniel Veillard1e346af1999-02-22 10:33:01 +00005630 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
Daniel Veillardb05deb71999-08-10 19:04:08 +00005631
5632 if ((URI != NULL) || (ExternalID != NULL)) {
5633 ctxt->hasExternalSubset = 1;
5634 }
5635
Daniel Veillard260a68f1998-08-13 03:39:55 +00005636 SKIP_BLANKS;
5637
Daniel Veillard011b63c1999-06-02 17:44:04 +00005638 /*
5639 * NOTE: the SAX callback may try to fetch the external subset
5640 * entity and fill it up !
5641 */
Daniel Veillard517752b1999-04-05 12:20:10 +00005642 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005643 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005644
5645 /*
Daniel Veillarddbfd6411999-12-28 16:35:14 +00005646 * Cleanup
5647 */
5648 if (URI != NULL) xmlFree(URI);
5649 if (ExternalID != NULL) xmlFree(ExternalID);
5650 if (name != NULL) xmlFree(name);
5651
5652 /*
5653 * Is there any internal subset declarations ?
5654 * they are handled separately in xmlParseInternalSubset()
5655 */
5656 if (CUR == '[')
5657 return;
5658
5659 /*
5660 * We should be at the end of the DOCTYPE declaration.
5661 */
5662 if (CUR != '>') {
5663 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5664 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5665 ctxt->wellFormed = 0;
5666 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5667 }
5668 NEXT;
5669}
5670
5671/**
5672 * xmlParseInternalsubset :
5673 * @ctxt: an XML parser context
5674 *
5675 * parse the internal subset declaration
5676 *
5677 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5678 */
5679
5680void
5681xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5682 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00005683 * Is there any DTD definition ?
5684 */
5685 if (CUR == '[') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00005686 ctxt->instate = XML_PARSER_DTD;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005687 NEXT;
5688 /*
5689 * Parse the succession of Markup declarations and
5690 * PEReferences.
5691 * Subsequence (markupdecl | PEReference | S)*
5692 */
5693 while (CUR != ']') {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005694 const xmlChar *check = CUR_PTR;
Daniel Veillardb96e6431999-08-29 21:02:19 +00005695 int cons = ctxt->input->consumed;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005696
5697 SKIP_BLANKS;
5698 xmlParseMarkupDecl(ctxt);
Daniel Veillardccb09631998-10-27 06:21:04 +00005699 xmlParsePEReference(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005700
Daniel Veillard011b63c1999-06-02 17:44:04 +00005701 /*
5702 * Pop-up of finished entities.
5703 */
5704 while ((CUR == 0) && (ctxt->inputNr > 1))
5705 xmlPopInput(ctxt);
5706
Daniel Veillardc26087b1999-08-30 11:23:51 +00005707 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00005708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5709 ctxt->sax->error(ctxt->userData,
Daniel Veillarddbfd6411999-12-28 16:35:14 +00005710 "xmlParseInternalSubset: error detected in Markup declaration\n");
Daniel Veillardb96e6431999-08-29 21:02:19 +00005711 ctxt->wellFormed = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005712 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillardb96e6431999-08-29 21:02:19 +00005713 break;
5714 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005715 }
5716 if (CUR == ']') NEXT;
5717 }
5718
5719 /*
5720 * We should be at the end of the DOCTYPE declaration.
5721 */
5722 if (CUR != '>') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005723 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005724 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005725 ctxt->wellFormed = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005726 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005727 }
5728 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005729}
5730
Daniel Veillard11e00581998-10-24 18:27:49 +00005731/**
5732 * xmlParseAttribute:
5733 * @ctxt: an XML parser context
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005734 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard11e00581998-10-24 18:27:49 +00005735 *
5736 * parse an attribute
Daniel Veillard260a68f1998-08-13 03:39:55 +00005737 *
5738 * [41] Attribute ::= Name Eq AttValue
5739 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005740 * [ WFC: No External Entity References ]
5741 * Attribute values cannot contain direct or indirect entity references
5742 * to external entities.
5743 *
5744 * [ WFC: No < in Attribute Values ]
5745 * The replacement text of any entity referred to directly or indirectly in
5746 * an attribute value (other than "&lt;") must not contain a <.
5747 *
5748 * [ VC: Attribute Value Type ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00005749 * The attribute must have been declared; the value must be of the type
Daniel Veillardb05deb71999-08-10 19:04:08 +00005750 * declared for it.
5751 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00005752 * [25] Eq ::= S? '=' S?
5753 *
5754 * With namespace:
5755 *
5756 * [NS 11] Attribute ::= QName Eq AttValue
5757 *
5758 * Also the case QName == xmlns:??? is handled independently as a namespace
5759 * definition.
Daniel Veillard1e346af1999-02-22 10:33:01 +00005760 *
Daniel Veillard517752b1999-04-05 12:20:10 +00005761 * Returns the attribute name, and the value in *value.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005762 */
5763
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005764xmlChar *
5765xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
5766 xmlChar *name, *val;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005767
Daniel Veillard517752b1999-04-05 12:20:10 +00005768 *value = NULL;
5769 name = xmlParseName(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005770 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005771 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005772 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005773 ctxt->wellFormed = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005774 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillardccb09631998-10-27 06:21:04 +00005775 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005776 }
5777
5778 /*
5779 * read the value
5780 */
5781 SKIP_BLANKS;
5782 if (CUR == '=') {
5783 NEXT;
5784 SKIP_BLANKS;
Daniel Veillard517752b1999-04-05 12:20:10 +00005785 val = xmlParseAttValue(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00005786 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005787 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005788 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005789 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005790 "Specification mandate value for attribute %s\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005791 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005792 ctxt->wellFormed = 0;
Daniel Veillardccb09631998-10-27 06:21:04 +00005793 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005794 }
5795
Daniel Veillard517752b1999-04-05 12:20:10 +00005796 *value = val;
5797 return(name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005798}
5799
Daniel Veillard11e00581998-10-24 18:27:49 +00005800/**
5801 * xmlParseStartTag:
5802 * @ctxt: an XML parser context
5803 *
5804 * parse a start of tag either for rule element or
5805 * EmptyElement. In both case we don't parse the tag closing chars.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005806 *
5807 * [40] STag ::= '<' Name (S Attribute)* S? '>'
5808 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005809 * [ WFC: Unique Att Spec ]
5810 * No attribute name may appear more than once in the same start-tag or
5811 * empty-element tag.
5812 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00005813 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
5814 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005815 * [ WFC: Unique Att Spec ]
5816 * No attribute name may appear more than once in the same start-tag or
5817 * empty-element tag.
5818 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00005819 * With namespace:
5820 *
5821 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
5822 *
5823 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
Daniel Veillard14fff061999-06-22 21:49:07 +00005824 *
Daniel Veillard7f858501999-11-17 17:32:38 +00005825 * Returne the element name parsed
Daniel Veillard260a68f1998-08-13 03:39:55 +00005826 */
5827
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005828xmlChar *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005829xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005830 xmlChar *name;
5831 xmlChar *attname;
5832 xmlChar *attvalue;
5833 const xmlChar **atts = NULL;
Daniel Veillard517752b1999-04-05 12:20:10 +00005834 int nbatts = 0;
5835 int maxatts = 0;
5836 int i;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005837
Daniel Veillard14fff061999-06-22 21:49:07 +00005838 if (CUR != '<') return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005839 NEXT;
5840
Daniel Veillard517752b1999-04-05 12:20:10 +00005841 name = xmlParseName(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005842 if (name == NULL) {
5843 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005844 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005845 "xmlParseStartTag: invalid element name\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005846 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005847 ctxt->wellFormed = 0;
Daniel Veillard14fff061999-06-22 21:49:07 +00005848 return(NULL);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005849 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005850
5851 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00005852 * Now parse the attributes, it ends up with the ending
5853 *
5854 * (S Attribute)* S?
5855 */
5856 SKIP_BLANKS;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005857 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005858 while ((IS_CHAR(CUR)) &&
5859 (CUR != '>') &&
5860 ((CUR != '/') || (NXT(1) != '>'))) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005861 const xmlChar *q = CUR_PTR;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005862 int cons = ctxt->input->consumed;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005863
Daniel Veillard517752b1999-04-05 12:20:10 +00005864 attname = xmlParseAttribute(ctxt, &attvalue);
5865 if ((attname != NULL) && (attvalue != NULL)) {
5866 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00005867 * [ WFC: Unique Att Spec ]
5868 * No attribute name may appear more than once in the same
5869 * start-tag or empty-element tag.
Daniel Veillard517752b1999-04-05 12:20:10 +00005870 */
5871 for (i = 0; i < nbatts;i += 2) {
5872 if (!xmlStrcmp(atts[i], attname)) {
5873 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00005874 ctxt->sax->error(ctxt->userData,
5875 "Attribute %s redefined\n",
5876 attname);
Daniel Veillard517752b1999-04-05 12:20:10 +00005877 ctxt->wellFormed = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005878 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard6454aec1999-09-02 22:04:43 +00005879 xmlFree(attname);
5880 xmlFree(attvalue);
Daniel Veillardb05deb71999-08-10 19:04:08 +00005881 goto failed;
Daniel Veillard517752b1999-04-05 12:20:10 +00005882 }
5883 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005884
Daniel Veillard517752b1999-04-05 12:20:10 +00005885 /*
5886 * Add the pair to atts
5887 */
5888 if (atts == NULL) {
5889 maxatts = 10;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005890 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillard517752b1999-04-05 12:20:10 +00005891 if (atts == NULL) {
Daniel Veillardbe70ff71999-07-05 16:50:46 +00005892 fprintf(stderr, "malloc of %ld byte failed\n",
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005893 maxatts * (long)sizeof(xmlChar *));
Daniel Veillard14fff061999-06-22 21:49:07 +00005894 return(NULL);
Daniel Veillard517752b1999-04-05 12:20:10 +00005895 }
Daniel Veillard51e3b151999-11-12 17:02:31 +00005896 } else if (nbatts + 4 > maxatts) {
Daniel Veillard517752b1999-04-05 12:20:10 +00005897 maxatts *= 2;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005898 atts = (const xmlChar **) xmlRealloc(atts,
5899 maxatts * sizeof(xmlChar *));
Daniel Veillard517752b1999-04-05 12:20:10 +00005900 if (atts == NULL) {
Daniel Veillardbe70ff71999-07-05 16:50:46 +00005901 fprintf(stderr, "realloc of %ld byte failed\n",
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005902 maxatts * (long)sizeof(xmlChar *));
Daniel Veillard14fff061999-06-22 21:49:07 +00005903 return(NULL);
Daniel Veillard517752b1999-04-05 12:20:10 +00005904 }
5905 }
5906 atts[nbatts++] = attname;
5907 atts[nbatts++] = attvalue;
5908 atts[nbatts] = NULL;
5909 atts[nbatts + 1] = NULL;
5910 }
5911
Daniel Veillardb96e6431999-08-29 21:02:19 +00005912failed:
Daniel Veillard517752b1999-04-05 12:20:10 +00005913 SKIP_BLANKS;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005914 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005915 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005916 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00005917 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005918 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005919 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005920 break;
5921 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005922 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005923 }
5924
5925 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00005926 * SAX: Start of Element !
5927 */
Daniel Veillard517752b1999-04-05 12:20:10 +00005928 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005929 ctxt->sax->startElement(ctxt->userData, name, atts);
Daniel Veillard517752b1999-04-05 12:20:10 +00005930
Daniel Veillard517752b1999-04-05 12:20:10 +00005931 if (atts != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005932 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
Daniel Veillard6454aec1999-09-02 22:04:43 +00005933 xmlFree(atts);
Daniel Veillard517752b1999-04-05 12:20:10 +00005934 }
Daniel Veillard14fff061999-06-22 21:49:07 +00005935 return(name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005936}
5937
Daniel Veillard11e00581998-10-24 18:27:49 +00005938/**
5939 * xmlParseEndTag:
5940 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00005941 *
5942 * parse an end of tag
Daniel Veillard260a68f1998-08-13 03:39:55 +00005943 *
5944 * [42] ETag ::= '</' Name S? '>'
5945 *
5946 * With namespace
5947 *
Daniel Veillard517752b1999-04-05 12:20:10 +00005948 * [NS 9] ETag ::= '</' QName S? '>'
Daniel Veillard260a68f1998-08-13 03:39:55 +00005949 */
5950
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005951void
Daniel Veillarddbfd6411999-12-28 16:35:14 +00005952xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005953 xmlChar *name;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00005954 xmlChar *oldname;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005955
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005956 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005957 if ((CUR != '<') || (NXT(1) != '/')) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005958 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005959 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005960 ctxt->wellFormed = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005961 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005962 return;
5963 }
5964 SKIP(2);
5965
Daniel Veillard517752b1999-04-05 12:20:10 +00005966 name = xmlParseName(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005967
5968 /*
5969 * We should definitely be at the ending "S? '>'" part
5970 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005971 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005972 SKIP_BLANKS;
5973 if ((!IS_CHAR(CUR)) || (CUR != '>')) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005974 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005975 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005976 ctxt->errNo = XML_ERR_GT_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005977 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005978 } else
5979 NEXT;
5980
Daniel Veillard517752b1999-04-05 12:20:10 +00005981 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00005982 * [ WFC: Element Type Match ]
5983 * The Name in an element's end-tag must match the element type in the
5984 * start-tag.
5985 *
Daniel Veillard14fff061999-06-22 21:49:07 +00005986 */
Daniel Veillardda07c342000-01-25 18:31:22 +00005987 if ((name == NULL) || (ctxt->name == NULL) ||
5988 (xmlStrcmp(name, ctxt->name))) {
5989 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
5990 if ((name != NULL) && (ctxt->name != NULL)) {
5991 ctxt->sax->error(ctxt->userData,
5992 "Opening and ending tag mismatch: %s and %s\n",
5993 ctxt->name, name);
5994 } else if (ctxt->name != NULL) {
5995 ctxt->sax->error(ctxt->userData,
5996 "Ending tag eror for: %s\n", ctxt->name);
5997 } else {
5998 ctxt->sax->error(ctxt->userData,
5999 "Ending tag error: internal error ???\n");
6000 }
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006001
Daniel Veillardda07c342000-01-25 18:31:22 +00006002 }
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006003 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
Daniel Veillard14fff061999-06-22 21:49:07 +00006004 ctxt->wellFormed = 0;
6005 }
6006
6007 /*
Daniel Veillard517752b1999-04-05 12:20:10 +00006008 * SAX: End of Tag
6009 */
6010 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006011 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard517752b1999-04-05 12:20:10 +00006012
6013 if (name != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00006014 xmlFree(name);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006015 oldname = namePop(ctxt);
6016 if (oldname != NULL) {
6017#ifdef DEBUG_STACK
6018 fprintf(stderr,"Close: popped %s\n", oldname);
6019#endif
6020 xmlFree(oldname);
6021 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00006022 return;
6023}
6024
Daniel Veillard11e00581998-10-24 18:27:49 +00006025/**
6026 * xmlParseCDSect:
6027 * @ctxt: an XML parser context
6028 *
6029 * Parse escaped pure raw content.
Daniel Veillard260a68f1998-08-13 03:39:55 +00006030 *
6031 * [18] CDSect ::= CDStart CData CDEnd
6032 *
6033 * [19] CDStart ::= '<![CDATA['
6034 *
6035 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6036 *
6037 * [21] CDEnd ::= ']]>'
6038 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006039void
6040xmlParseCDSect(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00006041 xmlChar *buf = NULL;
6042 int len = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006043 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006044 xmlChar r, s;
6045 xmlChar cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006046
Daniel Veillardb05deb71999-08-10 19:04:08 +00006047 if ((NXT(0) == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00006048 (NXT(2) == '[') && (NXT(3) == 'C') &&
6049 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6050 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6051 (NXT(8) == '[')) {
6052 SKIP(9);
6053 } else
6054 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006055
6056 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006057 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006058 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006059 ctxt->sax->error(ctxt->userData,
Daniel Veillard10a2c651999-12-12 13:03:50 +00006060 "CData section not finished\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006061 ctxt->wellFormed = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006062 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006063 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006064 return;
6065 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00006066 r = CUR;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00006067 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006068 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006069 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006070 ctxt->sax->error(ctxt->userData,
Daniel Veillard10a2c651999-12-12 13:03:50 +00006071 "CData section not finished\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006072 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006073 ctxt->wellFormed = 0;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006074 ctxt->instate = XML_PARSER_CONTENT;
6075 return;
6076 }
6077 s = CUR;
6078 NEXT;
6079 cur = CUR;
Daniel Veillard10a2c651999-12-12 13:03:50 +00006080 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6081 if (buf == NULL) {
6082 fprintf(stderr, "malloc of %d byte failed\n", size);
6083 return;
6084 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00006085 while (IS_CHAR(cur) &&
6086 ((r != ']') || (s != ']') || (cur != '>'))) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00006087 if (len + 1 >= size) {
6088 size *= 2;
6089 buf = xmlRealloc(buf, size * sizeof(xmlChar));
6090 if (buf == NULL) {
6091 fprintf(stderr, "realloc of %d byte failed\n", size);
6092 return;
6093 }
6094 }
6095 buf[len++] = r;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006096 r = s;
6097 s = cur;
6098 NEXT;
6099 cur = CUR;
6100 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00006101 buf[len] = 0;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006102 ctxt->instate = XML_PARSER_CONTENT;
6103 if (!IS_CHAR(CUR)) {
6104 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006105 ctxt->sax->error(ctxt->userData,
Daniel Veillard10a2c651999-12-12 13:03:50 +00006106 "CData section not finished\n%.50s\n", buf);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006107 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006108 ctxt->wellFormed = 0;
Daniel Veillard10a2c651999-12-12 13:03:50 +00006109 xmlFree(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006110 return;
6111 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00006112 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006113
6114 /*
Daniel Veillard10a2c651999-12-12 13:03:50 +00006115 * Ok the buffer is to be consumed as cdata.
Daniel Veillard260a68f1998-08-13 03:39:55 +00006116 */
6117 if (ctxt->sax != NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00006118 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillard10a2c651999-12-12 13:03:50 +00006119 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006120 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00006121 xmlFree(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006122}
6123
Daniel Veillard11e00581998-10-24 18:27:49 +00006124/**
6125 * xmlParseContent:
6126 * @ctxt: an XML parser context
6127 *
6128 * Parse a content:
Daniel Veillard260a68f1998-08-13 03:39:55 +00006129 *
6130 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6131 */
6132
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006133void
6134xmlParseContent(xmlParserCtxtPtr ctxt) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00006135 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006136 while ((CUR != '<') || (NXT(1) != '/')) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006137 const xmlChar *test = CUR_PTR;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00006138 int cons = ctxt->input->consumed;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006139 xmlChar tok = ctxt->token;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006140
6141 /*
6142 * First case : a Processing Instruction.
6143 */
6144 if ((CUR == '<') && (NXT(1) == '?')) {
6145 xmlParsePI(ctxt);
6146 }
Daniel Veillard517752b1999-04-05 12:20:10 +00006147
Daniel Veillard260a68f1998-08-13 03:39:55 +00006148 /*
6149 * Second case : a CDSection
6150 */
6151 else if ((CUR == '<') && (NXT(1) == '!') &&
6152 (NXT(2) == '[') && (NXT(3) == 'C') &&
6153 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6154 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6155 (NXT(8) == '[')) {
6156 xmlParseCDSect(ctxt);
6157 }
Daniel Veillard517752b1999-04-05 12:20:10 +00006158
Daniel Veillard260a68f1998-08-13 03:39:55 +00006159 /*
6160 * Third case : a comment
6161 */
6162 else if ((CUR == '<') && (NXT(1) == '!') &&
6163 (NXT(2) == '-') && (NXT(3) == '-')) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00006164 xmlParseComment(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00006165 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006166 }
Daniel Veillard517752b1999-04-05 12:20:10 +00006167
Daniel Veillard260a68f1998-08-13 03:39:55 +00006168 /*
6169 * Fourth case : a sub-element.
6170 */
6171 else if (CUR == '<') {
Daniel Veillard517752b1999-04-05 12:20:10 +00006172 xmlParseElement(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006173 }
Daniel Veillard517752b1999-04-05 12:20:10 +00006174
Daniel Veillard260a68f1998-08-13 03:39:55 +00006175 /*
Daniel Veillardccb09631998-10-27 06:21:04 +00006176 * Fifth case : a reference. If if has not been resolved,
6177 * parsing returns it's Name, create the node
Daniel Veillard260a68f1998-08-13 03:39:55 +00006178 */
Daniel Veillardb05deb71999-08-10 19:04:08 +00006179
Daniel Veillard260a68f1998-08-13 03:39:55 +00006180 else if (CUR == '&') {
Daniel Veillard011b63c1999-06-02 17:44:04 +00006181 xmlParseReference(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006182 }
Daniel Veillard517752b1999-04-05 12:20:10 +00006183
Daniel Veillard260a68f1998-08-13 03:39:55 +00006184 /*
6185 * Last case, text. Note that References are handled directly.
6186 */
6187 else {
6188 xmlParseCharData(ctxt, 0);
6189 }
6190
Daniel Veillarde2d034d1999-07-27 19:52:06 +00006191 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006192 /*
6193 * Pop-up of finished entities.
6194 */
Daniel Veillardbc50b591999-03-01 12:28:53 +00006195 while ((CUR == 0) && (ctxt->inputNr > 1))
6196 xmlPopInput(ctxt);
Daniel Veillard10a2c651999-12-12 13:03:50 +00006197 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006198
Daniel Veillardb96e6431999-08-29 21:02:19 +00006199 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6200 (tok == ctxt->token)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006201 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006202 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006203 "detected an error in element content\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006204 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006205 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006206 break;
6207 }
6208 }
6209}
6210
Daniel Veillard11e00581998-10-24 18:27:49 +00006211/**
6212 * xmlParseElement:
6213 * @ctxt: an XML parser context
6214 *
6215 * parse an XML element, this is highly recursive
Daniel Veillard260a68f1998-08-13 03:39:55 +00006216 *
6217 * [39] element ::= EmptyElemTag | STag content ETag
6218 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00006219 * [ WFC: Element Type Match ]
6220 * The Name in an element's end-tag must match the element type in the
6221 * start-tag.
6222 *
6223 * [ VC: Element Valid ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00006224 * An element is valid if there is a declaration matching elementdecl
Daniel Veillardb05deb71999-08-10 19:04:08 +00006225 * where the Name matches the element type and one of the following holds:
6226 * - The declaration matches EMPTY and the element has no content.
6227 * - The declaration matches children and the sequence of child elements
6228 * belongs to the language generated by the regular expression in the
6229 * content model, with optional white space (characters matching the
6230 * nonterminal S) between each pair of child elements.
6231 * - The declaration matches Mixed and the content consists of character
6232 * data and child elements whose types match names in the content model.
6233 * - The declaration matches ANY, and the types of any child elements have
6234 * been declared.
Daniel Veillard260a68f1998-08-13 03:39:55 +00006235 */
6236
Daniel Veillard517752b1999-04-05 12:20:10 +00006237void
Daniel Veillard1e346af1999-02-22 10:33:01 +00006238xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006239 const xmlChar *openTag = CUR_PTR;
6240 xmlChar *name;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006241 xmlChar *oldname;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006242 xmlParserNodeInfo node_info;
Daniel Veillardc26087b1999-08-30 11:23:51 +00006243 xmlNodePtr ret;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006244
6245 /* Capture start position */
Daniel Veillardc26087b1999-08-30 11:23:51 +00006246 if (ctxt->record_info) {
6247 node_info.begin_pos = ctxt->input->consumed +
6248 (CUR_PTR - ctxt->input->base);
6249 node_info.begin_line = ctxt->input->line;
6250 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00006251
Daniel Veillard14fff061999-06-22 21:49:07 +00006252 name = xmlParseStartTag(ctxt);
6253 if (name == NULL) {
6254 return;
6255 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006256 namePush(ctxt, name);
Daniel Veillardc26087b1999-08-30 11:23:51 +00006257 ret = ctxt->node;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006258
6259 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00006260 * [ VC: Root Element Type ]
6261 * The Name in the document type declaration must match the element
6262 * type of the root element.
6263 */
6264 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6265 ctxt->node && (ctxt->node == ctxt->myDoc->root))
6266 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6267
6268 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00006269 * Check for an Empty Element.
6270 */
6271 if ((CUR == '/') && (NXT(1) == '>')) {
6272 SKIP(2);
Daniel Veillard517752b1999-04-05 12:20:10 +00006273 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
Daniel Veillard14fff061999-06-22 21:49:07 +00006274 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006275 oldname = namePop(ctxt);
6276 if (oldname != NULL) {
6277#ifdef DEBUG_STACK
6278 fprintf(stderr,"Close: popped %s\n", oldname);
6279#endif
6280 xmlFree(oldname);
6281 }
Daniel Veillard517752b1999-04-05 12:20:10 +00006282 return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006283 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00006284 if (CUR == '>') {
6285 NEXT;
6286 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006287 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006288 ctxt->sax->error(ctxt->userData,
6289 "Couldn't find end of Start Tag\n%.30s\n",
Daniel Veillard242590e1998-11-13 18:04:35 +00006290 openTag);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006291 ctxt->wellFormed = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006292 ctxt->errNo = XML_ERR_GT_REQUIRED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006293
6294 /*
6295 * end of parsing of this node.
6296 */
6297 nodePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006298 oldname = namePop(ctxt);
6299 if (oldname != NULL) {
6300#ifdef DEBUG_STACK
6301 fprintf(stderr,"Close: popped %s\n", oldname);
6302#endif
6303 xmlFree(oldname);
6304 }
Daniel Veillardc26087b1999-08-30 11:23:51 +00006305
6306 /*
6307 * Capture end position and add node
6308 */
6309 if ( ret != NULL && ctxt->record_info ) {
6310 node_info.end_pos = ctxt->input->consumed +
6311 (CUR_PTR - ctxt->input->base);
6312 node_info.end_line = ctxt->input->line;
6313 node_info.node = ret;
6314 xmlParserAddNodeInfo(ctxt, &node_info);
6315 }
Daniel Veillard517752b1999-04-05 12:20:10 +00006316 return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006317 }
6318
6319 /*
6320 * Parse the content of the element:
6321 */
6322 xmlParseContent(ctxt);
6323 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006324 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006325 ctxt->sax->error(ctxt->userData,
Daniel Veillard242590e1998-11-13 18:04:35 +00006326 "Premature end of data in tag %.30s\n", openTag);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006327 ctxt->wellFormed = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006328 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006329
6330 /*
6331 * end of parsing of this node.
6332 */
6333 nodePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006334 oldname = namePop(ctxt);
6335 if (oldname != NULL) {
6336#ifdef DEBUG_STACK
6337 fprintf(stderr,"Close: popped %s\n", oldname);
6338#endif
6339 xmlFree(oldname);
6340 }
Daniel Veillard517752b1999-04-05 12:20:10 +00006341 return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006342 }
6343
6344 /*
6345 * parse the end of tag: '</' should be here.
6346 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006347 xmlParseEndTag(ctxt);
Daniel Veillardc26087b1999-08-30 11:23:51 +00006348
6349 /*
6350 * Capture end position and add node
6351 */
6352 if ( ret != NULL && ctxt->record_info ) {
6353 node_info.end_pos = ctxt->input->consumed +
6354 (CUR_PTR - ctxt->input->base);
6355 node_info.end_line = ctxt->input->line;
6356 node_info.node = ret;
6357 xmlParserAddNodeInfo(ctxt, &node_info);
6358 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00006359}
6360
Daniel Veillard11e00581998-10-24 18:27:49 +00006361/**
6362 * xmlParseVersionNum:
6363 * @ctxt: an XML parser context
6364 *
6365 * parse the XML version value.
Daniel Veillard260a68f1998-08-13 03:39:55 +00006366 *
6367 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
Daniel Veillard1e346af1999-02-22 10:33:01 +00006368 *
6369 * Returns the string giving the XML version number, or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00006370 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006371xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006372xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00006373 xmlChar *buf = NULL;
6374 int len = 0;
6375 int size = 10;
6376 xmlChar cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006377
Daniel Veillard10a2c651999-12-12 13:03:50 +00006378 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6379 if (buf == NULL) {
6380 fprintf(stderr, "malloc of %d byte failed\n", size);
6381 return(NULL);
6382 }
6383 cur = CUR;
6384 while (IS_CHAR(cur) &&
6385 (((cur >= 'a') && (cur <= 'z')) ||
6386 ((cur >= 'A') && (cur <= 'Z')) ||
6387 ((cur >= '0') && (cur <= '9')) ||
6388 (cur == '_') || (cur == '.') ||
6389 (cur == ':') || (cur == '-'))) {
6390 if (len + 1 >= size) {
6391 size *= 2;
6392 buf = xmlRealloc(buf, size * sizeof(xmlChar));
6393 if (buf == NULL) {
6394 fprintf(stderr, "realloc of %d byte failed\n", size);
6395 return(NULL);
6396 }
6397 }
6398 buf[len++] = cur;
6399 NEXT;
6400 cur=CUR;
6401 }
6402 buf[len] = 0;
6403 return(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006404}
6405
Daniel Veillard11e00581998-10-24 18:27:49 +00006406/**
6407 * xmlParseVersionInfo:
6408 * @ctxt: an XML parser context
6409 *
6410 * parse the XML version.
Daniel Veillard260a68f1998-08-13 03:39:55 +00006411 *
6412 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6413 *
6414 * [25] Eq ::= S? '=' S?
Daniel Veillard11e00581998-10-24 18:27:49 +00006415 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00006416 * Returns the version string, e.g. "1.0"
Daniel Veillard260a68f1998-08-13 03:39:55 +00006417 */
6418
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006419xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006420xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006421 xmlChar *version = NULL;
6422 const xmlChar *q;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006423
6424 if ((CUR == 'v') && (NXT(1) == 'e') &&
6425 (NXT(2) == 'r') && (NXT(3) == 's') &&
6426 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6427 (NXT(6) == 'n')) {
6428 SKIP(7);
6429 SKIP_BLANKS;
6430 if (CUR != '=') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006431 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006432 ctxt->sax->error(ctxt->userData,
6433 "xmlParseVersionInfo : expected '='\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006434 ctxt->wellFormed = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006435 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006436 return(NULL);
6437 }
6438 NEXT;
6439 SKIP_BLANKS;
6440 if (CUR == '"') {
6441 NEXT;
6442 q = CUR_PTR;
6443 version = xmlParseVersionNum(ctxt);
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006444 if (CUR != '"') {
6445 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006446 ctxt->sax->error(ctxt->userData,
6447 "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006448 ctxt->wellFormed = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006449 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006450 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00006451 NEXT;
6452 } else if (CUR == '\''){
6453 NEXT;
6454 q = CUR_PTR;
6455 version = xmlParseVersionNum(ctxt);
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006456 if (CUR != '\'') {
6457 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006458 ctxt->sax->error(ctxt->userData,
6459 "String not closed\n%.50s\n", q);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006460 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006461 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006462 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00006463 NEXT;
6464 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006465 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006466 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006467 "xmlParseVersionInfo : expected ' or \"\n");
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006468 ctxt->wellFormed = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006469 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006470 }
6471 }
6472 return(version);
6473}
6474
Daniel Veillard11e00581998-10-24 18:27:49 +00006475/**
6476 * xmlParseEncName:
6477 * @ctxt: an XML parser context
6478 *
6479 * parse the XML encoding name
Daniel Veillard260a68f1998-08-13 03:39:55 +00006480 *
6481 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
Daniel Veillard11e00581998-10-24 18:27:49 +00006482 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00006483 * Returns the encoding name value or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00006484 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006485xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006486xmlParseEncName(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00006487 xmlChar *buf = NULL;
6488 int len = 0;
6489 int size = 10;
6490 xmlChar cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006491
Daniel Veillard10a2c651999-12-12 13:03:50 +00006492 cur = CUR;
6493 if (((cur >= 'a') && (cur <= 'z')) ||
6494 ((cur >= 'A') && (cur <= 'Z'))) {
6495 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6496 if (buf == NULL) {
6497 fprintf(stderr, "malloc of %d byte failed\n", size);
6498 return(NULL);
6499 }
6500
6501 buf[len++] = cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006502 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00006503 cur = CUR;
6504 while (IS_CHAR(cur) &&
6505 (((cur >= 'a') && (cur <= 'z')) ||
6506 ((cur >= 'A') && (cur <= 'Z')) ||
6507 ((cur >= '0') && (cur <= '9')) ||
6508 (cur == '.') || (cur == '_') ||
6509 (cur == '-'))) {
6510 if (len + 1 >= size) {
6511 size *= 2;
6512 buf = xmlRealloc(buf, size * sizeof(xmlChar));
6513 if (buf == NULL) {
6514 fprintf(stderr, "realloc of %d byte failed\n", size);
6515 return(NULL);
6516 }
6517 }
6518 buf[len++] = cur;
6519 NEXT;
6520 cur = CUR;
6521 if (cur == 0) {
6522 SHRINK;
6523 GROW;
6524 cur = CUR;
6525 }
6526 }
6527 buf[len] = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006528 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006529 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006530 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006531 ctxt->wellFormed = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006532 ctxt->errNo = XML_ERR_ENCODING_NAME;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006533 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00006534 return(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006535}
6536
Daniel Veillard11e00581998-10-24 18:27:49 +00006537/**
6538 * xmlParseEncodingDecl:
6539 * @ctxt: an XML parser context
6540 *
6541 * parse the XML encoding declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00006542 *
6543 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
Daniel Veillard11e00581998-10-24 18:27:49 +00006544 *
6545 * TODO: this should setup the conversion filters.
6546 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00006547 * Returns the encoding value or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00006548 */
6549
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006550xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006551xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006552 xmlChar *encoding = NULL;
6553 const xmlChar *q;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006554
6555 SKIP_BLANKS;
6556 if ((CUR == 'e') && (NXT(1) == 'n') &&
6557 (NXT(2) == 'c') && (NXT(3) == 'o') &&
6558 (NXT(4) == 'd') && (NXT(5) == 'i') &&
6559 (NXT(6) == 'n') && (NXT(7) == 'g')) {
6560 SKIP(8);
6561 SKIP_BLANKS;
6562 if (CUR != '=') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006563 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006564 ctxt->sax->error(ctxt->userData,
6565 "xmlParseEncodingDecl : expected '='\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006566 ctxt->wellFormed = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006567 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006568 return(NULL);
6569 }
6570 NEXT;
6571 SKIP_BLANKS;
6572 if (CUR == '"') {
6573 NEXT;
6574 q = CUR_PTR;
6575 encoding = xmlParseEncName(ctxt);
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006576 if (CUR != '"') {
6577 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006578 ctxt->sax->error(ctxt->userData,
6579 "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006580 ctxt->wellFormed = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006581 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006582 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00006583 NEXT;
6584 } else if (CUR == '\''){
6585 NEXT;
6586 q = CUR_PTR;
6587 encoding = xmlParseEncName(ctxt);
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006588 if (CUR != '\'') {
6589 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006590 ctxt->sax->error(ctxt->userData,
6591 "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006592 ctxt->wellFormed = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006593 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006594 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00006595 NEXT;
6596 } else if (CUR == '"'){
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006597 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006598 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006599 "xmlParseEncodingDecl : expected ' or \"\n");
6600 ctxt->wellFormed = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006601 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006602 }
6603 }
6604 return(encoding);
6605}
6606
Daniel Veillard11e00581998-10-24 18:27:49 +00006607/**
6608 * xmlParseSDDecl:
6609 * @ctxt: an XML parser context
6610 *
6611 * parse the XML standalone declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00006612 *
6613 * [32] SDDecl ::= S 'standalone' Eq
6614 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Daniel Veillard1e346af1999-02-22 10:33:01 +00006615 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00006616 * [ VC: Standalone Document Declaration ]
6617 * TODO The standalone document declaration must have the value "no"
6618 * if any external markup declarations contain declarations of:
6619 * - attributes with default values, if elements to which these
6620 * attributes apply appear in the document without specifications
6621 * of values for these attributes, or
6622 * - entities (other than amp, lt, gt, apos, quot), if references
6623 * to those entities appear in the document, or
6624 * - attributes with values subject to normalization, where the
6625 * attribute appears in the document with a value which will change
6626 * as a result of normalization, or
6627 * - element types with element content, if white space occurs directly
6628 * within any instance of those types.
6629 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00006630 * Returns 1 if standalone, 0 otherwise
Daniel Veillard260a68f1998-08-13 03:39:55 +00006631 */
6632
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006633int
6634xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006635 int standalone = -1;
6636
6637 SKIP_BLANKS;
6638 if ((CUR == 's') && (NXT(1) == 't') &&
6639 (NXT(2) == 'a') && (NXT(3) == 'n') &&
6640 (NXT(4) == 'd') && (NXT(5) == 'a') &&
6641 (NXT(6) == 'l') && (NXT(7) == 'o') &&
6642 (NXT(8) == 'n') && (NXT(9) == 'e')) {
6643 SKIP(10);
Daniel Veillard011b63c1999-06-02 17:44:04 +00006644 SKIP_BLANKS;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006645 if (CUR != '=') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006646 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006647 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006648 "XML standalone declaration : expected '='\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006649 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006650 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006651 return(standalone);
6652 }
6653 NEXT;
6654 SKIP_BLANKS;
6655 if (CUR == '\''){
6656 NEXT;
6657 if ((CUR == 'n') && (NXT(1) == 'o')) {
6658 standalone = 0;
6659 SKIP(2);
6660 } else if ((CUR == 'y') && (NXT(1) == 'e') &&
6661 (NXT(2) == 's')) {
6662 standalone = 1;
6663 SKIP(3);
6664 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006665 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006666 ctxt->sax->error(ctxt->userData,
6667 "standalone accepts only 'yes' or 'no'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006668 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006669 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006670 }
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006671 if (CUR != '\'') {
6672 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006673 ctxt->sax->error(ctxt->userData, "String not closed\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006674 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006675 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006676 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00006677 NEXT;
6678 } else if (CUR == '"'){
6679 NEXT;
6680 if ((CUR == 'n') && (NXT(1) == 'o')) {
6681 standalone = 0;
6682 SKIP(2);
6683 } else if ((CUR == 'y') && (NXT(1) == 'e') &&
6684 (NXT(2) == 's')) {
6685 standalone = 1;
6686 SKIP(3);
6687 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006688 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006689 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006690 "standalone accepts only 'yes' or 'no'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006691 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006692 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006693 }
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006694 if (CUR != '"') {
6695 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006696 ctxt->sax->error(ctxt->userData, "String not closed\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006697 ctxt->wellFormed = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006698 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006699 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00006700 NEXT;
6701 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006702 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006703 ctxt->sax->error(ctxt->userData,
6704 "Standalone value not found\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006705 ctxt->wellFormed = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006706 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006707 }
6708 }
6709 return(standalone);
6710}
6711
Daniel Veillard11e00581998-10-24 18:27:49 +00006712/**
6713 * xmlParseXMLDecl:
6714 * @ctxt: an XML parser context
6715 *
6716 * parse an XML declaration header
Daniel Veillard260a68f1998-08-13 03:39:55 +00006717 *
6718 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
6719 */
6720
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006721void
6722xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006723 xmlChar *version;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006724
6725 /*
6726 * We know that '<?xml' is here.
6727 */
6728 SKIP(5);
6729
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006730 if (!IS_BLANK(CUR)) {
6731 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006732 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006733 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006734 ctxt->wellFormed = 0;
6735 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00006736 SKIP_BLANKS;
6737
6738 /*
6739 * We should have the VersionInfo here.
6740 */
6741 version = xmlParseVersionInfo(ctxt);
6742 if (version == NULL)
6743 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard517752b1999-04-05 12:20:10 +00006744 ctxt->version = xmlStrdup(version);
Daniel Veillard6454aec1999-09-02 22:04:43 +00006745 xmlFree(version);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006746
6747 /*
6748 * We may have the encoding declaration
6749 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006750 if (!IS_BLANK(CUR)) {
6751 if ((CUR == '?') && (NXT(1) == '>')) {
6752 SKIP(2);
6753 return;
6754 }
6755 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006756 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006757 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006758 ctxt->wellFormed = 0;
6759 }
Daniel Veillard517752b1999-04-05 12:20:10 +00006760 ctxt->encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006761
6762 /*
6763 * We may have the standalone status.
6764 */
Daniel Veillard517752b1999-04-05 12:20:10 +00006765 if ((ctxt->encoding != NULL) && (!IS_BLANK(CUR))) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006766 if ((CUR == '?') && (NXT(1) == '>')) {
6767 SKIP(2);
6768 return;
6769 }
6770 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006771 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006772 ctxt->wellFormed = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006773 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006774 }
6775 SKIP_BLANKS;
Daniel Veillard517752b1999-04-05 12:20:10 +00006776 ctxt->standalone = xmlParseSDDecl(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006777
6778 SKIP_BLANKS;
6779 if ((CUR == '?') && (NXT(1) == '>')) {
6780 SKIP(2);
6781 } else if (CUR == '>') {
6782 /* Deprecated old WD ... */
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006783 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006784 ctxt->sax->error(ctxt->userData,
6785 "XML declaration must end-up with '?>'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006786 ctxt->wellFormed = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006787 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006788 NEXT;
6789 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006790 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006791 ctxt->sax->error(ctxt->userData,
6792 "parsing XML declaration: '?>' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006793 ctxt->wellFormed = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006794 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006795 MOVETO_ENDTAG(CUR_PTR);
6796 NEXT;
6797 }
6798}
6799
Daniel Veillard11e00581998-10-24 18:27:49 +00006800/**
6801 * xmlParseMisc:
6802 * @ctxt: an XML parser context
6803 *
6804 * parse an XML Misc* optionnal field.
Daniel Veillard260a68f1998-08-13 03:39:55 +00006805 *
6806 * [27] Misc ::= Comment | PI | S
6807 */
6808
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006809void
6810xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006811 while (((CUR == '<') && (NXT(1) == '?')) ||
6812 ((CUR == '<') && (NXT(1) == '!') &&
6813 (NXT(2) == '-') && (NXT(3) == '-')) ||
6814 IS_BLANK(CUR)) {
6815 if ((CUR == '<') && (NXT(1) == '?')) {
6816 xmlParsePI(ctxt);
6817 } else if (IS_BLANK(CUR)) {
6818 NEXT;
6819 } else
Daniel Veillardb96e6431999-08-29 21:02:19 +00006820 xmlParseComment(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006821 }
6822}
6823
Daniel Veillard11e00581998-10-24 18:27:49 +00006824/**
6825 * xmlParseDocument :
6826 * @ctxt: an XML parser context
6827 *
6828 * parse an XML document (and build a tree if using the standard SAX
6829 * interface).
Daniel Veillard260a68f1998-08-13 03:39:55 +00006830 *
6831 * [1] document ::= prolog element Misc*
6832 *
6833 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
Daniel Veillard11e00581998-10-24 18:27:49 +00006834 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00006835 * Returns 0, -1 in case of error. the parser context is augmented
Daniel Veillard11e00581998-10-24 18:27:49 +00006836 * as a result of the parsing.
Daniel Veillard260a68f1998-08-13 03:39:55 +00006837 */
6838
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006839int
6840xmlParseDocument(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006841 xmlDefaultSAXHandlerInit();
6842
Daniel Veillarde2d034d1999-07-27 19:52:06 +00006843 GROW;
6844
Daniel Veillard260a68f1998-08-13 03:39:55 +00006845 /*
6846 * SAX: beginning of the document processing.
6847 */
Daniel Veillard517752b1999-04-05 12:20:10 +00006848 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
Daniel Veillard27d88741999-05-29 11:51:49 +00006849 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006850
6851 /*
Daniel Veillardb96e6431999-08-29 21:02:19 +00006852 * TODO We should check for encoding here and plug-in some
6853 * conversion code !!!!
Daniel Veillard260a68f1998-08-13 03:39:55 +00006854 */
6855
6856 /*
6857 * Wipe out everything which is before the first '<'
6858 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006859 if (IS_BLANK(CUR)) {
6860 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006861 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006862 "Extra spaces at the beginning of the document are not allowed\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006863 ctxt->errNo = XML_ERR_DOCUMENT_START;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006864 ctxt->wellFormed = 0;
6865 SKIP_BLANKS;
6866 }
6867
6868 if (CUR == 0) {
6869 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006870 ctxt->sax->error(ctxt->userData, "Document is empty\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006871 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006872 ctxt->wellFormed = 0;
6873 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00006874
6875 /*
6876 * Check for the XMLDecl in the Prolog.
6877 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00006878 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006879 if ((CUR == '<') && (NXT(1) == '?') &&
6880 (NXT(2) == 'x') && (NXT(3) == 'm') &&
Daniel Veillard686d6b62000-01-03 11:08:02 +00006881 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006882 xmlParseXMLDecl(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006883 SKIP_BLANKS;
6884 } else if ((CUR == '<') && (NXT(1) == '?') &&
6885 (NXT(2) == 'X') && (NXT(3) == 'M') &&
Daniel Veillard686d6b62000-01-03 11:08:02 +00006886 (NXT(4) == 'L') && (IS_BLANK(NXT(5)))) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006887 /*
6888 * The first drafts were using <?XML and the final W3C REC
6889 * now use <?xml ...
6890 */
6891 xmlParseXMLDecl(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006892 SKIP_BLANKS;
6893 } else {
Daniel Veillard517752b1999-04-05 12:20:10 +00006894 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006895 }
Daniel Veillard517752b1999-04-05 12:20:10 +00006896 if ((ctxt->sax) && (ctxt->sax->startDocument))
Daniel Veillard27d88741999-05-29 11:51:49 +00006897 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006898
6899 /*
6900 * The Misc part of the Prolog
6901 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00006902 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006903 xmlParseMisc(ctxt);
6904
6905 /*
6906 * Then possibly doc type declaration(s) and more Misc
6907 * (doctypedecl Misc*)?
6908 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00006909 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006910 if ((CUR == '<') && (NXT(1) == '!') &&
6911 (NXT(2) == 'D') && (NXT(3) == 'O') &&
6912 (NXT(4) == 'C') && (NXT(5) == 'T') &&
6913 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
6914 (NXT(8) == 'E')) {
6915 xmlParseDocTypeDecl(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006916 if (CUR == '[') {
6917 ctxt->instate = XML_PARSER_DTD;
6918 xmlParseInternalSubset(ctxt);
6919 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00006920 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006921 xmlParseMisc(ctxt);
6922 }
6923
6924 /*
6925 * Time to start parsing the tree itself
6926 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00006927 GROW;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006928 if (CUR != '<') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006929 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006930 ctxt->sax->error(ctxt->userData,
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006931 "Start tag expect, '<' not found\n");
6932 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006933 ctxt->wellFormed = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006934 ctxt->instate = XML_PARSER_EOF;
6935 } else {
6936 ctxt->instate = XML_PARSER_CONTENT;
6937 xmlParseElement(ctxt);
6938 ctxt->instate = XML_PARSER_EPILOG;
6939
6940
6941 /*
6942 * The Misc part at the end
6943 */
6944 xmlParseMisc(ctxt);
6945
6946 if (CUR != 0) {
6947 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6948 ctxt->sax->error(ctxt->userData,
6949 "Extra content at the end of the document\n");
6950 ctxt->wellFormed = 0;
6951 ctxt->errNo = XML_ERR_DOCUMENT_END;
6952 }
6953 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006954 }
6955
Daniel Veillard260a68f1998-08-13 03:39:55 +00006956 /*
6957 * SAX: end of the document processing.
6958 */
Daniel Veillard517752b1999-04-05 12:20:10 +00006959 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006960 ctxt->sax->endDocument(ctxt->userData);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006961 if (! ctxt->wellFormed) return(-1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006962 return(0);
6963}
6964
Daniel Veillardb05deb71999-08-10 19:04:08 +00006965/************************************************************************
6966 * *
Daniel Veillard7f858501999-11-17 17:32:38 +00006967 * Progressive parsing interfaces *
6968 * *
6969 ************************************************************************/
6970
6971/**
6972 * xmlParseLookupSequence:
6973 * @ctxt: an XML parser context
6974 * @first: the first char to lookup
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006975 * @next: the next char to lookup or zero
6976 * @third: the next char to lookup or zero
Daniel Veillard7f858501999-11-17 17:32:38 +00006977 *
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006978 * Try to find if a sequence (first, next, third) or just (first next) or
6979 * (first) is available in the input stream.
6980 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
6981 * to avoid rescanning sequences of bytes, it DOES change the state of the
6982 * parser, do not use liberally.
Daniel Veillard7f858501999-11-17 17:32:38 +00006983 *
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006984 * Returns the index to the current parsing point if the full sequence
6985 * is available, -1 otherwise.
Daniel Veillard7f858501999-11-17 17:32:38 +00006986 */
6987int
Daniel Veillarddbfd6411999-12-28 16:35:14 +00006988xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
6989 xmlChar next, xmlChar third) {
6990 int base, len;
6991 xmlParserInputPtr in;
6992 const xmlChar *buf;
6993
6994 in = ctxt->input;
6995 if (in == NULL) return(-1);
6996 base = in->cur - in->base;
6997 if (base < 0) return(-1);
6998 if (ctxt->checkIndex > base)
6999 base = ctxt->checkIndex;
7000 if (in->buf == NULL) {
7001 buf = in->base;
7002 len = in->length;
7003 } else {
7004 buf = in->buf->buffer->content;
7005 len = in->buf->buffer->use;
7006 }
7007 /* take into account the sequence length */
7008 if (third) len -= 2;
7009 else if (next) len --;
7010 for (;base < len;base++) {
7011 if (buf[base] == first) {
7012 if (third != 0) {
7013 if ((buf[base + 1] != next) ||
7014 (buf[base + 2] != third)) continue;
7015 } else if (next != 0) {
7016 if (buf[base + 1] != next) continue;
7017 }
7018 ctxt->checkIndex = 0;
7019#ifdef DEBUG_PUSH
7020 if (next == 0)
7021 fprintf(stderr, "PP: lookup '%c' found at %d\n",
7022 first, base);
7023 else if (third == 0)
7024 fprintf(stderr, "PP: lookup '%c%c' found at %d\n",
7025 first, next, base);
7026 else
7027 fprintf(stderr, "PP: lookup '%c%c%c' found at %d\n",
7028 first, next, third, base);
7029#endif
7030 return(base - (in->cur - in->base));
7031 }
7032 }
7033 ctxt->checkIndex = base;
7034#ifdef DEBUG_PUSH
7035 if (next == 0)
7036 fprintf(stderr, "PP: lookup '%c' failed\n", first);
7037 else if (third == 0)
7038 fprintf(stderr, "PP: lookup '%c%c' failed\n", first, next);
7039 else
7040 fprintf(stderr, "PP: lookup '%c%c%c' failed\n", first, next, third);
7041#endif
7042 return(-1);
Daniel Veillard7f858501999-11-17 17:32:38 +00007043}
7044
7045/**
Daniel Veillard71b656e2000-01-05 14:46:17 +00007046 * xmlParseTryOrFinish:
Daniel Veillard7f858501999-11-17 17:32:38 +00007047 * @ctxt: an XML parser context
Daniel Veillard71b656e2000-01-05 14:46:17 +00007048 * @terminate: last chunk indicator
Daniel Veillard7f858501999-11-17 17:32:38 +00007049 *
7050 * Try to progress on parsing
7051 *
7052 * Returns zero if no parsing was possible
7053 */
7054int
Daniel Veillard71b656e2000-01-05 14:46:17 +00007055xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
Daniel Veillard7f858501999-11-17 17:32:38 +00007056 int ret = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007057 xmlParserInputPtr in;
7058 int avail;
7059 xmlChar cur, next;
7060
7061#ifdef DEBUG_PUSH
7062 switch (ctxt->instate) {
7063 case XML_PARSER_EOF:
7064 fprintf(stderr, "PP: try EOF\n"); break;
7065 case XML_PARSER_START:
7066 fprintf(stderr, "PP: try START\n"); break;
7067 case XML_PARSER_MISC:
7068 fprintf(stderr, "PP: try MISC\n");break;
7069 case XML_PARSER_COMMENT:
7070 fprintf(stderr, "PP: try COMMENT\n");break;
7071 case XML_PARSER_PROLOG:
7072 fprintf(stderr, "PP: try PROLOG\n");break;
7073 case XML_PARSER_START_TAG:
7074 fprintf(stderr, "PP: try START_TAG\n");break;
7075 case XML_PARSER_CONTENT:
7076 fprintf(stderr, "PP: try CONTENT\n");break;
7077 case XML_PARSER_CDATA_SECTION:
7078 fprintf(stderr, "PP: try CDATA_SECTION\n");break;
7079 case XML_PARSER_END_TAG:
7080 fprintf(stderr, "PP: try END_TAG\n");break;
7081 case XML_PARSER_ENTITY_DECL:
7082 fprintf(stderr, "PP: try ENTITY_DECL\n");break;
7083 case XML_PARSER_ENTITY_VALUE:
7084 fprintf(stderr, "PP: try ENTITY_VALUE\n");break;
7085 case XML_PARSER_ATTRIBUTE_VALUE:
7086 fprintf(stderr, "PP: try ATTRIBUTE_VALUE\n");break;
7087 case XML_PARSER_DTD:
7088 fprintf(stderr, "PP: try DTD\n");break;
7089 case XML_PARSER_EPILOG:
7090 fprintf(stderr, "PP: try EPILOG\n");break;
7091 case XML_PARSER_PI:
7092 fprintf(stderr, "PP: try PI\n");break;
7093 }
7094#endif
Daniel Veillard7f858501999-11-17 17:32:38 +00007095
7096 while (1) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007097 /*
7098 * Pop-up of finished entities.
7099 */
7100 while ((CUR == 0) && (ctxt->inputNr > 1))
7101 xmlPopInput(ctxt);
7102
7103 in = ctxt->input;
7104 if (in == NULL) break;
7105 if (in->buf == NULL)
7106 avail = in->length - (in->cur - in->base);
7107 else
7108 avail = in->buf->buffer->use - (in->cur - in->base);
7109 if (avail < 1)
7110 goto done;
Daniel Veillard7f858501999-11-17 17:32:38 +00007111 switch (ctxt->instate) {
7112 case XML_PARSER_EOF:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007113 /*
7114 * Document parsing is done !
7115 */
7116 goto done;
7117 case XML_PARSER_START:
7118 /*
7119 * Very first chars read from the document flow.
7120 */
7121 cur = in->cur[0];
7122 if (IS_BLANK(cur)) {
7123 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7124 ctxt->sax->setDocumentLocator(ctxt->userData,
7125 &xmlDefaultSAXLocator);
7126 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7127 ctxt->sax->error(ctxt->userData,
7128 "Extra spaces at the beginning of the document are not allowed\n");
7129 ctxt->errNo = XML_ERR_DOCUMENT_START;
7130 ctxt->wellFormed = 0;
7131 SKIP_BLANKS;
7132 ret++;
7133 if (in->buf == NULL)
7134 avail = in->length - (in->cur - in->base);
7135 else
7136 avail = in->buf->buffer->use - (in->cur - in->base);
7137 }
7138 if (avail < 2)
7139 goto done;
7140
7141 cur = in->cur[0];
7142 next = in->cur[1];
7143 if (cur == 0) {
7144 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7145 ctxt->sax->setDocumentLocator(ctxt->userData,
7146 &xmlDefaultSAXLocator);
7147 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7148 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7149 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7150 ctxt->wellFormed = 0;
7151 ctxt->instate = XML_PARSER_EOF;
7152#ifdef DEBUG_PUSH
7153 fprintf(stderr, "PP: entering EOF\n");
7154#endif
7155 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7156 ctxt->sax->endDocument(ctxt->userData);
7157 goto done;
7158 }
7159 if ((cur == '<') && (next == '?')) {
7160 /* PI or XML decl */
7161 if (avail < 5) return(ret);
Daniel Veillard71b656e2000-01-05 14:46:17 +00007162 if ((!terminate) &&
7163 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007164 return(ret);
7165 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7166 ctxt->sax->setDocumentLocator(ctxt->userData,
7167 &xmlDefaultSAXLocator);
7168 if ((in->cur[2] == 'x') &&
7169 (in->cur[3] == 'm') &&
Daniel Veillard686d6b62000-01-03 11:08:02 +00007170 (in->cur[4] == 'l') &&
7171 (IS_BLANK(in->cur[5]))) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007172 ret += 5;
7173#ifdef DEBUG_PUSH
7174 fprintf(stderr, "PP: Parsing XML Decl\n");
7175#endif
7176 xmlParseXMLDecl(ctxt);
7177 if ((ctxt->sax) && (ctxt->sax->startDocument))
7178 ctxt->sax->startDocument(ctxt->userData);
7179 ctxt->instate = XML_PARSER_MISC;
7180#ifdef DEBUG_PUSH
7181 fprintf(stderr, "PP: entering MISC\n");
7182#endif
7183 } else {
7184 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7185 if ((ctxt->sax) && (ctxt->sax->startDocument))
7186 ctxt->sax->startDocument(ctxt->userData);
7187 ctxt->instate = XML_PARSER_MISC;
7188#ifdef DEBUG_PUSH
7189 fprintf(stderr, "PP: entering MISC\n");
7190#endif
7191 }
7192 } else {
7193 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7194 ctxt->sax->setDocumentLocator(ctxt->userData,
7195 &xmlDefaultSAXLocator);
7196 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7197 if ((ctxt->sax) && (ctxt->sax->startDocument))
7198 ctxt->sax->startDocument(ctxt->userData);
7199 ctxt->instate = XML_PARSER_MISC;
7200#ifdef DEBUG_PUSH
7201 fprintf(stderr, "PP: entering MISC\n");
7202#endif
7203 }
7204 break;
7205 case XML_PARSER_MISC:
7206 SKIP_BLANKS;
7207 if (in->buf == NULL)
7208 avail = in->length - (in->cur - in->base);
7209 else
7210 avail = in->buf->buffer->use - (in->cur - in->base);
7211 if (avail < 2)
7212 goto done;
7213 cur = in->cur[0];
7214 next = in->cur[1];
7215 if ((cur == '<') && (next == '?')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00007216 if ((!terminate) &&
7217 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007218 goto done;
7219#ifdef DEBUG_PUSH
7220 fprintf(stderr, "PP: Parsing PI\n");
7221#endif
7222 xmlParsePI(ctxt);
7223 } else if ((cur == '<') && (next == '!') &&
7224 (in->cur[2] == '-') && (in->cur[3] == '-')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00007225 if ((!terminate) &&
7226 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007227 goto done;
7228#ifdef DEBUG_PUSH
7229 fprintf(stderr, "PP: Parsing Comment\n");
7230#endif
7231 xmlParseComment(ctxt);
7232 ctxt->instate = XML_PARSER_MISC;
7233 } else if ((cur == '<') && (next == '!') &&
7234 (in->cur[2] == 'D') && (in->cur[3] == 'O') &&
7235 (in->cur[4] == 'C') && (in->cur[5] == 'T') &&
7236 (in->cur[6] == 'Y') && (in->cur[7] == 'P') &&
7237 (in->cur[8] == 'E')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00007238 if ((!terminate) &&
7239 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007240 goto done;
7241#ifdef DEBUG_PUSH
7242 fprintf(stderr, "PP: Parsing internal subset\n");
7243#endif
7244 xmlParseDocTypeDecl(ctxt);
7245 if (CUR == '[') {
7246 ctxt->instate = XML_PARSER_DTD;
7247#ifdef DEBUG_PUSH
7248 fprintf(stderr, "PP: entering DTD\n");
7249#endif
7250 } else {
7251 ctxt->instate = XML_PARSER_PROLOG;
7252#ifdef DEBUG_PUSH
7253 fprintf(stderr, "PP: entering PROLOG\n");
7254#endif
7255 }
7256 } else if ((cur == '<') && (next == '!') &&
7257 (avail < 9)) {
7258 goto done;
7259 } else {
7260 ctxt->instate = XML_PARSER_START_TAG;
7261#ifdef DEBUG_PUSH
7262 fprintf(stderr, "PP: entering START_TAG\n");
7263#endif
7264 }
7265 break;
Daniel Veillard7f858501999-11-17 17:32:38 +00007266 case XML_PARSER_PROLOG:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007267 SKIP_BLANKS;
7268 if (in->buf == NULL)
7269 avail = in->length - (in->cur - in->base);
7270 else
7271 avail = in->buf->buffer->use - (in->cur - in->base);
7272 if (avail < 2)
7273 goto done;
7274 cur = in->cur[0];
7275 next = in->cur[1];
7276 if ((cur == '<') && (next == '?')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00007277 if ((!terminate) &&
7278 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007279 goto done;
7280#ifdef DEBUG_PUSH
7281 fprintf(stderr, "PP: Parsing PI\n");
7282#endif
7283 xmlParsePI(ctxt);
7284 } else if ((cur == '<') && (next == '!') &&
7285 (in->cur[2] == '-') && (in->cur[3] == '-')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00007286 if ((!terminate) &&
7287 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007288 goto done;
7289#ifdef DEBUG_PUSH
7290 fprintf(stderr, "PP: Parsing Comment\n");
7291#endif
7292 xmlParseComment(ctxt);
7293 ctxt->instate = XML_PARSER_PROLOG;
7294 } else if ((cur == '<') && (next == '!') &&
7295 (avail < 4)) {
7296 goto done;
7297 } else {
7298 ctxt->instate = XML_PARSER_START_TAG;
7299#ifdef DEBUG_PUSH
7300 fprintf(stderr, "PP: entering START_TAG\n");
7301#endif
7302 }
7303 break;
Daniel Veillard7f858501999-11-17 17:32:38 +00007304 case XML_PARSER_EPILOG:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007305 SKIP_BLANKS;
7306 if (in->buf == NULL)
7307 avail = in->length - (in->cur - in->base);
7308 else
7309 avail = in->buf->buffer->use - (in->cur - in->base);
7310 if (avail < 2)
7311 goto done;
7312 cur = in->cur[0];
7313 next = in->cur[1];
7314 if ((cur == '<') && (next == '?')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00007315 if ((!terminate) &&
7316 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007317 goto done;
7318#ifdef DEBUG_PUSH
7319 fprintf(stderr, "PP: Parsing PI\n");
7320#endif
7321 xmlParsePI(ctxt);
7322 ctxt->instate = XML_PARSER_EPILOG;
7323 } else if ((cur == '<') && (next == '!') &&
7324 (in->cur[2] == '-') && (in->cur[3] == '-')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00007325 if ((!terminate) &&
7326 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007327 goto done;
7328#ifdef DEBUG_PUSH
7329 fprintf(stderr, "PP: Parsing Comment\n");
7330#endif
7331 xmlParseComment(ctxt);
7332 ctxt->instate = XML_PARSER_EPILOG;
7333 } else if ((cur == '<') && (next == '!') &&
7334 (avail < 4)) {
7335 goto done;
7336 } else {
7337 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7338 ctxt->sax->error(ctxt->userData,
7339 "Extra content at the end of the document\n");
7340 ctxt->wellFormed = 0;
7341 ctxt->errNo = XML_ERR_DOCUMENT_END;
7342 ctxt->instate = XML_PARSER_EOF;
7343#ifdef DEBUG_PUSH
7344 fprintf(stderr, "PP: entering EOF\n");
7345#endif
7346 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7347 ctxt->sax->endDocument(ctxt->userData);
7348 goto done;
7349 }
7350 break;
7351 case XML_PARSER_START_TAG: {
7352 xmlChar *name, *oldname;
7353
7354 if (avail < 2)
7355 goto done;
7356 cur = in->cur[0];
7357 if (cur != '<') {
7358 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7359 ctxt->sax->error(ctxt->userData,
7360 "Start tag expect, '<' not found\n");
7361 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7362 ctxt->wellFormed = 0;
7363 ctxt->instate = XML_PARSER_EOF;
7364#ifdef DEBUG_PUSH
7365 fprintf(stderr, "PP: entering EOF\n");
7366#endif
7367 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7368 ctxt->sax->endDocument(ctxt->userData);
7369 goto done;
7370 }
Daniel Veillard71b656e2000-01-05 14:46:17 +00007371 if ((!terminate) &&
7372 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007373 goto done;
7374 name = xmlParseStartTag(ctxt);
7375 if (name == NULL) {
7376 ctxt->instate = XML_PARSER_EOF;
7377#ifdef DEBUG_PUSH
7378 fprintf(stderr, "PP: entering EOF\n");
7379#endif
7380 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7381 ctxt->sax->endDocument(ctxt->userData);
7382 goto done;
7383 }
7384 namePush(ctxt, xmlStrdup(name));
7385
7386 /*
7387 * [ VC: Root Element Type ]
7388 * The Name in the document type declaration must match
7389 * the element type of the root element.
7390 */
7391 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7392 ctxt->node && (ctxt->node == ctxt->myDoc->root))
7393 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7394
7395 /*
7396 * Check for an Empty Element.
7397 */
7398 if ((CUR == '/') && (NXT(1) == '>')) {
7399 SKIP(2);
7400 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
7401 ctxt->sax->endElement(ctxt->userData, name);
7402 xmlFree(name);
7403 oldname = namePop(ctxt);
7404 if (oldname != NULL) {
7405#ifdef DEBUG_STACK
7406 fprintf(stderr,"Close: popped %s\n", oldname);
7407#endif
7408 xmlFree(oldname);
7409 }
7410 if (ctxt->name == NULL) {
7411 ctxt->instate = XML_PARSER_EPILOG;
7412#ifdef DEBUG_PUSH
7413 fprintf(stderr, "PP: entering EPILOG\n");
7414#endif
7415 } else {
7416 ctxt->instate = XML_PARSER_CONTENT;
7417#ifdef DEBUG_PUSH
7418 fprintf(stderr, "PP: entering CONTENT\n");
7419#endif
7420 }
7421 break;
7422 }
7423 if (CUR == '>') {
7424 NEXT;
7425 } else {
7426 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7427 ctxt->sax->error(ctxt->userData,
7428 "Couldn't find end of Start Tag %s\n",
7429 name);
7430 ctxt->wellFormed = 0;
7431 ctxt->errNo = XML_ERR_GT_REQUIRED;
7432
7433 /*
7434 * end of parsing of this node.
7435 */
7436 nodePop(ctxt);
7437 oldname = namePop(ctxt);
7438 if (oldname != NULL) {
7439#ifdef DEBUG_STACK
7440 fprintf(stderr,"Close: popped %s\n", oldname);
7441#endif
7442 xmlFree(oldname);
7443 }
7444 }
7445 xmlFree(name);
7446 ctxt->instate = XML_PARSER_CONTENT;
7447#ifdef DEBUG_PUSH
7448 fprintf(stderr, "PP: entering CONTENT\n");
7449#endif
7450 break;
7451 }
7452 case XML_PARSER_CONTENT:
7453 /*
7454 * Handle preparsed entities and charRef
7455 */
7456 if (ctxt->token != 0) {
7457 xmlChar cur[2] = { 0 , 0 } ;
7458
7459 cur[0] = (xmlChar) ctxt->token;
7460 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
7461 ctxt->sax->characters(ctxt->userData, cur, 1);
7462 ctxt->token = 0;
7463 }
7464 if (avail < 2)
7465 goto done;
7466 cur = in->cur[0];
7467 next = in->cur[1];
7468 if ((cur == '<') && (next == '?')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00007469 if ((!terminate) &&
7470 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007471 goto done;
7472#ifdef DEBUG_PUSH
7473 fprintf(stderr, "PP: Parsing PI\n");
7474#endif
7475 xmlParsePI(ctxt);
7476 } else if ((cur == '<') && (next == '!') &&
7477 (in->cur[2] == '-') && (in->cur[3] == '-')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00007478 if ((!terminate) &&
7479 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007480 goto done;
7481#ifdef DEBUG_PUSH
7482 fprintf(stderr, "PP: Parsing Comment\n");
7483#endif
7484 xmlParseComment(ctxt);
7485 ctxt->instate = XML_PARSER_CONTENT;
7486 } else if ((cur == '<') && (in->cur[1] == '!') &&
7487 (in->cur[2] == '[') && (NXT(3) == 'C') &&
7488 (in->cur[4] == 'D') && (NXT(5) == 'A') &&
7489 (in->cur[6] == 'T') && (NXT(7) == 'A') &&
7490 (in->cur[8] == '[')) {
7491 SKIP(9);
7492 ctxt->instate = XML_PARSER_CDATA_SECTION;
7493#ifdef DEBUG_PUSH
7494 fprintf(stderr, "PP: entering CDATA_SECTION\n");
7495#endif
7496 break;
7497 } else if ((cur == '<') && (next == '!') &&
7498 (avail < 9)) {
7499 goto done;
7500 } else if ((cur == '<') && (next == '/')) {
7501 ctxt->instate = XML_PARSER_END_TAG;
7502#ifdef DEBUG_PUSH
7503 fprintf(stderr, "PP: entering END_TAG\n");
7504#endif
7505 break;
7506 } else if (cur == '<') {
7507 ctxt->instate = XML_PARSER_START_TAG;
7508#ifdef DEBUG_PUSH
7509 fprintf(stderr, "PP: entering START_TAG\n");
7510#endif
7511 break;
7512 } else if (cur == '&') {
Daniel Veillard71b656e2000-01-05 14:46:17 +00007513 if ((!terminate) &&
7514 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007515 goto done;
7516#ifdef DEBUG_PUSH
7517 fprintf(stderr, "PP: Parsing Reference\n");
7518#endif
7519 /* TODO: check generation of subtrees if noent !!! */
7520 xmlParseReference(ctxt);
7521 } else {
7522 /* TODO Avoid the extra copy, handle directly !!!!!! */
7523 /*
7524 * Goal of the following test is :
7525 * - minimize calls to the SAX 'character' callback
7526 * when they are mergeable
7527 * - handle an problem for isBlank when we only parse
7528 * a sequence of blank chars and the next one is
7529 * not available to check against '<' presence.
7530 * - tries to homogenize the differences in SAX
7531 * callbacks beween the push and pull versions
7532 * of the parser.
7533 */
7534 if ((ctxt->inputNr == 1) &&
7535 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00007536 if ((!terminate) &&
7537 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007538 goto done;
7539 }
7540 ctxt->checkIndex = 0;
7541#ifdef DEBUG_PUSH
7542 fprintf(stderr, "PP: Parsing char data\n");
7543#endif
7544 xmlParseCharData(ctxt, 0);
7545 }
7546 /*
7547 * Pop-up of finished entities.
7548 */
7549 while ((CUR == 0) && (ctxt->inputNr > 1))
7550 xmlPopInput(ctxt);
7551 break;
7552 case XML_PARSER_CDATA_SECTION: {
7553 /*
7554 * The Push mode need to have the SAX callback for
7555 * cdataBlock merge back contiguous callbacks.
7556 */
7557 int base;
7558
7559 in = ctxt->input;
7560 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
7561 if (base < 0) {
7562 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
7563 if (ctxt->sax != NULL) {
7564 if (ctxt->sax->cdataBlock != NULL)
7565 ctxt->sax->cdataBlock(ctxt->userData, in->cur,
7566 XML_PARSER_BIG_BUFFER_SIZE);
7567 }
7568 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
7569 ctxt->checkIndex = 0;
7570 }
7571 goto done;
7572 } else {
7573 if ((ctxt->sax != NULL) && (base > 0)) {
7574 if (ctxt->sax->cdataBlock != NULL)
7575 ctxt->sax->cdataBlock(ctxt->userData,
7576 in->cur, base);
7577 }
7578 SKIP(base + 3);
7579 ctxt->checkIndex = 0;
7580 ctxt->instate = XML_PARSER_CONTENT;
7581#ifdef DEBUG_PUSH
7582 fprintf(stderr, "PP: entering CONTENT\n");
7583#endif
7584 }
7585 break;
7586 }
Daniel Veillard5e5c6231999-12-29 12:49:06 +00007587 case XML_PARSER_END_TAG:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007588 if (avail < 2)
7589 goto done;
Daniel Veillard71b656e2000-01-05 14:46:17 +00007590 if ((!terminate) &&
7591 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007592 goto done;
7593 xmlParseEndTag(ctxt);
7594 if (ctxt->name == NULL) {
7595 ctxt->instate = XML_PARSER_EPILOG;
7596#ifdef DEBUG_PUSH
7597 fprintf(stderr, "PP: entering EPILOG\n");
7598#endif
7599 } else {
7600 ctxt->instate = XML_PARSER_CONTENT;
7601#ifdef DEBUG_PUSH
7602 fprintf(stderr, "PP: entering CONTENT\n");
7603#endif
7604 }
7605 break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007606 case XML_PARSER_DTD: {
7607 /*
7608 * Sorry but progressive parsing of the internal subset
7609 * is not expected to be supported. We first check that
7610 * the full content of the internal subset is available and
7611 * the parsing is launched only at that point.
7612 * Internal subset ends up with "']' S? '>'" in an unescaped
7613 * section and not in a ']]>' sequence which are conditional
7614 * sections (whoever argued to keep that crap in XML deserve
7615 * a place in hell !).
7616 */
7617 int base, i;
7618 xmlChar *buf;
7619 xmlChar quote = 0;
7620
7621 base = in->cur - in->base;
7622 if (base < 0) return(0);
7623 if (ctxt->checkIndex > base)
7624 base = ctxt->checkIndex;
7625 buf = in->buf->buffer->content;
7626 for (;base < in->buf->buffer->use;base++) {
7627 if (quote != 0) {
7628 if (buf[base] == quote)
7629 quote = 0;
7630 continue;
7631 }
7632 if (buf[base] == '"') {
7633 quote = '"';
7634 continue;
7635 }
7636 if (buf[base] == '\'') {
7637 quote = '\'';
7638 continue;
7639 }
7640 if (buf[base] == ']') {
7641 if (base +1 >= in->buf->buffer->use)
7642 break;
7643 if (buf[base + 1] == ']') {
7644 /* conditional crap, skip both ']' ! */
7645 base++;
7646 continue;
7647 }
7648 for (i = 0;base + i < in->buf->buffer->use;i++) {
7649 if (buf[base + i] == '>')
7650 goto found_end_int_subset;
7651 }
7652 break;
7653 }
7654 }
7655 /*
7656 * We didn't found the end of the Internal subset
7657 */
7658 if (quote == 0)
7659 ctxt->checkIndex = base;
7660#ifdef DEBUG_PUSH
7661 if (next == 0)
7662 fprintf(stderr, "PP: lookup of int subset end filed\n");
7663#endif
7664 goto done;
7665
7666found_end_int_subset:
7667 xmlParseInternalSubset(ctxt);
7668 ctxt->instate = XML_PARSER_PROLOG;
7669 ctxt->checkIndex = 0;
7670#ifdef DEBUG_PUSH
7671 fprintf(stderr, "PP: entering PROLOG\n");
7672#endif
7673 break;
7674 }
Daniel Veillard7f858501999-11-17 17:32:38 +00007675 case XML_PARSER_COMMENT:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007676 fprintf(stderr, "PP: internal error, state == COMMENT\n");
7677 ctxt->instate = XML_PARSER_CONTENT;
7678#ifdef DEBUG_PUSH
7679 fprintf(stderr, "PP: entering CONTENT\n");
7680#endif
7681 break;
7682 case XML_PARSER_PI:
7683 fprintf(stderr, "PP: internal error, state == PI\n");
7684 ctxt->instate = XML_PARSER_CONTENT;
7685#ifdef DEBUG_PUSH
7686 fprintf(stderr, "PP: entering CONTENT\n");
7687#endif
7688 break;
7689 case XML_PARSER_ENTITY_DECL:
7690 fprintf(stderr, "PP: internal error, state == ENTITY_DECL\n");
7691 ctxt->instate = XML_PARSER_DTD;
7692#ifdef DEBUG_PUSH
7693 fprintf(stderr, "PP: entering DTD\n");
7694#endif
7695 break;
7696 case XML_PARSER_ENTITY_VALUE:
7697 fprintf(stderr, "PP: internal error, state == ENTITY_VALUE\n");
7698 ctxt->instate = XML_PARSER_CONTENT;
7699#ifdef DEBUG_PUSH
7700 fprintf(stderr, "PP: entering DTD\n");
7701#endif
7702 break;
7703 case XML_PARSER_ATTRIBUTE_VALUE:
7704 fprintf(stderr, "PP: internal error, state == ATTRIBUTE_VALUE\n");
7705 ctxt->instate = XML_PARSER_START_TAG;
7706#ifdef DEBUG_PUSH
7707 fprintf(stderr, "PP: entering START_TAG\n");
7708#endif
7709 break;
Daniel Veillard7f858501999-11-17 17:32:38 +00007710 }
7711 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007712done:
7713#ifdef DEBUG_PUSH
7714 fprintf(stderr, "PP: done %d\n", ret);
7715#endif
Daniel Veillard7f858501999-11-17 17:32:38 +00007716 return(ret);
7717}
7718
7719/**
Daniel Veillard71b656e2000-01-05 14:46:17 +00007720 * xmlParseTry:
7721 * @ctxt: an XML parser context
7722 *
7723 * Try to progress on parsing
7724 *
7725 * Returns zero if no parsing was possible
7726 */
7727int
7728xmlParseTry(xmlParserCtxtPtr ctxt) {
7729 return(xmlParseTryOrFinish(ctxt, 0));
7730}
7731
7732/**
Daniel Veillard7f858501999-11-17 17:32:38 +00007733 * xmlParseChunk:
7734 * @ctxt: an XML parser context
7735 * @chunk: an char array
7736 * @size: the size in byte of the chunk
7737 * @terminate: last chunk indicator
7738 *
7739 * Parse a Chunk of memory
7740 *
7741 * Returns zero if no error, the xmlParserErrors otherwise.
7742 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007743int
Daniel Veillard7f858501999-11-17 17:32:38 +00007744xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
7745 int terminate) {
Daniel Veillarda819dac1999-11-24 18:04:22 +00007746 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007747 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
7748 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
7749 int cur = ctxt->input->cur - ctxt->input->base;
7750
Daniel Veillarda819dac1999-11-24 18:04:22 +00007751 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007752 ctxt->input->base = ctxt->input->buf->buffer->content + base;
7753 ctxt->input->cur = ctxt->input->base + cur;
7754#ifdef DEBUG_PUSH
7755 fprintf(stderr, "PP: pushed %d\n", size);
7756#endif
7757
Daniel Veillardd0f7f742000-02-02 17:42:48 +00007758 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
7759 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007760 } else if (ctxt->instate != XML_PARSER_EOF)
Daniel Veillard71b656e2000-01-05 14:46:17 +00007761 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007762 if (terminate) {
7763 if ((ctxt->instate != XML_PARSER_EOF) &&
7764 (ctxt->instate != XML_PARSER_EPILOG)) {
7765 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7766 ctxt->sax->error(ctxt->userData,
7767 "Extra content at the end of the document\n");
7768 ctxt->wellFormed = 0;
7769 ctxt->errNo = XML_ERR_DOCUMENT_END;
7770 }
7771 if (ctxt->instate != XML_PARSER_EOF) {
7772 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7773 ctxt->sax->endDocument(ctxt->userData);
7774 }
7775 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard7f858501999-11-17 17:32:38 +00007776 }
7777 return((xmlParserErrors) ctxt->errNo);
7778}
7779
7780/************************************************************************
7781 * *
Daniel Veillardb05deb71999-08-10 19:04:08 +00007782 * I/O front end functions to the parser *
7783 * *
7784 ************************************************************************/
7785
Daniel Veillard11e00581998-10-24 18:27:49 +00007786/**
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007787 * xmlCreatePushParserCtxt :
7788 * @sax: a SAX handler
7789 * @user_data: The user data returned on SAX callbacks
7790 * @chunk: a pointer to an array of chars
7791 * @size: number of chars in the array
7792 * @filename: an optional file name or URI
7793 *
7794 * Create a parser context for using the XML parser in push mode
7795 * To allow content encoding detection, @size should be >= 4
7796 * The value of @filename is used for fetching external entities
7797 * and error/warning reports.
7798 *
7799 * Returns the new parser context or NULL
7800 */
7801xmlParserCtxtPtr
7802xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
7803 const char *chunk, int size, const char *filename) {
7804 xmlParserCtxtPtr ctxt;
7805 xmlParserInputPtr inputStream;
7806 xmlParserInputBufferPtr buf;
7807 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
7808
7809 /*
7810 * plug some encoding conversion routines here. !!!
7811 */
7812 if ((chunk != NULL) && (size >= 4))
7813 enc = xmlDetectCharEncoding((const xmlChar *) chunk);
7814
7815 buf = xmlAllocParserInputBuffer(enc);
7816 if (buf == NULL) return(NULL);
7817
7818 ctxt = xmlNewParserCtxt();
7819 if (ctxt == NULL) {
7820 xmlFree(buf);
7821 return(NULL);
7822 }
7823 if (sax != NULL) {
7824 if (ctxt->sax != &xmlDefaultSAXHandler)
7825 xmlFree(ctxt->sax);
7826 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
7827 if (ctxt->sax == NULL) {
7828 xmlFree(buf);
7829 xmlFree(ctxt);
7830 return(NULL);
7831 }
7832 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
7833 if (user_data != NULL)
7834 ctxt->userData = user_data;
7835 }
7836 if (filename == NULL) {
7837 ctxt->directory = NULL;
7838 } else {
7839 ctxt->directory = xmlParserGetDirectory(filename);
7840 }
7841
7842 inputStream = xmlNewInputStream(ctxt);
7843 if (inputStream == NULL) {
7844 xmlFreeParserCtxt(ctxt);
7845 return(NULL);
7846 }
7847
7848 if (filename == NULL)
7849 inputStream->filename = NULL;
7850 else
7851 inputStream->filename = xmlMemStrdup(filename);
7852 inputStream->buf = buf;
7853 inputStream->base = inputStream->buf->buffer->content;
7854 inputStream->cur = inputStream->buf->buffer->content;
7855
7856 inputPush(ctxt, inputStream);
7857
7858 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
7859 (ctxt->input->buf != NULL)) {
7860 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
7861#ifdef DEBUG_PUSH
7862 fprintf(stderr, "PP: pushed %d\n", size);
7863#endif
7864 }
7865
7866 return(ctxt);
7867}
7868
7869/**
Daniel Veillardbe70ff71999-07-05 16:50:46 +00007870 * xmlCreateDocParserCtxt :
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007871 * @cur: a pointer to an array of xmlChar
Daniel Veillardd692aa41999-02-28 21:54:31 +00007872 *
7873 * Create a parser context for an XML in-memory document.
7874 *
7875 * Returns the new parser context or NULL
7876 */
7877xmlParserCtxtPtr
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007878xmlCreateDocParserCtxt(xmlChar *cur) {
Daniel Veillardd692aa41999-02-28 21:54:31 +00007879 xmlParserCtxtPtr ctxt;
7880 xmlParserInputPtr input;
Daniel Veillard27d88741999-05-29 11:51:49 +00007881 xmlCharEncoding enc;
Daniel Veillardd692aa41999-02-28 21:54:31 +00007882
Daniel Veillardb05deb71999-08-10 19:04:08 +00007883 ctxt = xmlNewParserCtxt();
Daniel Veillardd692aa41999-02-28 21:54:31 +00007884 if (ctxt == NULL) {
Daniel Veillardd692aa41999-02-28 21:54:31 +00007885 return(NULL);
7886 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00007887 input = xmlNewInputStream(ctxt);
Daniel Veillardd692aa41999-02-28 21:54:31 +00007888 if (input == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00007889 xmlFreeParserCtxt(ctxt);
Daniel Veillardd692aa41999-02-28 21:54:31 +00007890 return(NULL);
7891 }
7892
Daniel Veillard27d88741999-05-29 11:51:49 +00007893 /*
7894 * plug some encoding conversion routines here. !!!
7895 */
7896 enc = xmlDetectCharEncoding(cur);
7897 xmlSwitchEncoding(ctxt, enc);
7898
Daniel Veillardd692aa41999-02-28 21:54:31 +00007899 input->base = cur;
7900 input->cur = cur;
Daniel Veillardd692aa41999-02-28 21:54:31 +00007901
7902 inputPush(ctxt, input);
7903 return(ctxt);
7904}
7905
7906/**
Daniel Veillard42dc9b31998-11-09 01:17:21 +00007907 * xmlSAXParseDoc :
7908 * @sax: the SAX handler block
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007909 * @cur: a pointer to an array of xmlChar
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007910 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
7911 * documents
Daniel Veillard11e00581998-10-24 18:27:49 +00007912 *
7913 * parse an XML in-memory document and build a tree.
Daniel Veillard42dc9b31998-11-09 01:17:21 +00007914 * It use the given SAX function block to handle the parsing callback.
7915 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillard11e00581998-10-24 18:27:49 +00007916 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00007917 * Returns the resulting document tree
Daniel Veillard260a68f1998-08-13 03:39:55 +00007918 */
7919
Daniel Veillard1e346af1999-02-22 10:33:01 +00007920xmlDocPtr
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007921xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00007922 xmlDocPtr ret;
7923 xmlParserCtxtPtr ctxt;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007924
7925 if (cur == NULL) return(NULL);
7926
Daniel Veillardd692aa41999-02-28 21:54:31 +00007927
7928 ctxt = xmlCreateDocParserCtxt(cur);
7929 if (ctxt == NULL) return(NULL);
Daniel Veillard27d88741999-05-29 11:51:49 +00007930 if (sax != NULL) {
7931 ctxt->sax = sax;
7932 ctxt->userData = NULL;
7933 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00007934
7935 xmlParseDocument(ctxt);
Daniel Veillard517752b1999-04-05 12:20:10 +00007936 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007937 else {
7938 ret = NULL;
Daniel Veillard517752b1999-04-05 12:20:10 +00007939 xmlFreeDoc(ctxt->myDoc);
7940 ctxt->myDoc = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007941 }
Daniel Veillard97fea181999-06-26 23:07:37 +00007942 if (sax != NULL)
7943 ctxt->sax = NULL;
Daniel Veillardd692aa41999-02-28 21:54:31 +00007944 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007945
7946 return(ret);
7947}
7948
Daniel Veillard11e00581998-10-24 18:27:49 +00007949/**
Daniel Veillard42dc9b31998-11-09 01:17:21 +00007950 * xmlParseDoc :
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007951 * @cur: a pointer to an array of xmlChar
Daniel Veillard42dc9b31998-11-09 01:17:21 +00007952 *
7953 * parse an XML in-memory document and build a tree.
7954 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00007955 * Returns the resulting document tree
Daniel Veillard42dc9b31998-11-09 01:17:21 +00007956 */
7957
Daniel Veillard1e346af1999-02-22 10:33:01 +00007958xmlDocPtr
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007959xmlParseDoc(xmlChar *cur) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007960 return(xmlSAXParseDoc(NULL, cur, 0));
7961}
7962
7963/**
Daniel Veillard011b63c1999-06-02 17:44:04 +00007964 * xmlSAXParseDTD :
7965 * @sax: the SAX handler block
7966 * @ExternalID: a NAME* containing the External ID of the DTD
7967 * @SystemID: a NAME* containing the URL to the DTD
7968 *
7969 * Load and parse an external subset.
7970 *
7971 * Returns the resulting xmlDtdPtr or NULL in case of error.
7972 */
7973
7974xmlDtdPtr
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007975xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
7976 const xmlChar *SystemID) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00007977 xmlDtdPtr ret = NULL;
7978 xmlParserCtxtPtr ctxt;
Daniel Veillard14fff061999-06-22 21:49:07 +00007979 xmlParserInputPtr input = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +00007980 xmlCharEncoding enc;
7981
7982 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
7983
Daniel Veillardb05deb71999-08-10 19:04:08 +00007984 ctxt = xmlNewParserCtxt();
Daniel Veillard011b63c1999-06-02 17:44:04 +00007985 if (ctxt == NULL) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00007986 return(NULL);
7987 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00007988
7989 /*
7990 * Set-up the SAX context
7991 */
7992 if (ctxt == NULL) return(NULL);
7993 if (sax != NULL) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00007994 if (ctxt->sax != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00007995 xmlFree(ctxt->sax);
Daniel Veillard011b63c1999-06-02 17:44:04 +00007996 ctxt->sax = sax;
7997 ctxt->userData = NULL;
7998 }
7999
8000 /*
8001 * Ask the Entity resolver to load the damn thing
8002 */
8003
8004 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8005 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8006 if (input == NULL) {
Daniel Veillard97fea181999-06-26 23:07:37 +00008007 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +00008008 xmlFreeParserCtxt(ctxt);
8009 return(NULL);
8010 }
8011
8012 /*
8013 * plug some encoding conversion routines here. !!!
8014 */
8015 xmlPushInput(ctxt, input);
8016 enc = xmlDetectCharEncoding(ctxt->input->cur);
8017 xmlSwitchEncoding(ctxt, enc);
8018
Daniel Veillardb05deb71999-08-10 19:04:08 +00008019 if (input->filename == NULL)
Daniel Veillardb96e6431999-08-29 21:02:19 +00008020 input->filename = (char *) xmlStrdup(SystemID); /* !!!!!!! */
Daniel Veillard011b63c1999-06-02 17:44:04 +00008021 input->line = 1;
8022 input->col = 1;
8023 input->base = ctxt->input->cur;
8024 input->cur = ctxt->input->cur;
8025 input->free = NULL;
8026
8027 /*
8028 * let's parse that entity knowing it's an external subset.
8029 */
8030 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
8031
8032 if (ctxt->myDoc != NULL) {
8033 if (ctxt->wellFormed) {
8034 ret = ctxt->myDoc->intSubset;
8035 ctxt->myDoc->intSubset = NULL;
8036 } else {
8037 ret = NULL;
8038 }
8039 xmlFreeDoc(ctxt->myDoc);
8040 ctxt->myDoc = NULL;
8041 }
Daniel Veillard97fea181999-06-26 23:07:37 +00008042 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +00008043 xmlFreeParserCtxt(ctxt);
8044
8045 return(ret);
8046}
8047
8048/**
8049 * xmlParseDTD :
8050 * @ExternalID: a NAME* containing the External ID of the DTD
8051 * @SystemID: a NAME* containing the URL to the DTD
8052 *
8053 * Load and parse an external subset.
8054 *
8055 * Returns the resulting xmlDtdPtr or NULL in case of error.
8056 */
8057
8058xmlDtdPtr
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008059xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00008060 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
8061}
8062
8063/**
Daniel Veillard0142b842000-01-14 14:45:24 +00008064 * xmlSAXParseBalancedChunk :
8065 * @ctx: an XML parser context (possibly NULL)
8066 * @sax: the SAX handler bloc (possibly NULL)
8067 * @user_data: The user data returned on SAX callbacks (possibly NULL)
8068 * @input: a parser input stream
8069 * @enc: the encoding
8070 *
8071 * Parse a well-balanced chunk of an XML document
8072 * The user has to provide SAX callback block whose routines will be
8073 * called by the parser
8074 * The allowed sequence for the Well Balanced Chunk is the one defined by
8075 * the content production in the XML grammar:
8076 *
8077 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8078 *
8079 * Returns 0 id the chunk is well balanced, -1 in case of args problem and
8080 * the error code otherwise
8081 */
8082
8083int
8084xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
8085 void *user_data, xmlParserInputPtr input,
8086 xmlCharEncoding enc) {
8087 xmlParserCtxtPtr ctxt;
8088 int ret;
8089
8090 if (input == NULL) return(-1);
8091
8092 if (ctx != NULL)
8093 ctxt = ctx;
8094 else {
8095 ctxt = xmlNewParserCtxt();
8096 if (ctxt == NULL)
8097 return(-1);
8098 if (sax == NULL)
8099 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8100 }
8101
8102 /*
8103 * Set-up the SAX context
8104 */
8105 if (sax != NULL) {
8106 if (ctxt->sax != NULL)
8107 xmlFree(ctxt->sax);
8108 ctxt->sax = sax;
8109 ctxt->userData = user_data;
8110 }
8111
8112 /*
8113 * plug some encoding conversion routines here.
8114 */
8115 xmlPushInput(ctxt, input);
8116 if (enc != XML_CHAR_ENCODING_NONE)
8117 xmlSwitchEncoding(ctxt, enc);
8118
8119 /*
8120 * let's parse that entity knowing it's an external subset.
8121 */
8122 xmlParseContent(ctxt);
8123 ret = ctxt->errNo;
8124
8125 if (ctx == NULL) {
8126 if (sax != NULL)
8127 ctxt->sax = NULL;
8128 else
8129 xmlFreeDoc(ctxt->myDoc);
8130 xmlFreeParserCtxt(ctxt);
8131 }
8132 return(ret);
8133}
8134
8135/**
8136 * xmlParseBalancedChunk :
8137 * @doc: the document the chunk pertains to
8138 * @node: the node defining the context in which informations will be added
8139 *
8140 * Parse a well-balanced chunk of an XML document present in memory
8141 *
8142 * Returns the resulting list of nodes resulting from the parsing,
8143 * they are not added to @node
8144 */
8145
8146xmlNodePtr
8147xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlNodePtr node) {
8148}
8149
8150/**
8151 * xmlParseBalancedChunkFile :
8152 * @doc: the document the chunk pertains to
8153 *
8154 * Parse a well-balanced chunk of an XML document contained in a file
8155 *
8156 * Returns the resulting list of nodes resulting from the parsing,
8157 * they are not added to @node
8158 */
8159
8160xmlNodePtr
8161xmlParseBalancedChunkFile(xmlDocPtr doc, xmlNodePtr node) {
8162}
8163
8164/**
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008165 * xmlRecoverDoc :
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008166 * @cur: a pointer to an array of xmlChar
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008167 *
8168 * parse an XML in-memory document and build a tree.
8169 * In the case the document is not Well Formed, a tree is built anyway
8170 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00008171 * Returns the resulting document tree
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008172 */
8173
Daniel Veillard1e346af1999-02-22 10:33:01 +00008174xmlDocPtr
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008175xmlRecoverDoc(xmlChar *cur) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008176 return(xmlSAXParseDoc(NULL, cur, 1));
Daniel Veillard42dc9b31998-11-09 01:17:21 +00008177}
8178
8179/**
Daniel Veillardd692aa41999-02-28 21:54:31 +00008180 * xmlCreateFileParserCtxt :
Daniel Veillard11e00581998-10-24 18:27:49 +00008181 * @filename: the filename
8182 *
Daniel Veillardd692aa41999-02-28 21:54:31 +00008183 * Create a parser context for a file content.
8184 * Automatic support for ZLIB/Compress compressed document is provided
8185 * by default if found at compile-time.
Daniel Veillard11e00581998-10-24 18:27:49 +00008186 *
Daniel Veillardd692aa41999-02-28 21:54:31 +00008187 * Returns the new parser context or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00008188 */
Daniel Veillardd692aa41999-02-28 21:54:31 +00008189xmlParserCtxtPtr
8190xmlCreateFileParserCtxt(const char *filename)
8191{
8192 xmlParserCtxtPtr ctxt;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008193 xmlParserInputPtr inputStream;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00008194 xmlParserInputBufferPtr buf;
Daniel Veillardb05deb71999-08-10 19:04:08 +00008195 char *directory = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008196
Daniel Veillarde2d034d1999-07-27 19:52:06 +00008197 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
8198 if (buf == NULL) return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008199
Daniel Veillardb05deb71999-08-10 19:04:08 +00008200 ctxt = xmlNewParserCtxt();
Daniel Veillard260a68f1998-08-13 03:39:55 +00008201 if (ctxt == NULL) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00008202 return(NULL);
8203 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00008204
8205 inputStream = xmlNewInputStream(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008206 if (inputStream == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00008207 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008208 return(NULL);
8209 }
8210
Daniel Veillard6454aec1999-09-02 22:04:43 +00008211 inputStream->filename = xmlMemStrdup(filename);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00008212 inputStream->buf = buf;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00008213 inputStream->base = inputStream->buf->buffer->content;
8214 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008215
8216 inputPush(ctxt, inputStream);
Daniel Veillardb05deb71999-08-10 19:04:08 +00008217 if ((ctxt->directory == NULL) && (directory == NULL))
8218 directory = xmlParserGetDirectory(filename);
8219 if ((ctxt->directory == NULL) && (directory != NULL))
8220 ctxt->directory = directory;
8221
Daniel Veillardd692aa41999-02-28 21:54:31 +00008222 return(ctxt);
8223}
8224
8225/**
8226 * xmlSAXParseFile :
8227 * @sax: the SAX handler block
8228 * @filename: the filename
8229 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
8230 * documents
8231 *
8232 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
8233 * compressed document is provided by default if found at compile-time.
8234 * It use the given SAX function block to handle the parsing callback.
8235 * If sax is NULL, fallback to the default DOM tree building routines.
8236 *
8237 * Returns the resulting document tree
8238 */
8239
Daniel Veillard011b63c1999-06-02 17:44:04 +00008240xmlDocPtr
8241xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
Daniel Veillardd692aa41999-02-28 21:54:31 +00008242 int recovery) {
8243 xmlDocPtr ret;
8244 xmlParserCtxtPtr ctxt;
Daniel Veillardb05deb71999-08-10 19:04:08 +00008245 char *directory = NULL;
Daniel Veillardd692aa41999-02-28 21:54:31 +00008246
8247 ctxt = xmlCreateFileParserCtxt(filename);
8248 if (ctxt == NULL) return(NULL);
Daniel Veillard27d88741999-05-29 11:51:49 +00008249 if (sax != NULL) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00008250 if (ctxt->sax != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00008251 xmlFree(ctxt->sax);
Daniel Veillard27d88741999-05-29 11:51:49 +00008252 ctxt->sax = sax;
8253 ctxt->userData = NULL;
8254 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00008255
Daniel Veillardb05deb71999-08-10 19:04:08 +00008256 if ((ctxt->directory == NULL) && (directory == NULL))
8257 directory = xmlParserGetDirectory(filename);
8258 if ((ctxt->directory == NULL) && (directory != NULL))
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008259 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory); /* !!!!!!! */
Daniel Veillardb05deb71999-08-10 19:04:08 +00008260
Daniel Veillard260a68f1998-08-13 03:39:55 +00008261 xmlParseDocument(ctxt);
8262
Daniel Veillard517752b1999-04-05 12:20:10 +00008263 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008264 else {
8265 ret = NULL;
Daniel Veillard517752b1999-04-05 12:20:10 +00008266 xmlFreeDoc(ctxt->myDoc);
8267 ctxt->myDoc = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008268 }
Daniel Veillard97fea181999-06-26 23:07:37 +00008269 if (sax != NULL)
8270 ctxt->sax = NULL;
Daniel Veillardd692aa41999-02-28 21:54:31 +00008271 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008272
8273 return(ret);
8274}
8275
Daniel Veillard42dc9b31998-11-09 01:17:21 +00008276/**
8277 * xmlParseFile :
8278 * @filename: the filename
8279 *
8280 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
8281 * compressed document is provided by default if found at compile-time.
8282 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00008283 * Returns the resulting document tree
Daniel Veillard42dc9b31998-11-09 01:17:21 +00008284 */
8285
Daniel Veillard011b63c1999-06-02 17:44:04 +00008286xmlDocPtr
8287xmlParseFile(const char *filename) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008288 return(xmlSAXParseFile(NULL, filename, 0));
8289}
8290
8291/**
8292 * xmlRecoverFile :
8293 * @filename: the filename
8294 *
8295 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
8296 * compressed document is provided by default if found at compile-time.
8297 * In the case the document is not Well Formed, a tree is built anyway
8298 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00008299 * Returns the resulting document tree
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008300 */
8301
Daniel Veillard011b63c1999-06-02 17:44:04 +00008302xmlDocPtr
8303xmlRecoverFile(const char *filename) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008304 return(xmlSAXParseFile(NULL, filename, 1));
Daniel Veillard42dc9b31998-11-09 01:17:21 +00008305}
Daniel Veillard260a68f1998-08-13 03:39:55 +00008306
Daniel Veillard11e00581998-10-24 18:27:49 +00008307/**
Daniel Veillardd692aa41999-02-28 21:54:31 +00008308 * xmlCreateMemoryParserCtxt :
Daniel Veillard1e346af1999-02-22 10:33:01 +00008309 * @buffer: an pointer to a char array
Daniel Veillard51e3b151999-11-12 17:02:31 +00008310 * @size: the size of the array
Daniel Veillard11e00581998-10-24 18:27:49 +00008311 *
Daniel Veillardd692aa41999-02-28 21:54:31 +00008312 * Create a parser context for an XML in-memory document.
Daniel Veillard11e00581998-10-24 18:27:49 +00008313 *
Daniel Veillardd692aa41999-02-28 21:54:31 +00008314 * Returns the new parser context or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00008315 */
Daniel Veillardd692aa41999-02-28 21:54:31 +00008316xmlParserCtxtPtr
8317xmlCreateMemoryParserCtxt(char *buffer, int size) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00008318 xmlParserCtxtPtr ctxt;
8319 xmlParserInputPtr input;
Daniel Veillard27d88741999-05-29 11:51:49 +00008320 xmlCharEncoding enc;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008321
8322 buffer[size - 1] = '\0';
8323
Daniel Veillardb05deb71999-08-10 19:04:08 +00008324 ctxt = xmlNewParserCtxt();
Daniel Veillard260a68f1998-08-13 03:39:55 +00008325 if (ctxt == NULL) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00008326 return(NULL);
8327 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00008328
8329 input = xmlNewInputStream(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008330 if (input == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00008331 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008332 return(NULL);
8333 }
8334
8335 input->filename = NULL;
8336 input->line = 1;
8337 input->col = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00008338 input->buf = NULL;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00008339 input->consumed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008340
8341 /*
Daniel Veillard27d88741999-05-29 11:51:49 +00008342 * plug some encoding conversion routines here. !!!
Daniel Veillard260a68f1998-08-13 03:39:55 +00008343 */
Daniel Veillardb96e6431999-08-29 21:02:19 +00008344 enc = xmlDetectCharEncoding(BAD_CAST buffer);
Daniel Veillard27d88741999-05-29 11:51:49 +00008345 xmlSwitchEncoding(ctxt, enc);
8346
Daniel Veillardb96e6431999-08-29 21:02:19 +00008347 input->base = BAD_CAST buffer;
8348 input->cur = BAD_CAST buffer;
Daniel Veillardd692aa41999-02-28 21:54:31 +00008349 input->free = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008350
8351 inputPush(ctxt, input);
Daniel Veillardd692aa41999-02-28 21:54:31 +00008352 return(ctxt);
8353}
8354
8355/**
8356 * xmlSAXParseMemory :
8357 * @sax: the SAX handler block
8358 * @buffer: an pointer to a char array
Daniel Veillard51e3b151999-11-12 17:02:31 +00008359 * @size: the size of the array
8360 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
Daniel Veillardd692aa41999-02-28 21:54:31 +00008361 * documents
8362 *
8363 * parse an XML in-memory block and use the given SAX function block
8364 * to handle the parsing callback. If sax is NULL, fallback to the default
8365 * DOM tree building routines.
8366 *
Daniel Veillardd692aa41999-02-28 21:54:31 +00008367 * Returns the resulting document tree
8368 */
8369xmlDocPtr
8370xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
8371 xmlDocPtr ret;
8372 xmlParserCtxtPtr ctxt;
8373
8374 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
8375 if (ctxt == NULL) return(NULL);
Daniel Veillard27d88741999-05-29 11:51:49 +00008376 if (sax != NULL) {
8377 ctxt->sax = sax;
8378 ctxt->userData = NULL;
8379 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00008380
8381 xmlParseDocument(ctxt);
8382
Daniel Veillard517752b1999-04-05 12:20:10 +00008383 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008384 else {
8385 ret = NULL;
Daniel Veillard517752b1999-04-05 12:20:10 +00008386 xmlFreeDoc(ctxt->myDoc);
8387 ctxt->myDoc = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008388 }
Daniel Veillard97fea181999-06-26 23:07:37 +00008389 if (sax != NULL)
8390 ctxt->sax = NULL;
Daniel Veillardd692aa41999-02-28 21:54:31 +00008391 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008392
8393 return(ret);
8394}
8395
Daniel Veillard42dc9b31998-11-09 01:17:21 +00008396/**
8397 * xmlParseMemory :
Daniel Veillard1e346af1999-02-22 10:33:01 +00008398 * @buffer: an pointer to a char array
Daniel Veillard42dc9b31998-11-09 01:17:21 +00008399 * @size: the size of the array
8400 *
8401 * parse an XML in-memory block and build a tree.
8402 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00008403 * Returns the resulting document tree
Daniel Veillard42dc9b31998-11-09 01:17:21 +00008404 */
8405
8406xmlDocPtr xmlParseMemory(char *buffer, int size) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008407 return(xmlSAXParseMemory(NULL, buffer, size, 0));
8408}
8409
8410/**
8411 * xmlRecoverMemory :
Daniel Veillard1e346af1999-02-22 10:33:01 +00008412 * @buffer: an pointer to a char array
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008413 * @size: the size of the array
8414 *
8415 * parse an XML in-memory block and build a tree.
8416 * In the case the document is not Well Formed, a tree is built anyway
8417 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00008418 * Returns the resulting document tree
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008419 */
8420
8421xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
8422 return(xmlSAXParseMemory(NULL, buffer, size, 1));
Daniel Veillard42dc9b31998-11-09 01:17:21 +00008423}
Daniel Veillard260a68f1998-08-13 03:39:55 +00008424
Daniel Veillard260a68f1998-08-13 03:39:55 +00008425
Daniel Veillard11e00581998-10-24 18:27:49 +00008426/**
8427 * xmlSetupParserForBuffer:
8428 * @ctxt: an XML parser context
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008429 * @buffer: a xmlChar * buffer
Daniel Veillard11e00581998-10-24 18:27:49 +00008430 * @filename: a file name
8431 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00008432 * Setup the parser context to parse a new buffer; Clears any prior
8433 * contents from the parser context. The buffer parameter must not be
8434 * NULL, but the filename parameter can be
8435 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00008436void
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008437xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
Daniel Veillard260a68f1998-08-13 03:39:55 +00008438 const char* filename)
8439{
Daniel Veillardb05deb71999-08-10 19:04:08 +00008440 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008441
Daniel Veillardb05deb71999-08-10 19:04:08 +00008442 input = xmlNewInputStream(ctxt);
8443 if (input == NULL) {
8444 perror("malloc");
Daniel Veillard6454aec1999-09-02 22:04:43 +00008445 xmlFree(ctxt);
Daniel Veillard0142b842000-01-14 14:45:24 +00008446 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00008447 }
8448
8449 xmlClearParserCtxt(ctxt);
8450 if (filename != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00008451 input->filename = xmlMemStrdup(filename);
Daniel Veillardb05deb71999-08-10 19:04:08 +00008452 input->base = buffer;
8453 input->cur = buffer;
8454 inputPush(ctxt, input);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008455}
8456
Daniel Veillard7a66ee61999-09-26 11:31:02 +00008457/**
8458 * xmlSAXUserParseFile:
8459 * @sax: a SAX handler
8460 * @user_data: The user data returned on SAX callbacks
8461 * @filename: a file name
8462 *
8463 * parse an XML file and call the given SAX handler routines.
8464 * Automatic support for ZLIB/Compress compressed document is provided
8465 *
8466 * Returns 0 in case of success or a error number otherwise
8467 */
Daniel Veillard11a48ec1999-11-23 10:40:46 +00008468int
8469xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
8470 const char *filename) {
Daniel Veillard7a66ee61999-09-26 11:31:02 +00008471 int ret = 0;
8472 xmlParserCtxtPtr ctxt;
8473
8474 ctxt = xmlCreateFileParserCtxt(filename);
8475 if (ctxt == NULL) return -1;
Daniel Veillard294cbca1999-12-03 13:19:09 +00008476 if (ctxt->sax != &xmlDefaultSAXHandler)
8477 xmlFree(ctxt->sax);
Daniel Veillard7a66ee61999-09-26 11:31:02 +00008478 ctxt->sax = sax;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008479 if (user_data != NULL)
8480 ctxt->userData = user_data;
Daniel Veillard7a66ee61999-09-26 11:31:02 +00008481
8482 xmlParseDocument(ctxt);
8483
8484 if (ctxt->wellFormed)
8485 ret = 0;
8486 else {
8487 if (ctxt->errNo != 0)
8488 ret = ctxt->errNo;
8489 else
8490 ret = -1;
8491 }
8492 if (sax != NULL)
8493 ctxt->sax = NULL;
8494 xmlFreeParserCtxt(ctxt);
8495
8496 return ret;
8497}
8498
8499/**
8500 * xmlSAXUserParseMemory:
8501 * @sax: a SAX handler
8502 * @user_data: The user data returned on SAX callbacks
8503 * @buffer: an in-memory XML document input
Daniel Veillard51e3b151999-11-12 17:02:31 +00008504 * @size: the length of the XML document in bytes
Daniel Veillard7a66ee61999-09-26 11:31:02 +00008505 *
8506 * A better SAX parsing routine.
8507 * parse an XML in-memory buffer and call the given SAX handler routines.
8508 *
8509 * Returns 0 in case of success or a error number otherwise
8510 */
8511int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
8512 char *buffer, int size) {
8513 int ret = 0;
8514 xmlParserCtxtPtr ctxt;
8515
8516 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
8517 if (ctxt == NULL) return -1;
8518 ctxt->sax = sax;
8519 ctxt->userData = user_data;
8520
8521 xmlParseDocument(ctxt);
8522
8523 if (ctxt->wellFormed)
8524 ret = 0;
8525 else {
8526 if (ctxt->errNo != 0)
8527 ret = ctxt->errNo;
8528 else
8529 ret = -1;
8530 }
8531 if (sax != NULL)
8532 ctxt->sax = NULL;
8533 xmlFreeParserCtxt(ctxt);
8534
8535 return ret;
8536}
8537
Daniel Veillard260a68f1998-08-13 03:39:55 +00008538
Daniel Veillardb05deb71999-08-10 19:04:08 +00008539/************************************************************************
8540 * *
Daniel Veillard51e3b151999-11-12 17:02:31 +00008541 * Miscellaneous *
Daniel Veillardb05deb71999-08-10 19:04:08 +00008542 * *
8543 ************************************************************************/
8544
/**
 * xmlCleanupParser:
 *
 * Cleanup function for the XML parser. It tries to reclaim all
 * parsing related global memory allocated for the parser processing:
 * the character encoding handlers first, then the predefined entities.
 * It doesn't deallocate any document related memory. Calling this
 * function should not prevent reusing the parser.
 */

void
xmlCleanupParser(void)
{
    xmlCleanupCharEncodingHandlers();
    xmlCleanupPredefinedEntities();
}
Daniel Veillardb05deb71999-08-10 19:04:08 +00008559
Daniel Veillard11e00581998-10-24 18:27:49 +00008560/**
8561 * xmlParserFindNodeInfo:
8562 * @ctxt: an XML parser context
8563 * @node: an XML node within the tree
8564 *
8565 * Find the parser node info struct for a given node
8566 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00008567 * Returns an xmlParserNodeInfo block pointer or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00008568 */
8569const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
8570 const xmlNode* node)
8571{
8572 unsigned long pos;
8573
8574 /* Find position where node should be at */
8575 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
8576 if ( ctx->node_seq.buffer[pos].node == node )
8577 return &ctx->node_seq.buffer[pos];
8578 else
8579 return NULL;
8580}
8581
8582
Daniel Veillard11e00581998-10-24 18:27:49 +00008583/**
8584 * xmlInitNodeInfoSeq :
8585 * @seq: a node info sequence pointer
8586 *
8587 * -- Initialize (set to initial state) node info sequence
Daniel Veillard260a68f1998-08-13 03:39:55 +00008588 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00008589void
8590xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
Daniel Veillard260a68f1998-08-13 03:39:55 +00008591{
8592 seq->length = 0;
8593 seq->maximum = 0;
8594 seq->buffer = NULL;
8595}
8596
Daniel Veillard11e00581998-10-24 18:27:49 +00008597/**
8598 * xmlClearNodeInfoSeq :
8599 * @seq: a node info sequence pointer
8600 *
8601 * -- Clear (release memory and reinitialize) node
Daniel Veillard260a68f1998-08-13 03:39:55 +00008602 * info sequence
8603 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00008604void
8605xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
Daniel Veillard260a68f1998-08-13 03:39:55 +00008606{
8607 if ( seq->buffer != NULL )
Daniel Veillard6454aec1999-09-02 22:04:43 +00008608 xmlFree(seq->buffer);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008609 xmlInitNodeInfoSeq(seq);
8610}
8611
8612
Daniel Veillard11e00581998-10-24 18:27:49 +00008613/**
8614 * xmlParserFindNodeInfoIndex:
8615 * @seq: a node info sequence pointer
8616 * @node: an XML node pointer
8617 *
8618 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00008619 * xmlParserFindNodeInfoIndex : Find the index that the info record for
8620 * the given node is or should be at in a sorted sequence
Daniel Veillard1164e751999-02-16 16:29:17 +00008621 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00008622 * Returns a long indicating the position of the record
Daniel Veillard260a68f1998-08-13 03:39:55 +00008623 */
8624unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
8625 const xmlNode* node)
8626{
8627 unsigned long upper, lower, middle;
8628 int found = 0;
8629
8630 /* Do a binary search for the key */
8631 lower = 1;
8632 upper = seq->length;
8633 middle = 0;
8634 while ( lower <= upper && !found) {
8635 middle = lower + (upper - lower) / 2;
8636 if ( node == seq->buffer[middle - 1].node )
8637 found = 1;
8638 else if ( node < seq->buffer[middle - 1].node )
8639 upper = middle - 1;
8640 else
8641 lower = middle + 1;
8642 }
8643
8644 /* Return position */
8645 if ( middle == 0 || seq->buffer[middle - 1].node < node )
8646 return middle;
8647 else
8648 return middle - 1;
8649}
8650
8651
Daniel Veillard11e00581998-10-24 18:27:49 +00008652/**
8653 * xmlParserAddNodeInfo:
8654 * @ctxt: an XML parser context
Daniel Veillard1e346af1999-02-22 10:33:01 +00008655 * @info: a node info sequence pointer
Daniel Veillard11e00581998-10-24 18:27:49 +00008656 *
8657 * Insert node info record into the sorted sequence
Daniel Veillard260a68f1998-08-13 03:39:55 +00008658 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00008659void
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008660xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
Daniel Veillard1e346af1999-02-22 10:33:01 +00008661 const xmlParserNodeInfo* info)
Daniel Veillard260a68f1998-08-13 03:39:55 +00008662{
8663 unsigned long pos;
8664 static unsigned int block_size = 5;
8665
8666 /* Find pos and check to see if node is already in the sequence */
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008667 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
8668 if ( pos < ctxt->node_seq.length
8669 && ctxt->node_seq.buffer[pos].node == info->node ) {
8670 ctxt->node_seq.buffer[pos] = *info;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008671 }
8672
8673 /* Otherwise, we need to add new node to buffer */
8674 else {
8675 /* Expand buffer by 5 if needed */
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008676 if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00008677 xmlParserNodeInfo* tmp_buffer;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008678 unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
8679 *(ctxt->node_seq.maximum + block_size));
Daniel Veillard260a68f1998-08-13 03:39:55 +00008680
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008681 if ( ctxt->node_seq.buffer == NULL )
Daniel Veillard6454aec1999-09-02 22:04:43 +00008682 tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008683 else
Daniel Veillard6454aec1999-09-02 22:04:43 +00008684 tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008685
8686 if ( tmp_buffer == NULL ) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008687 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008688 ctxt->sax->error(ctxt->userData, "Out of memory\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008689 ctxt->errNo = XML_ERR_NO_MEMORY;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008690 return;
8691 }
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008692 ctxt->node_seq.buffer = tmp_buffer;
8693 ctxt->node_seq.maximum += block_size;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008694 }
8695
8696 /* If position is not at end, move elements out of the way */
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008697 if ( pos != ctxt->node_seq.length ) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00008698 unsigned long i;
8699
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008700 for ( i = ctxt->node_seq.length; i > pos; i-- )
8701 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
Daniel Veillard260a68f1998-08-13 03:39:55 +00008702 }
8703
8704 /* Copy element and increase length */
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008705 ctxt->node_seq.buffer[pos] = *info;
8706 ctxt->node_seq.length++;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008707 }
8708}
Daniel Veillard011b63c1999-06-02 17:44:04 +00008709
8710
Daniel Veillardb05deb71999-08-10 19:04:08 +00008711/**
8712 * xmlSubstituteEntitiesDefault :
8713 * @val: int 0 or 1
8714 *
8715 * Set and return the previous value for default entity support.
8716 * Initially the parser always keep entity references instead of substituting
8717 * entity values in the output. This function has to be used to change the
8718 * default parser behaviour
8719 * SAX::subtituteEntities() has to be used for changing that on a file by
8720 * file basis.
8721 *
8722 * Returns the last value for 0 for no substitution, 1 for substitution.
8723 */
8724
8725int
8726xmlSubstituteEntitiesDefault(int val) {
8727 int old = xmlSubstituteEntitiesDefaultValue;
8728
8729 xmlSubstituteEntitiesDefaultValue = val;
8730 return(old);
8731}
8732