blob: 1b353fb03b58e0fee8a2813770da12893241fae7 [file] [log] [blame]
Daniel Veillard260a68f1998-08-13 03:39:55 +00001/*
2 * parser.c : an XML 1.0 non-verifying parser
3 *
4 * See Copyright for the status of this software.
5 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006 * Daniel.Veillard@w3.org
Daniel Veillard260a68f1998-08-13 03:39:55 +00007 */
8
9#ifdef WIN32
10#define HAVE_FCNTL_H
11#include <io.h>
12#else
13#include <config.h>
14#endif
15#include <stdio.h>
16#include <ctype.h>
17#include <string.h> /* for memset() only */
Seth Alvese7f12e61998-10-01 20:51:15 +000018#include <stdlib.h>
Daniel Veillard260a68f1998-08-13 03:39:55 +000019#include <sys/stat.h>
20#ifdef HAVE_FCNTL_H
21#include <fcntl.h>
22#endif
23#ifdef HAVE_UNISTD_H
24#include <unistd.h>
25#endif
26#ifdef HAVE_ZLIB_H
27#include <zlib.h>
28#endif
29
Daniel Veillard6454aec1999-09-02 22:04:43 +000030#include "xmlmemory.h"
Daniel Veillard260a68f1998-08-13 03:39:55 +000031#include "tree.h"
32#include "parser.h"
33#include "entities.h"
Daniel Veillard27d88741999-05-29 11:51:49 +000034#include "encoding.h"
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000035#include "valid.h"
Daniel Veillard1e346af1999-02-22 10:33:01 +000036#include "parserInternals.h"
Daniel Veillarde2d034d1999-07-27 19:52:06 +000037#include "xmlIO.h"
Daniel Veillard260a68f1998-08-13 03:39:55 +000038
/* Version string exported by the parser, initialised from LIBXML_VERSION. */
Daniel Veillard14fff061999-06-22 21:49:07 +000039const char *xmlParserVersion = LIBXML_VERSION;
40
Daniel Veillarde2d034d1999-07-27 19:52:06 +000041
42/************************************************************************
43 * *
44 * Input handling functions for progressive parsing *
45 * *
46 ************************************************************************/
47
48/* #define DEBUG_INPUT */
49
/*
 * INPUT_CHUNK is the look-ahead granularity of the input functions below:
 * xmlParserInputGrow() does nothing while more than INPUT_CHUNK CHARs are
 * still available after the cursor, and xmlParserInputShrink() only drops
 * data once more than INPUT_CHUNK CHARs have been consumed.
 */
Daniel Veillardb05deb71999-08-10 19:04:08 +000050#define INPUT_CHUNK 250
51/* we need to keep enough input to show errors in context */
/* LINE_LEN is the number of already-consumed CHARs xmlParserInputShrink() keeps */
52#define LINE_LEN 80
Daniel Veillarde2d034d1999-07-27 19:52:06 +000053
#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/**
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debugging helper, compiled only when DEBUG_INPUT is defined.
 * Reports on stderr any inconsistency between the input cursor
 * (base/cur) and the underlying buffer (content/use), then dumps
 * the current state of the input.
 */
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        fprintf(stderr, "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        fprintf(stderr, "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p (casting them to int truncates them on
     * platforms where pointers are wider than int), and every integer
     * argument is cast so that it matches its %d conversion exactly.
     */
    fprintf(stderr,"buffer %p : content %p, cur %d, use %d, size %d\n",
            (void *) in, (void *) in->buf->buffer->content,
            (int) (in->cur - in->base),
            (int) in->buf->buffer->use, (int) in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in)
#endif
75
Daniel Veillarde2d034d1999-07-27 19:52:06 +000076
77/**
78 * xmlParserInputRead:
79 * @in: an XML parser input
80 * @len: an indicative size for the lookahead
81 *
82 * This function refresh the input for the parser. It doesn't try to
83 * preserve pointers to the input buffer, and discard already read data
84 *
85 * Returns the number of CHARs read, or -1 in case of error, 0 indicate the
86 * end of this entity
87 */
88int
89xmlParserInputRead(xmlParserInputPtr in, int len) {
90 int ret;
91 int used;
92 int index;
93
94#ifdef DEBUG_INPUT
95 fprintf(stderr, "Read\n");
96#endif
97 if (in->buf == NULL) return(-1);
98 if (in->base == NULL) return(-1);
99 if (in->cur == NULL) return(-1);
100 if (in->buf->buffer == NULL) return(-1);
101
102 CHECK_BUFFER(in);
103
104 used = in->cur - in->buf->buffer->content;
105 ret = xmlBufferShrink(in->buf->buffer, used);
106 if (ret > 0) {
107 in->cur -= ret;
108 in->consumed += ret;
109 }
110 ret = xmlParserInputBufferRead(in->buf, len);
111 if (in->base != in->buf->buffer->content) {
112 /*
113 * the buffer has been realloced
114 */
115 index = in->cur - in->base;
116 in->base = in->buf->buffer->content;
117 in->cur = &in->buf->buffer->content[index];
118 }
119
120 CHECK_BUFFER(in);
121
122 return(ret);
123}
124
125/**
126 * xmlParserInputGrow:
127 * @in: an XML parser input
128 * @len: an indicative size for the lookahead
129 *
130 * This function increase the input for the parser. It tries to
131 * preserve pointers to the input buffer, and keep already read data
132 *
133 * Returns the number of CHARs read, or -1 in case of error, 0 indicate the
134 * end of this entity
135 */
136int
137xmlParserInputGrow(xmlParserInputPtr in, int len) {
138 int ret;
139 int index;
140
141#ifdef DEBUG_INPUT
142 fprintf(stderr, "Grow\n");
143#endif
144 if (in->buf == NULL) return(-1);
145 if (in->base == NULL) return(-1);
146 if (in->cur == NULL) return(-1);
147 if (in->buf->buffer == NULL) return(-1);
148
149 CHECK_BUFFER(in);
150
151 index = in->cur - in->base;
152 if (in->buf->buffer->use > index + INPUT_CHUNK) {
153
154 CHECK_BUFFER(in);
155
156 return(0);
157 }
158 ret = xmlParserInputBufferGrow(in->buf, len);
159 if (in->base != in->buf->buffer->content) {
160 /*
161 * the buffer has been realloced
162 */
163 index = in->cur - in->base;
164 in->base = in->buf->buffer->content;
165 in->cur = &in->buf->buffer->content[index];
166 }
167
168 CHECK_BUFFER(in);
169
170 return(ret);
171}
172
173/**
174 * xmlParserInputShrink:
175 * @in: an XML parser input
176 *
177 * This function removes used input for the parser.
178 */
179void
180xmlParserInputShrink(xmlParserInputPtr in) {
181 int used;
182 int ret;
183 int index;
184
185#ifdef DEBUG_INPUT
186 fprintf(stderr, "Shrink\n");
187#endif
188 if (in->buf == NULL) return;
189 if (in->base == NULL) return;
190 if (in->cur == NULL) return;
191 if (in->buf->buffer == NULL) return;
192
193 CHECK_BUFFER(in);
194
195 used = in->cur - in->buf->buffer->content;
196 if (used > INPUT_CHUNK) {
Daniel Veillardb05deb71999-08-10 19:04:08 +0000197 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000198 if (ret > 0) {
199 in->cur -= ret;
200 in->consumed += ret;
201 }
202 }
203
204 CHECK_BUFFER(in);
205
206 if (in->buf->buffer->use > INPUT_CHUNK) {
207 return;
208 }
209 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
210 if (in->base != in->buf->buffer->content) {
211 /*
212 * the buffer has been realloced
213 */
214 index = in->cur - in->base;
215 in->base = in->buf->buffer->content;
216 in->cur = &in->buf->buffer->content[index];
217 }
218
219 CHECK_BUFFER(in);
220}
221
Daniel Veillard260a68f1998-08-13 03:39:55 +0000222/************************************************************************
223 * *
224 * Parser stacks related functions and macros *
225 * *
226 ************************************************************************/
Daniel Veillard011b63c1999-06-02 17:44:04 +0000227
/* Default copied into ctxt->replaceEntities by xmlInitParserCtxt() */
228int xmlSubstituteEntitiesDefaultValue = 0;
/* Default copied into ctxt->validate by xmlInitParserCtxt() */
Daniel Veillardb05deb71999-08-10 19:04:08 +0000229int xmlDoValidityCheckingDefaultValue = 0;
Daniel Veillard011b63c1999-06-02 17:44:04 +0000230
Daniel Veillard260a68f1998-08-13 03:39:55 +0000231/*
232 * Generic function for accessing stacks in the Parser Context
233 */
234
235#define PUSH_AND_POP(type, name) \
Daniel Veillard517752b1999-04-05 12:20:10 +0000236extern int name##Push(xmlParserCtxtPtr ctxt, type value) { \
Daniel Veillard260a68f1998-08-13 03:39:55 +0000237 if (ctxt->name##Nr >= ctxt->name##Max) { \
238 ctxt->name##Max *= 2; \
Daniel Veillard6454aec1999-09-02 22:04:43 +0000239 ctxt->name##Tab = (void *) xmlRealloc(ctxt->name##Tab, \
Daniel Veillard260a68f1998-08-13 03:39:55 +0000240 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
241 if (ctxt->name##Tab == NULL) { \
242 fprintf(stderr, "realloc failed !\n"); \
243 exit(1); \
244 } \
245 } \
246 ctxt->name##Tab[ctxt->name##Nr] = value; \
247 ctxt->name = value; \
248 return(ctxt->name##Nr++); \
249} \
Daniel Veillard517752b1999-04-05 12:20:10 +0000250extern type name##Pop(xmlParserCtxtPtr ctxt) { \
Daniel Veillardd692aa41999-02-28 21:54:31 +0000251 type ret; \
Daniel Veillard260a68f1998-08-13 03:39:55 +0000252 if (ctxt->name##Nr <= 0) return(0); \
253 ctxt->name##Nr--; \
Daniel Veillardccb09631998-10-27 06:21:04 +0000254 if (ctxt->name##Nr > 0) \
255 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
256 else \
257 ctxt->name = NULL; \
Daniel Veillardd692aa41999-02-28 21:54:31 +0000258 ret = ctxt->name##Tab[ctxt->name##Nr]; \
259 ctxt->name##Tab[ctxt->name##Nr] = 0; \
260 return(ret); \
Daniel Veillard260a68f1998-08-13 03:39:55 +0000261} \
262
263PUSH_AND_POP(xmlParserInputPtr, input)
264PUSH_AND_POP(xmlNodePtr, node)
265
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000266/*
267 * Macros for accessing the content. Those should be used only by the parser,
268 * and not exported.
269 *
270 * Dirty macros, i.e. one need to make assumption on the context to use them
271 *
272 * CUR_PTR return the current pointer to the CHAR to be parsed.
273 * CUR returns the current CHAR value, i.e. a 8 bit value if compiled
274 * in ISO-Latin or UTF-8, and the current 16 bit value if compiled
275 * in UNICODE mode. This should be used internally by the parser
276 * only to compare to ASCII values otherwise it would break when
277 * running with UTF-8 encoding.
278 * NXT(n) returns the n'th next CHAR. Same as CUR is should be used only
279 * to compare on ASCII based substring.
280 * SKIP(n) Skip n CHAR, and must also be used only to skip ASCII defined
281 * strings within the parser.
282 *
Daniel Veillard011b63c1999-06-02 17:44:04 +0000283 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000284 *
285 * CURRENT Returns the current char value, with the full decoding of
286 * UTF-8 if we are using this mode. It returns an int.
287 * NEXT Skip to the next character, this does the proper decoding
288 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard011b63c1999-06-02 17:44:04 +0000289 * COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000290 */
Daniel Veillard260a68f1998-08-13 03:39:55 +0000291
/* CUR: the pending ctxt->token when it is non-zero, else the CHAR at the cursor */
Daniel Veillardb05deb71999-08-10 19:04:08 +0000292#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000293#define SKIP(val) ctxt->input->cur += (val)
294#define NXT(val) ctxt->input->cur[(val)]
295#define CUR_PTR ctxt->input->cur
/*
 * SHRINK: drop used input, then, if the cursor sits on a 0 CHAR and no more
 * data can be obtained, pop the current input.
 * NOTE: expands to two statements, so it must not be used as the sole body
 * of an unbraced if/else/loop.
 */
Daniel Veillardb05deb71999-08-10 19:04:08 +0000296#define SHRINK xmlParserInputShrink(ctxt->input); \
297 if ((*ctxt->input->cur == 0) && \
298 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
299 xmlPopInput(ctxt)
300
/*
 * GROW: request more look-ahead, then, if the cursor sits on a 0 CHAR and no
 * more data can be obtained, pop the current input.
 * NOTE: expands to two statements, same caveat as SHRINK.
 */
301#define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
302 if ((*ctxt->input->cur == 0) && \
303 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
304 xmlPopInput(ctxt)
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000305
/*
 * SKIP_BLANKS: skip blank CHARs; after each run of blanks the reference
 * handlers are invoked if the cursor is on '%' or '&', and skipping resumes
 * while CUR is still a blank.
 * NOTE: the expansion already ends with ';'.
 */
306#define SKIP_BLANKS \
Daniel Veillardb05deb71999-08-10 19:04:08 +0000307 do { \
308 while (IS_BLANK(CUR)) NEXT; \
309 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
310 if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); \
311 } while (IS_BLANK(CUR));
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000312
/* CURRENT: the raw CHAR at the cursor; unlike CUR it ignores ctxt->token */
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000313#define CURRENT (*ctxt->input->cur)
/*
 * NEXT: consume one CHAR. A pending ctxt->token is simply cleared. Otherwise,
 * if the cursor is on a 0 CHAR and the input cannot grow, the input is
 * popped; else line/col are updated, the cursor advances and more input is
 * requested when it lands on a 0 CHAR. In this non-token branch the
 * reference handlers are finally invoked if the cursor is on '%' or '&'.
 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000314#define NEXT { \
Daniel Veillardb05deb71999-08-10 19:04:08 +0000315 if (ctxt->token != 0) ctxt->token = 0; \
316 else { \
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000317 if ((*ctxt->input->cur == 0) && \
318 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) { \
319 xmlPopInput(ctxt); \
320 } else { \
321 if (*(ctxt->input->cur) == '\n') { \
322 ctxt->input->line++; ctxt->input->col = 1; \
323 } else ctxt->input->col++; \
324 ctxt->input->cur++; \
325 if (*ctxt->input->cur == 0) \
326 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
Daniel Veillardb05deb71999-08-10 19:04:08 +0000327 } \
328 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
329 if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); \
330}}
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000331
Daniel Veillard260a68f1998-08-13 03:39:55 +0000332
Daniel Veillardb05deb71999-08-10 19:04:08 +0000333/************************************************************************
334 * *
335 * Commodity functions to handle entities processing *
336 * *
337 ************************************************************************/
Daniel Veillard260a68f1998-08-13 03:39:55 +0000338
Daniel Veillard11e00581998-10-24 18:27:49 +0000339/**
340 * xmlPopInput:
341 * @ctxt: an XML parser context
342 *
Daniel Veillard260a68f1998-08-13 03:39:55 +0000343 * xmlPopInput: the current input pointed by ctxt->input came to an end
344 * pop it and return the next char.
345 *
Daniel Veillard1e346af1999-02-22 10:33:01 +0000346 * Returns the current CHAR in the parser context
Daniel Veillard260a68f1998-08-13 03:39:55 +0000347 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000348CHAR
349xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000350 if (ctxt->inputNr == 1) return(0); /* End of main Input */
Daniel Veillardbc50b591999-03-01 12:28:53 +0000351 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillardb05deb71999-08-10 19:04:08 +0000352 if ((*ctxt->input->cur == 0) &&
353 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
354 return(xmlPopInput(ctxt));
Daniel Veillard260a68f1998-08-13 03:39:55 +0000355 return(CUR);
356}
357
Daniel Veillard11e00581998-10-24 18:27:49 +0000358/**
359 * xmlPushInput:
360 * @ctxt: an XML parser context
361 * @input: an XML parser input fragment (entity, XML fragment ...).
362 *
Daniel Veillard260a68f1998-08-13 03:39:55 +0000363 * xmlPushInput: switch to a new input stream which is stacked on top
364 * of the previous one(s).
365 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000366void
367xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000368 if (input == NULL) return;
369 inputPush(ctxt, input);
370}
371
Daniel Veillard11e00581998-10-24 18:27:49 +0000372/**
Daniel Veillardd692aa41999-02-28 21:54:31 +0000373 * xmlFreeInputStream:
Daniel Veillardb05deb71999-08-10 19:04:08 +0000374 * @input: an xmlP arserInputPtr
Daniel Veillardd692aa41999-02-28 21:54:31 +0000375 *
376 * Free up an input stream.
377 */
378void
379xmlFreeInputStream(xmlParserInputPtr input) {
380 if (input == NULL) return;
381
Daniel Veillard6454aec1999-09-02 22:04:43 +0000382 if (input->filename != NULL) xmlFree((char *) input->filename);
383 if (input->directory != NULL) xmlFree((char *) input->directory);
Daniel Veillardd692aa41999-02-28 21:54:31 +0000384 if ((input->free != NULL) && (input->base != NULL))
Daniel Veillardb96e6431999-08-29 21:02:19 +0000385 input->free((CHAR *) input->base);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000386 if (input->buf != NULL)
387 xmlFreeParserInputBuffer(input->buf);
Daniel Veillardd692aa41999-02-28 21:54:31 +0000388 memset(input, -1, sizeof(xmlParserInput));
Daniel Veillard6454aec1999-09-02 22:04:43 +0000389 xmlFree(input);
Daniel Veillardd692aa41999-02-28 21:54:31 +0000390}
391
392/**
Daniel Veillardb05deb71999-08-10 19:04:08 +0000393 * xmlNewInputStream:
394 * @ctxt: an XML parser context
395 *
396 * Create a new input stream structure
397 * Returns the new input stream or NULL
398 */
399xmlParserInputPtr
400xmlNewInputStream(xmlParserCtxtPtr ctxt) {
401 xmlParserInputPtr input;
402
Daniel Veillard6454aec1999-09-02 22:04:43 +0000403 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
Daniel Veillardb05deb71999-08-10 19:04:08 +0000404 if (input == NULL) {
405 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
406 ctxt->sax->error(ctxt->userData, "malloc: couldn't allocate a new input stream\n");
407 return(NULL);
408 }
409 input->filename = NULL;
410 input->directory = NULL;
411 input->base = NULL;
412 input->cur = NULL;
413 input->buf = NULL;
414 input->line = 1;
415 input->col = 1;
416 input->buf = NULL;
417 input->free = NULL;
418 input->consumed = 0;
419 return(input);
420}
421
422/**
Daniel Veillard11e00581998-10-24 18:27:49 +0000423 * xmlNewEntityInputStream:
424 * @ctxt: an XML parser context
425 * @entity: an Entity pointer
426 *
Daniel Veillard011b63c1999-06-02 17:44:04 +0000427 * Create a new input stream based on an xmlEntityPtr
Daniel Veillardb96e6431999-08-29 21:02:19 +0000428 *
429 * Returns the new input stream or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +0000430 */
Daniel Veillardccb09631998-10-27 06:21:04 +0000431xmlParserInputPtr
432xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000433 xmlParserInputPtr input;
434
435 if (entity == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +0000436 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +0000437 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +0000438 "internal: xmlNewEntityInputStream entity = NULL\n");
Daniel Veillardccb09631998-10-27 06:21:04 +0000439 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000440 }
441 if (entity->content == NULL) {
Daniel Veillardb96e6431999-08-29 21:02:19 +0000442 switch (entity->type) {
443 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
444 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
445 ctxt->sax->error(ctxt->userData,
446 "xmlNewEntityInputStream unparsed entity !\n");
447 break;
448 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
449 case XML_EXTERNAL_PARAMETER_ENTITY:
450 return(xmlLoadExternalEntity((char *) entity->SystemID,
451 (char *) entity->ExternalID, ctxt->input));
452 case XML_INTERNAL_GENERAL_ENTITY:
453 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
454 ctxt->sax->error(ctxt->userData,
455 "Internal entity %s without content !\n", entity->name);
456 break;
457 case XML_INTERNAL_PARAMETER_ENTITY:
458 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
459 ctxt->sax->error(ctxt->userData,
460 "Internal parameter entity %s without content !\n", entity->name);
461 break;
462 case XML_INTERNAL_PREDEFINED_ENTITY:
463 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
464 ctxt->sax->error(ctxt->userData,
465 "Predefined entity %s without content !\n", entity->name);
466 break;
467 }
Daniel Veillardccb09631998-10-27 06:21:04 +0000468 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000469 }
Daniel Veillardb05deb71999-08-10 19:04:08 +0000470 input = xmlNewInputStream(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000471 if (input == NULL) {
Daniel Veillardccb09631998-10-27 06:21:04 +0000472 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000473 }
Daniel Veillardb96e6431999-08-29 21:02:19 +0000474 input->filename = (char *) entity->SystemID; /* TODO !!! char <- CHAR */
Daniel Veillard260a68f1998-08-13 03:39:55 +0000475 input->base = entity->content;
476 input->cur = entity->content;
Daniel Veillardccb09631998-10-27 06:21:04 +0000477 return(input);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000478}
479
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000480/**
481 * xmlNewStringInputStream:
482 * @ctxt: an XML parser context
Daniel Veillardb05deb71999-08-10 19:04:08 +0000483 * @buffer: an memory buffer
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000484 *
485 * Create a new input stream based on a memory buffer.
Daniel Veillard1e346af1999-02-22 10:33:01 +0000486 * Returns the new input stream
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000487 */
488xmlParserInputPtr
Daniel Veillardb05deb71999-08-10 19:04:08 +0000489xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const CHAR *buffer) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000490 xmlParserInputPtr input;
491
Daniel Veillardb05deb71999-08-10 19:04:08 +0000492 if (buffer == NULL) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000493 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +0000494 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000495 "internal: xmlNewStringInputStream string = NULL\n");
496 return(NULL);
497 }
Daniel Veillardb05deb71999-08-10 19:04:08 +0000498 input = xmlNewInputStream(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000499 if (input == NULL) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000500 return(NULL);
501 }
Daniel Veillardb05deb71999-08-10 19:04:08 +0000502 input->base = buffer;
503 input->cur = buffer;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000504 return(input);
505}
506
Daniel Veillard011b63c1999-06-02 17:44:04 +0000507/**
508 * xmlNewInputFromFile:
509 * @ctxt: an XML parser context
510 * @filename: the filename to use as entity
511 *
512 * Create a new input stream based on a file.
513 *
514 * Returns the new input stream or NULL in case of error
515 */
516xmlParserInputPtr
517xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000518 xmlParserInputBufferPtr buf;
Daniel Veillard011b63c1999-06-02 17:44:04 +0000519 xmlParserInputPtr inputStream;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000520 char *directory = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +0000521
Daniel Veillardb05deb71999-08-10 19:04:08 +0000522 if (ctxt == NULL) return(NULL);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000523 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
Daniel Veillardb05deb71999-08-10 19:04:08 +0000524 if (buf == NULL) {
525 char name[1024];
Daniel Veillard011b63c1999-06-02 17:44:04 +0000526
Daniel Veillardb05deb71999-08-10 19:04:08 +0000527 if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
528#ifdef WIN32
529 sprintf(name, "%s\\%s", ctxt->input->directory, filename);
530#else
531 sprintf(name, "%s/%s", ctxt->input->directory, filename);
532#endif
533 buf = xmlParserInputBufferCreateFilename(name,
534 XML_CHAR_ENCODING_NONE);
535 if (buf != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +0000536 directory = xmlMemStrdup(ctxt->input->directory);
Daniel Veillardb05deb71999-08-10 19:04:08 +0000537 }
538 if ((buf == NULL) && (ctxt->directory != NULL)) {
539#ifdef WIN32
540 sprintf(name, "%s\\%s", ctxt->directory, filename);
541#else
542 sprintf(name, "%s/%s", ctxt->directory, filename);
543#endif
544 buf = xmlParserInputBufferCreateFilename(name,
545 XML_CHAR_ENCODING_NONE);
546 if (buf != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +0000547 directory = xmlMemStrdup(ctxt->directory);
Daniel Veillardb05deb71999-08-10 19:04:08 +0000548 }
549 if (buf == NULL)
550 return(NULL);
551 }
552 if (directory == NULL)
553 directory = xmlParserGetDirectory(filename);
554
555 inputStream = xmlNewInputStream(ctxt);
Daniel Veillard011b63c1999-06-02 17:44:04 +0000556 if (inputStream == NULL) {
Daniel Veillard6454aec1999-09-02 22:04:43 +0000557 if (directory != NULL) xmlFree((char *) directory);
Daniel Veillard011b63c1999-06-02 17:44:04 +0000558 return(NULL);
559 }
560
Daniel Veillard6454aec1999-09-02 22:04:43 +0000561 inputStream->filename = xmlMemStrdup(filename);
Daniel Veillardb05deb71999-08-10 19:04:08 +0000562 inputStream->directory = directory;
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000563 inputStream->buf = buf;
Daniel Veillard011b63c1999-06-02 17:44:04 +0000564
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000565 inputStream->base = inputStream->buf->buffer->content;
566 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000567 if ((ctxt->directory == NULL) && (directory != NULL))
568 ctxt->directory = directory;
Daniel Veillard011b63c1999-06-02 17:44:04 +0000569 return(inputStream);
570}
571
572/************************************************************************
573 * *
Daniel Veillardb05deb71999-08-10 19:04:08 +0000574 * Commodity functions to handle parser contexts *
575 * *
576 ************************************************************************/
577
578/**
579 * xmlInitParserCtxt:
580 * @ctxt: an XML parser context
581 *
582 * Initialize a parser context
583 */
584
585void
586xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
587{
588 xmlSAXHandler *sax;
589
Daniel Veillard6454aec1999-09-02 22:04:43 +0000590 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
Daniel Veillardb05deb71999-08-10 19:04:08 +0000591 if (sax == NULL) {
592 fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
593 }
594
595 /* Allocate the Input stack */
Daniel Veillard6454aec1999-09-02 22:04:43 +0000596 ctxt->inputTab = (xmlParserInputPtr *) xmlMalloc(5 * sizeof(xmlParserInputPtr));
Daniel Veillardb05deb71999-08-10 19:04:08 +0000597 ctxt->inputNr = 0;
598 ctxt->inputMax = 5;
599 ctxt->input = NULL;
600 ctxt->version = NULL;
601 ctxt->encoding = NULL;
602 ctxt->standalone = -1;
603 ctxt->hasExternalSubset = 0;
604 ctxt->hasPErefs = 0;
605 ctxt->html = 0;
606 ctxt->external = 0;
607 ctxt->instate = XML_PARSER_PROLOG;
608 ctxt->token = 0;
609 ctxt->directory = NULL;
610
611 /* Allocate the Node stack */
Daniel Veillard6454aec1999-09-02 22:04:43 +0000612 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
Daniel Veillardb05deb71999-08-10 19:04:08 +0000613 ctxt->nodeNr = 0;
614 ctxt->nodeMax = 10;
615 ctxt->node = NULL;
616
617 if (sax == NULL) ctxt->sax = &xmlDefaultSAXHandler;
618 else {
619 ctxt->sax = sax;
620 memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
621 }
622 ctxt->userData = ctxt;
623 ctxt->myDoc = NULL;
624 ctxt->wellFormed = 1;
625 ctxt->valid = 1;
626 ctxt->validate = xmlDoValidityCheckingDefaultValue;
627 ctxt->vctxt.userData = ctxt;
628 ctxt->vctxt.error = xmlParserValidityError;
629 ctxt->vctxt.warning = xmlParserValidityWarning;
630 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
631 ctxt->record_info = 0;
632 xmlInitNodeInfoSeq(&ctxt->node_seq);
633}
634
635/**
636 * xmlFreeParserCtxt:
637 * @ctxt: an XML parser context
638 *
639 * Free all the memory used by a parser context. However the parsed
640 * document in ctxt->myDoc is not freed.
641 */
642
643void
644xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
645{
646 xmlParserInputPtr input;
647
648 if (ctxt == NULL) return;
649
650 while ((input = inputPop(ctxt)) != NULL) {
651 xmlFreeInputStream(input);
652 }
653
Daniel Veillard6454aec1999-09-02 22:04:43 +0000654 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
655 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
656 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
657 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
Daniel Veillardb05deb71999-08-10 19:04:08 +0000658 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
Daniel Veillard6454aec1999-09-02 22:04:43 +0000659 xmlFree(ctxt->sax);
660 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
661 xmlFree(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +0000662}
663
664/**
665 * xmlNewParserCtxt:
666 *
667 * Allocate and initialize a new parser context.
668 *
669 * Returns the xmlParserCtxtPtr or NULL
670 */
671
672xmlParserCtxtPtr
673xmlNewParserCtxt()
674{
675 xmlParserCtxtPtr ctxt;
676
Daniel Veillard6454aec1999-09-02 22:04:43 +0000677 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
Daniel Veillardb05deb71999-08-10 19:04:08 +0000678 if (ctxt == NULL) {
679 fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
680 perror("malloc");
681 return(NULL);
682 }
683 xmlInitParserCtxt(ctxt);
684 return(ctxt);
685}
686
687/**
688 * xmlClearParserCtxt:
689 * @ctxt: an XML parser context
690 *
691 * Clear (release owned resources) and reinitialize a parser context
692 */
693
694void
695xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
696{
697 xmlClearNodeInfoSeq(&ctxt->node_seq);
698 xmlInitParserCtxt(ctxt);
699}
700
701/************************************************************************
702 * *
Daniel Veillard011b63c1999-06-02 17:44:04 +0000703 * Commodity functions to handle entities *
704 * *
705 ************************************************************************/
706
/*
 * Forward declarations: both handlers are invoked from the NEXT and
 * SKIP_BLANKS macros.
 */
Daniel Veillardb05deb71999-08-10 19:04:08 +0000707void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
708void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
709
710/**
711 * xmlParseCharRef:
712 * @ctxt: an XML parser context
713 *
714 * parse Reference declarations
715 *
716 * [66] CharRef ::= '&#' [0-9]+ ';' |
717 * '&#x' [0-9a-fA-F]+ ';'
718 *
719 * [ WFC: Legal Character ]
720 * Characters referred to using character references must match the
721 * production for Char.
722 *
723 * Returns the value parsed (as an int)
724 */
725int
726xmlParseCharRef(xmlParserCtxtPtr ctxt) {
727 int val = 0;
728
729 if (ctxt->token != 0) {
730 val = ctxt->token;
731 ctxt->token = 0;
732 return(val);
733 }
734 if ((CUR == '&') && (NXT(1) == '#') &&
735 (NXT(2) == 'x')) {
736 SKIP(3);
737 while (CUR != ';') {
738 if ((CUR >= '0') && (CUR <= '9'))
739 val = val * 16 + (CUR - '0');
740 else if ((CUR >= 'a') && (CUR <= 'f'))
741 val = val * 16 + (CUR - 'a') + 10;
742 else if ((CUR >= 'A') && (CUR <= 'F'))
743 val = val * 16 + (CUR - 'A') + 10;
744 else {
745 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
746 ctxt->sax->error(ctxt->userData,
747 "xmlParseCharRef: invalid hexadecimal value\n");
748 ctxt->wellFormed = 0;
749 val = 0;
750 break;
751 }
752 NEXT;
753 }
754 if (CUR == ';')
755 NEXT;
756 } else if ((CUR == '&') && (NXT(1) == '#')) {
757 SKIP(2);
758 while (CUR != ';') {
759 if ((CUR >= '0') && (CUR <= '9'))
760 val = val * 10 + (CUR - '0');
761 else {
762 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
763 ctxt->sax->error(ctxt->userData,
764 "xmlParseCharRef: invalid decimal value\n");
765 ctxt->wellFormed = 0;
766 val = 0;
767 break;
768 }
769 NEXT;
770 }
771 if (CUR == ';')
772 NEXT;
773 } else {
774 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
775 ctxt->sax->error(ctxt->userData,
776 "xmlParseCharRef: invalid value\n");
777 ctxt->wellFormed = 0;
778 }
779
780 /*
781 * [ WFC: Legal Character ]
782 * Characters referred to using character references must match the
783 * production for Char.
784 */
785 if (IS_CHAR(val)) {
786 return(val);
787 } else {
788 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
789 ctxt->sax->error(ctxt->userData, "CharRef: invalid CHAR value %d\n",
790 val);
791 ctxt->wellFormed = 0;
792 }
793 return(0);
794}
795
796/**
797 * xmlParserHandleReference:
798 * @ctxt: the parser context
799 *
800 * [67] Reference ::= EntityRef | CharRef
801 *
802 * [68] EntityRef ::= '&' Name ';'
803 *
804 * [ WFC: Entity Declared ]
805 * the Name given in the entity reference must match that in an entity
806 * declaration, except that well-formed documents need not declare any
807 * of the following entities: amp, lt, gt, apos, quot.
808 *
809 * [ WFC: Parsed Entity ]
810 * An entity reference must not contain the name of an unparsed entity
811 *
812 * [66] CharRef ::= '&#' [0-9]+ ';' |
813 * '&#x' [0-9a-fA-F]+ ';'
814 *
815 * A reference may have been detected in the current input stream;
816 * the handling is done according to
817 * http://www.w3.org/TR/REC-xml#entproc
818 */
819void
820xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
821 xmlParserInputPtr input;
822 CHAR *name;
823 xmlEntityPtr ent = NULL;
824
825 if (ctxt->token != 0) return;
826 if (CUR != '&') return;
827 GROW;
828 if ((CUR == '&') && (NXT(1) == '#')) {
829 switch(ctxt->instate) {
830 case XML_PARSER_CDATA_SECTION:
831 return;
832 case XML_PARSER_COMMENT:
833 return;
834 case XML_PARSER_EOF:
835 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
836 ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
837 ctxt->wellFormed = 0;
838 return;
839 case XML_PARSER_PROLOG:
840 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
841 ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
842 ctxt->wellFormed = 0;
843 return;
844 case XML_PARSER_EPILOG:
845 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
846 ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
847 ctxt->wellFormed = 0;
848 return;
849 case XML_PARSER_DTD:
850 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
851 ctxt->sax->error(ctxt->userData,
852 "CharRef are forbiden in DTDs!\n");
853 ctxt->wellFormed = 0;
854 return;
855 case XML_PARSER_ENTITY_DECL:
856 /* we just ignore it there */
857 return;
858 case XML_PARSER_ENTITY_VALUE:
859 /*
860 * NOTE: in the case of entity values, we don't do the
861 * substitution here since we need the litteral
862 * entity value to be able to save the internal
863 * subset of the document.
864 * This will be handled by xmlDecodeEntities
865 */
866 return;
867 case XML_PARSER_CONTENT:
868 case XML_PARSER_ATTRIBUTE_VALUE:
Daniel Veillardb96e6431999-08-29 21:02:19 +0000869 /* !!! this may not be Ok for UTF-8, multibyte sequence */
Daniel Veillardb05deb71999-08-10 19:04:08 +0000870 ctxt->token = xmlParseCharRef(ctxt);
871 return;
872 }
873 return;
874 }
875
876 switch(ctxt->instate) {
877 case XML_PARSER_CDATA_SECTION:
878 return;
879 case XML_PARSER_COMMENT:
880 return;
881 case XML_PARSER_EOF:
882 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
883 ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
884 ctxt->wellFormed = 0;
885 return;
886 case XML_PARSER_PROLOG:
887 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
888 ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
889 ctxt->wellFormed = 0;
890 return;
891 case XML_PARSER_EPILOG:
892 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
893 ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
894 ctxt->wellFormed = 0;
895 return;
896 case XML_PARSER_ENTITY_VALUE:
897 /*
898 * NOTE: in the case of entity values, we don't do the
899 * substitution here since we need the litteral
900 * entity value to be able to save the internal
901 * subset of the document.
902 * This will be handled by xmlDecodeEntities
903 */
904 return;
905 case XML_PARSER_ATTRIBUTE_VALUE:
906 /*
907 * NOTE: in the case of attributes values, we don't do the
908 * substitution here unless we are in a mode where
909 * the parser is explicitely asked to substitute
910 * entities. The SAX callback is called with values
911 * without entity substitution.
912 * This will then be handled by xmlDecodeEntities
913 */
Daniel Veillardb96e6431999-08-29 21:02:19 +0000914 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000915 case XML_PARSER_ENTITY_DECL:
916 /*
917 * we just ignore it there
918 * the substitution will be done once the entity is referenced
919 */
920 return;
921 case XML_PARSER_DTD:
922 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
923 ctxt->sax->error(ctxt->userData,
924 "Entity references are forbiden in DTDs!\n");
925 ctxt->wellFormed = 0;
926 return;
927 case XML_PARSER_CONTENT:
Daniel Veillardb96e6431999-08-29 21:02:19 +0000928 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000929 }
930
931 NEXT;
932 name = xmlScanName(ctxt);
933 if (name == NULL) {
934 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
935 ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
936 ctxt->wellFormed = 0;
937 ctxt->token = '&';
938 return;
939 }
940 if (NXT(xmlStrlen(name)) != ';') {
941 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
942 ctxt->sax->error(ctxt->userData,
943 "Entity reference: ';' expected\n");
944 ctxt->wellFormed = 0;
945 ctxt->token = '&';
Daniel Veillard6454aec1999-09-02 22:04:43 +0000946 xmlFree(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +0000947 return;
948 }
949 SKIP(xmlStrlen(name) + 1);
950 if (ctxt->sax != NULL) {
951 if (ctxt->sax->getEntity != NULL)
952 ent = ctxt->sax->getEntity(ctxt->userData, name);
953 }
954
955 /*
956 * [ WFC: Entity Declared ]
957 * the Name given in the entity reference must match that in an entity
958 * declaration, except that well-formed documents need not declare any
959 * of the following entities: amp, lt, gt, apos, quot.
960 */
961 if (ent == NULL)
962 ent = xmlGetPredefinedEntity(name);
963 if (ent == NULL) {
964 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
965 ctxt->sax->error(ctxt->userData,
966 "Entity reference: entity %s not declared\n",
967 name);
968 ctxt->wellFormed = 0;
Daniel Veillard6454aec1999-09-02 22:04:43 +0000969 xmlFree(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +0000970 return;
971 }
972
973 /*
974 * [ WFC: Parsed Entity ]
975 * An entity reference must not contain the name of an unparsed entity
976 */
977 if (ent->type == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
978 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
979 ctxt->sax->error(ctxt->userData,
980 "Entity reference to unparsed entity %s\n", name);
981 ctxt->wellFormed = 0;
982 }
983
984 if (ent->type == XML_INTERNAL_PREDEFINED_ENTITY) {
985 ctxt->token = ent->content[0];
Daniel Veillard6454aec1999-09-02 22:04:43 +0000986 xmlFree(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +0000987 return;
988 }
989 input = xmlNewEntityInputStream(ctxt, ent);
990 xmlPushInput(ctxt, input);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000991 xmlFree(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +0000992 return;
993}
994
995/**
996 * xmlParserHandlePEReference:
997 * @ctxt: the parser context
998 *
999 * [69] PEReference ::= '%' Name ';'
1000 *
1001 * [ WFC: No Recursion ]
1002 * TODO A parsed entity must not contain a recursive
1003 * reference to itself, either directly or indirectly.
1004 *
1005 * [ WFC: Entity Declared ]
1006 * In a document without any DTD, a document with only an internal DTD
1007 * subset which contains no parameter entity references, or a document
1008 * with "standalone='yes'", ... ... The declaration of a parameter
1009 * entity must precede any reference to it...
1010 *
1011 * [ VC: Entity Declared ]
1012 * In a document with an external subset or external parameter entities
1013 * with "standalone='no'", ... ... The declaration of a parameter entity
1014 * must precede any reference to it...
1015 *
1016 * [ WFC: In DTD ]
1017 * Parameter-entity references may only appear in the DTD.
1018 * NOTE: misleading but this is handled.
1019 *
1020 * A PEReference may have been detected in the current input stream
1021 * the handling is done accordingly to
1022 * http://www.w3.org/TR/REC-xml#entproc
1023 * i.e.
1024 * - Included in literal in entity values
1025 * - Included as Paraemeter Entity reference within DTDs
1026 */
1027void
1028xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1029 CHAR *name;
1030 xmlEntityPtr entity = NULL;
1031 xmlParserInputPtr input;
1032
1033 if (ctxt->token != 0) return;
1034 if (CUR != '%') return;
1035 switch(ctxt->instate) {
1036 case XML_PARSER_CDATA_SECTION:
1037 return;
1038 case XML_PARSER_COMMENT:
1039 return;
1040 case XML_PARSER_EOF:
1041 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1042 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
1043 ctxt->wellFormed = 0;
1044 return;
1045 case XML_PARSER_PROLOG:
1046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1047 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
1048 ctxt->wellFormed = 0;
1049 return;
1050 case XML_PARSER_ENTITY_DECL:
1051 case XML_PARSER_CONTENT:
1052 case XML_PARSER_ATTRIBUTE_VALUE:
1053 /* we just ignore it there */
1054 return;
1055 case XML_PARSER_EPILOG:
1056 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1057 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1058 ctxt->wellFormed = 0;
1059 return;
1060 case XML_PARSER_ENTITY_VALUE:
1061 /*
1062 * NOTE: in the case of entity values, we don't do the
1063 * substitution here since we need the litteral
1064 * entity value to be able to save the internal
1065 * subset of the document.
1066 * This will be handled by xmlDecodeEntities
1067 */
1068 return;
1069 case XML_PARSER_DTD:
1070 /*
1071 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1072 * In the internal DTD subset, parameter-entity references
1073 * can occur only where markup declarations can occur, not
1074 * within markup declarations.
1075 * In that case this is handled in xmlParseMarkupDecl
1076 */
1077 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1078 return;
1079 }
1080
1081 NEXT;
1082 name = xmlParseName(ctxt);
1083 if (name == NULL) {
1084 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1085 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
1086 ctxt->wellFormed = 0;
1087 } else {
1088 if (CUR == ';') {
1089 NEXT;
1090 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1091 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1092 if (entity == NULL) {
1093
1094 /*
1095 * [ WFC: Entity Declared ]
1096 * In a document without any DTD, a document with only an
1097 * internal DTD subset which contains no parameter entity
1098 * references, or a document with "standalone='yes'", ...
1099 * ... The declaration of a parameter entity must precede
1100 * any reference to it...
1101 */
1102 if ((ctxt->standalone == 1) ||
1103 ((ctxt->hasExternalSubset == 0) &&
1104 (ctxt->hasPErefs == 0))) {
1105 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1106 ctxt->sax->error(ctxt->userData,
1107 "PEReference: %%%s; not found\n", name);
1108 ctxt->wellFormed = 0;
1109 } else {
1110 /*
1111 * [ VC: Entity Declared ]
1112 * In a document with an external subset or external
1113 * parameter entities with "standalone='no'", ...
1114 * ... The declaration of a parameter entity must precede
1115 * any reference to it...
1116 */
1117 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1118 ctxt->sax->warning(ctxt->userData,
1119 "PEReference: %%%s; not found\n", name);
1120 ctxt->valid = 0;
1121 }
1122 } else {
1123 if ((entity->type == XML_INTERNAL_PARAMETER_ENTITY) ||
1124 (entity->type == XML_EXTERNAL_PARAMETER_ENTITY)) {
1125 /*
1126 * TODO !!!! handle the extra spaces added before and after
1127 * c.f. http://www.w3.org/TR/REC-xml#as-PE
1128 * TODO !!!! Avoid quote processing in parameters value
1129 * c.f. http://www.w3.org/TR/REC-xml#inliteral
1130 */
1131 input = xmlNewEntityInputStream(ctxt, entity);
1132 xmlPushInput(ctxt, input);
1133 } else {
1134 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1135 ctxt->sax->error(ctxt->userData,
1136 "xmlHandlePEReference: %s is not a parameter entity\n",
1137 name);
1138 ctxt->wellFormed = 0;
1139 }
1140 }
1141 } else {
1142 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1143 ctxt->sax->error(ctxt->userData,
1144 "xmlHandlePEReference: expecting ';'\n");
1145 ctxt->wellFormed = 0;
1146 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00001147 xmlFree(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001148 }
1149}
1150
/*
 * growBuffer:
 * Doubles the capacity of a CHAR buffer.  The argument must be the name
 * of a CHAR * variable that has a companion int variable called
 * <name>_size holding its capacity in CHARs.  Pointers into the old
 * buffer are invalid afterwards and must be recomputed by the caller.
 * The process is terminated with exit(1) if the reallocation fails.
 */
#define growBuffer(buffer) {                                            \
    buffer##_size = buffer##_size * 2;                                  \
    buffer = (CHAR *) xmlRealloc(buffer, sizeof(CHAR) * buffer##_size); \
    if (!buffer) {                                                      \
        perror("realloc failed");                                       \
        exit(1);                                                        \
    }                                                                   \
}
1162
Daniel Veillard011b63c1999-06-02 17:44:04 +00001163/**
1164 * xmlDecodeEntities:
1165 * @ctxt: the parser context
1166 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1167 * @len: the len to decode (in bytes !), -1 for no size limit
1168 * @end: an end marker CHAR, 0 if none
1169 * @end2: an end marker CHAR, 0 if none
1170 * @end3: an end marker CHAR, 0 if none
1171 *
1172 * [67] Reference ::= EntityRef | CharRef
1173 *
1174 * [69] PEReference ::= '%' Name ';'
1175 *
1176 * Returns A newly allocated string with the substitution done. The caller
1177 * must deallocate it !
1178 */
1179CHAR *
1180xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
1181 CHAR end, CHAR end2, CHAR end3) {
1182 CHAR *buffer = NULL;
1183 int buffer_size = 0;
1184 CHAR *out = NULL;
1185
Daniel Veillardb05deb71999-08-10 19:04:08 +00001186 CHAR *current = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001187 xmlEntityPtr ent;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001188 int nbchars = 0;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001189 unsigned int max = (unsigned int) len;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001190 CHAR cur;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001191
1192 /*
1193 * allocate a translation buffer.
1194 */
1195 buffer_size = 1000;
Daniel Veillard6454aec1999-09-02 22:04:43 +00001196 buffer = (CHAR *) xmlMalloc(buffer_size * sizeof(CHAR));
Daniel Veillard011b63c1999-06-02 17:44:04 +00001197 if (buffer == NULL) {
1198 perror("xmlDecodeEntities: malloc failed");
1199 return(NULL);
1200 }
1201 out = buffer;
1202
1203 /*
1204 * Ok loop until we reach one of the ending char or a size limit.
1205 */
Daniel Veillardb05deb71999-08-10 19:04:08 +00001206 cur = CUR;
1207 while ((nbchars < max) && (cur != end) &&
1208 (cur != end2) && (cur != end3)) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00001209
Daniel Veillardb05deb71999-08-10 19:04:08 +00001210 if (cur == 0) break;
1211 if ((cur == '&') && (NXT(1) == '#')) {
1212 int val = xmlParseCharRef(ctxt);
1213 *out++ = val;
1214 nbchars += 3;
1215 } else if ((cur == '&') && (what & XML_SUBSTITUTE_REF)) {
1216 ent = xmlParseEntityRef(ctxt);
1217 if ((ent != NULL) &&
1218 (ctxt->replaceEntities != 0)) {
1219 current = ent->content;
1220 while (*current != 0) {
1221 *out++ = *current++;
1222 if (out - buffer > buffer_size - 100) {
1223 int index = out - buffer;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001224
Daniel Veillardb05deb71999-08-10 19:04:08 +00001225 growBuffer(buffer);
1226 out = &buffer[index];
Daniel Veillard011b63c1999-06-02 17:44:04 +00001227 }
1228 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00001229 nbchars += 3 + xmlStrlen(ent->name);
1230 } else if (ent != NULL) {
1231 int i = xmlStrlen(ent->name);
1232 const CHAR *cur = ent->name;
1233
1234 nbchars += i + 2;
1235 *out++ = '&';
1236 if (out - buffer > buffer_size - i - 100) {
1237 int index = out - buffer;
1238
1239 growBuffer(buffer);
1240 out = &buffer[index];
1241 }
1242 for (;i > 0;i--)
1243 *out++ = *cur++;
1244 *out++ = ';';
Daniel Veillard011b63c1999-06-02 17:44:04 +00001245 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00001246 } else if (cur == '%' && (what & XML_SUBSTITUTE_PEREF)) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00001247 /*
1248 * a PEReference induce to switch the entity flow,
1249 * we break here to flush the current set of chars
1250 * parsed if any. We will be called back later.
1251 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001252 if (nbchars != 0) break;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001253
1254 xmlParsePEReference(ctxt);
1255
1256 /*
1257 * Pop-up of finished entities.
1258 */
1259 while ((CUR == 0) && (ctxt->inputNr > 1))
1260 xmlPopInput(ctxt);
1261
Daniel Veillardb05deb71999-08-10 19:04:08 +00001262 break;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001263 } else {
Daniel Veillardb96e6431999-08-29 21:02:19 +00001264 /* invalid for UTF-8 , use COPY(out); !!!!!! */
Daniel Veillardb05deb71999-08-10 19:04:08 +00001265 *out++ = cur;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001266 nbchars++;
Raph Levien05240da1999-06-15 21:27:11 +00001267 if (out - buffer > buffer_size - 100) {
1268 int index = out - buffer;
1269
1270 growBuffer(buffer);
1271 out = &buffer[index];
1272 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00001273 NEXT;
1274 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00001275 cur = CUR;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001276 }
1277 *out++ = 0;
1278 return(buffer);
1279}
1280
Daniel Veillard260a68f1998-08-13 03:39:55 +00001281
1282/************************************************************************
1283 * *
Daniel Veillard27d88741999-05-29 11:51:49 +00001284 * Commodity functions to handle encodings *
1285 * *
1286 ************************************************************************/
1287
1288/**
1289 * xmlSwitchEncoding:
1290 * @ctxt: the parser context
1291 * @len: the len of @cur
1292 *
1293 * change the input functions when discovering the character encoding
1294 * of a given entity.
1295 *
1296 */
1297void
1298xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1299{
1300 switch (enc) {
1301 case XML_CHAR_ENCODING_ERROR:
1302 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1303 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1304 ctxt->wellFormed = 0;
1305 break;
1306 case XML_CHAR_ENCODING_NONE:
1307 /* let's assume it's UTF-8 without the XML decl */
1308 return;
1309 case XML_CHAR_ENCODING_UTF8:
1310 /* default encoding, no conversion should be needed */
1311 return;
1312 case XML_CHAR_ENCODING_UTF16LE:
1313 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1314 ctxt->sax->error(ctxt->userData,
1315 "char encoding UTF16 little endian not supported\n");
1316 break;
1317 case XML_CHAR_ENCODING_UTF16BE:
1318 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1319 ctxt->sax->error(ctxt->userData,
1320 "char encoding UTF16 big endian not supported\n");
1321 break;
1322 case XML_CHAR_ENCODING_UCS4LE:
1323 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1324 ctxt->sax->error(ctxt->userData,
1325 "char encoding USC4 little endian not supported\n");
1326 break;
1327 case XML_CHAR_ENCODING_UCS4BE:
1328 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1329 ctxt->sax->error(ctxt->userData,
1330 "char encoding USC4 big endian not supported\n");
1331 break;
1332 case XML_CHAR_ENCODING_EBCDIC:
1333 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1334 ctxt->sax->error(ctxt->userData,
1335 "char encoding EBCDIC not supported\n");
1336 break;
1337 case XML_CHAR_ENCODING_UCS4_2143:
1338 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1339 ctxt->sax->error(ctxt->userData,
1340 "char encoding UCS4 2143 not supported\n");
1341 break;
1342 case XML_CHAR_ENCODING_UCS4_3412:
1343 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1344 ctxt->sax->error(ctxt->userData,
1345 "char encoding UCS4 3412 not supported\n");
1346 break;
1347 case XML_CHAR_ENCODING_UCS2:
1348 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1349 ctxt->sax->error(ctxt->userData,
1350 "char encoding UCS2 not supported\n");
1351 break;
1352 case XML_CHAR_ENCODING_8859_1:
1353 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1354 ctxt->sax->error(ctxt->userData,
1355 "char encoding ISO_8859_1 ISO Latin 1 not supported\n");
1356 break;
1357 case XML_CHAR_ENCODING_8859_2:
1358 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1359 ctxt->sax->error(ctxt->userData,
1360 "char encoding ISO_8859_2 ISO Latin 2 not supported\n");
1361 break;
1362 case XML_CHAR_ENCODING_8859_3:
1363 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1364 ctxt->sax->error(ctxt->userData,
1365 "char encoding ISO_8859_3 not supported\n");
1366 break;
1367 case XML_CHAR_ENCODING_8859_4:
1368 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1369 ctxt->sax->error(ctxt->userData,
1370 "char encoding ISO_8859_4 not supported\n");
1371 break;
1372 case XML_CHAR_ENCODING_8859_5:
1373 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1374 ctxt->sax->error(ctxt->userData,
1375 "char encoding ISO_8859_5 not supported\n");
1376 break;
1377 case XML_CHAR_ENCODING_8859_6:
1378 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1379 ctxt->sax->error(ctxt->userData,
1380 "char encoding ISO_8859_6 not supported\n");
1381 break;
1382 case XML_CHAR_ENCODING_8859_7:
1383 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1384 ctxt->sax->error(ctxt->userData,
1385 "char encoding ISO_8859_7 not supported\n");
1386 break;
1387 case XML_CHAR_ENCODING_8859_8:
1388 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1389 ctxt->sax->error(ctxt->userData,
1390 "char encoding ISO_8859_8 not supported\n");
1391 break;
1392 case XML_CHAR_ENCODING_8859_9:
1393 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1394 ctxt->sax->error(ctxt->userData,
1395 "char encoding ISO_8859_9 not supported\n");
1396 break;
1397 case XML_CHAR_ENCODING_2022_JP:
1398 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1399 ctxt->sax->error(ctxt->userData,
1400 "char encoding ISO-2022-JPnot supported\n");
1401 break;
1402 case XML_CHAR_ENCODING_SHIFT_JIS:
1403 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1404 ctxt->sax->error(ctxt->userData,
1405 "char encoding Shift_JISnot supported\n");
1406 break;
1407 case XML_CHAR_ENCODING_EUC_JP:
1408 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1409 ctxt->sax->error(ctxt->userData,
1410 "char encoding EUC-JPnot supported\n");
1411 break;
1412 }
1413}
1414
1415/************************************************************************
1416 * *
Daniel Veillard260a68f1998-08-13 03:39:55 +00001417 * Commodity functions to handle CHARs *
1418 * *
1419 ************************************************************************/
1420
Daniel Veillard11e00581998-10-24 18:27:49 +00001421/**
1422 * xmlStrndup:
1423 * @cur: the input CHAR *
1424 * @len: the len of @cur
1425 *
1426 * a strndup for array of CHAR's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001427 *
1428 * Returns a new CHAR * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001429 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001430CHAR *
1431xmlStrndup(const CHAR *cur, int len) {
Daniel Veillard6454aec1999-09-02 22:04:43 +00001432 CHAR *ret = xmlMalloc((len + 1) * sizeof(CHAR));
Daniel Veillard260a68f1998-08-13 03:39:55 +00001433
1434 if (ret == NULL) {
Daniel Veillardbe70ff71999-07-05 16:50:46 +00001435 fprintf(stderr, "malloc of %ld byte failed\n",
1436 (len + 1) * (long)sizeof(CHAR));
Daniel Veillard260a68f1998-08-13 03:39:55 +00001437 return(NULL);
1438 }
1439 memcpy(ret, cur, len * sizeof(CHAR));
1440 ret[len] = 0;
1441 return(ret);
1442}
1443
Daniel Veillard11e00581998-10-24 18:27:49 +00001444/**
1445 * xmlStrdup:
1446 * @cur: the input CHAR *
1447 *
1448 * a strdup for array of CHAR's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001449 *
1450 * Returns a new CHAR * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001451 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001452CHAR *
1453xmlStrdup(const CHAR *cur) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001454 const CHAR *p = cur;
1455
1456 while (IS_CHAR(*p)) p++;
1457 return(xmlStrndup(cur, p - cur));
1458}
1459
Daniel Veillard11e00581998-10-24 18:27:49 +00001460/**
1461 * xmlCharStrndup:
1462 * @cur: the input char *
1463 * @len: the len of @cur
1464 *
1465 * a strndup for char's to CHAR's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001466 *
1467 * Returns a new CHAR * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001468 */
1469
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001470CHAR *
1471xmlCharStrndup(const char *cur, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001472 int i;
Daniel Veillard6454aec1999-09-02 22:04:43 +00001473 CHAR *ret = xmlMalloc((len + 1) * sizeof(CHAR));
Daniel Veillard260a68f1998-08-13 03:39:55 +00001474
1475 if (ret == NULL) {
Daniel Veillardbe70ff71999-07-05 16:50:46 +00001476 fprintf(stderr, "malloc of %ld byte failed\n",
1477 (len + 1) * (long)sizeof(CHAR));
Daniel Veillard260a68f1998-08-13 03:39:55 +00001478 return(NULL);
1479 }
1480 for (i = 0;i < len;i++)
1481 ret[i] = (CHAR) cur[i];
1482 ret[len] = 0;
1483 return(ret);
1484}
1485
Daniel Veillard11e00581998-10-24 18:27:49 +00001486/**
1487 * xmlCharStrdup:
1488 * @cur: the input char *
1489 * @len: the len of @cur
1490 *
1491 * a strdup for char's to CHAR's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001492 *
1493 * Returns a new CHAR * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001494 */
1495
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001496CHAR *
1497xmlCharStrdup(const char *cur) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001498 const char *p = cur;
1499
1500 while (*p != '\0') p++;
1501 return(xmlCharStrndup(cur, p - cur));
1502}
1503
Daniel Veillard11e00581998-10-24 18:27:49 +00001504/**
1505 * xmlStrcmp:
1506 * @str1: the first CHAR *
1507 * @str2: the second CHAR *
1508 *
1509 * a strcmp for CHAR's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001510 *
1511 * Returns the integer result of the comparison
Daniel Veillard260a68f1998-08-13 03:39:55 +00001512 */
1513
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001514int
1515xmlStrcmp(const CHAR *str1, const CHAR *str2) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001516 register int tmp;
1517
1518 do {
1519 tmp = *str1++ - *str2++;
1520 if (tmp != 0) return(tmp);
1521 } while ((*str1 != 0) && (*str2 != 0));
1522 return (*str1 - *str2);
1523}
1524
Daniel Veillard11e00581998-10-24 18:27:49 +00001525/**
1526 * xmlStrncmp:
1527 * @str1: the first CHAR *
1528 * @str2: the second CHAR *
1529 * @len: the max comparison length
1530 *
1531 * a strncmp for CHAR's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001532 *
1533 * Returns the integer result of the comparison
Daniel Veillard260a68f1998-08-13 03:39:55 +00001534 */
1535
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001536int
1537xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001538 register int tmp;
1539
1540 if (len <= 0) return(0);
1541 do {
1542 tmp = *str1++ - *str2++;
1543 if (tmp != 0) return(tmp);
1544 len--;
1545 if (len <= 0) return(0);
1546 } while ((*str1 != 0) && (*str2 != 0));
1547 return (*str1 - *str2);
1548}
1549
Daniel Veillard11e00581998-10-24 18:27:49 +00001550/**
1551 * xmlStrchr:
1552 * @str: the CHAR * array
1553 * @val: the CHAR to search
1554 *
1555 * a strchr for CHAR's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001556 *
1557 * Returns the CHAR * for the first occurence or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001558 */
1559
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001560const CHAR *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001561xmlStrchr(const CHAR *str, CHAR val) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001562 while (*str != 0) {
1563 if (*str == val) return((CHAR *) str);
1564 str++;
1565 }
1566 return(NULL);
1567}
1568
Daniel Veillard11e00581998-10-24 18:27:49 +00001569/**
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001570 * xmlStrstr:
1571 * @str: the CHAR * array (haystack)
1572 * @val: the CHAR to search (needle)
1573 *
1574 * a strstr for CHAR's
1575 *
1576 * Returns the CHAR * for the first occurence or NULL.
1577 */
1578
1579const CHAR *
1580xmlStrstr(const CHAR *str, CHAR *val) {
1581 int n;
1582
1583 if (str == NULL) return(NULL);
1584 if (val == NULL) return(NULL);
1585 n = xmlStrlen(val);
1586
1587 if (n == 0) return(str);
1588 while (*str != 0) {
1589 if (*str == *val) {
1590 if (!xmlStrncmp(str, val, n)) return((const CHAR *) str);
1591 }
1592 str++;
1593 }
1594 return(NULL);
1595}
1596
1597/**
1598 * xmlStrsub:
1599 * @str: the CHAR * array (haystack)
1600 * @start: the index of the first char (zero based)
1601 * @len: the length of the substring
1602 *
1603 * Extract a substring of a given string
1604 *
1605 * Returns the CHAR * for the first occurence or NULL.
1606 */
1607
1608CHAR *
1609xmlStrsub(const CHAR *str, int start, int len) {
1610 int i;
1611
1612 if (str == NULL) return(NULL);
1613 if (start < 0) return(NULL);
1614 if (len < 0) return(NULL);
1615
1616 for (i = 0;i < start;i++) {
1617 if (*str == 0) return(NULL);
1618 str++;
1619 }
1620 if (*str == 0) return(NULL);
1621 return(xmlStrndup(str, len));
1622}
1623
1624/**
Daniel Veillard11e00581998-10-24 18:27:49 +00001625 * xmlStrlen:
1626 * @str: the CHAR * array
1627 *
1628 * lenght of a CHAR's string
Daniel Veillard1e346af1999-02-22 10:33:01 +00001629 *
1630 * Returns the number of CHAR contained in the ARRAY.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001631 */
1632
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001633int
1634xmlStrlen(const CHAR *str) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001635 int len = 0;
1636
1637 if (str == NULL) return(0);
1638 while (*str != 0) {
1639 str++;
1640 len++;
1641 }
1642 return(len);
1643}
1644
Daniel Veillard11e00581998-10-24 18:27:49 +00001645/**
1646 * xmlStrncat:
Daniel Veillard1e346af1999-02-22 10:33:01 +00001647 * @cur: the original CHAR * array
Daniel Veillard11e00581998-10-24 18:27:49 +00001648 * @add: the CHAR * array added
1649 * @len: the length of @add
1650 *
1651 * a strncat for array of CHAR's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001652 *
1653 * Returns a new CHAR * containing the concatenated string.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001654 */
1655
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001656CHAR *
1657xmlStrncat(CHAR *cur, const CHAR *add, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001658 int size;
1659 CHAR *ret;
1660
1661 if ((add == NULL) || (len == 0))
1662 return(cur);
1663 if (cur == NULL)
1664 return(xmlStrndup(add, len));
1665
1666 size = xmlStrlen(cur);
Daniel Veillard6454aec1999-09-02 22:04:43 +00001667 ret = xmlRealloc(cur, (size + len + 1) * sizeof(CHAR));
Daniel Veillard260a68f1998-08-13 03:39:55 +00001668 if (ret == NULL) {
Daniel Veillardbe70ff71999-07-05 16:50:46 +00001669 fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1670 (size + len + 1) * (long)sizeof(CHAR));
Daniel Veillard260a68f1998-08-13 03:39:55 +00001671 return(cur);
1672 }
1673 memcpy(&ret[size], add, len * sizeof(CHAR));
1674 ret[size + len] = 0;
1675 return(ret);
1676}
1677
Daniel Veillard11e00581998-10-24 18:27:49 +00001678/**
1679 * xmlStrcat:
Daniel Veillard1e346af1999-02-22 10:33:01 +00001680 * @cur: the original CHAR * array
Daniel Veillard11e00581998-10-24 18:27:49 +00001681 * @add: the CHAR * array added
1682 *
1683 * a strcat for array of CHAR's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001684 *
1685 * Returns a new CHAR * containing the concatenated string.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001686 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001687CHAR *
1688xmlStrcat(CHAR *cur, const CHAR *add) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001689 const CHAR *p = add;
1690
1691 if (add == NULL) return(cur);
1692 if (cur == NULL)
1693 return(xmlStrdup(add));
1694
1695 while (IS_CHAR(*p)) p++;
1696 return(xmlStrncat(cur, add, p - add));
1697}
1698
1699/************************************************************************
1700 * *
1701 * Commodity functions, cleanup needed ? *
1702 * *
1703 ************************************************************************/
1704
Daniel Veillard11e00581998-10-24 18:27:49 +00001705/**
1706 * areBlanks:
1707 * @ctxt: an XML parser context
1708 * @str: a CHAR *
1709 * @len: the size of @str
1710 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00001711 * Is this a sequence of blank chars that one can ignore ?
Daniel Veillard11e00581998-10-24 18:27:49 +00001712 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00001713 * TODO: Whether white space are significant has to be checked accordingly
1714 * to DTD informations if available
Daniel Veillard1e346af1999-02-22 10:33:01 +00001715 *
1716 * Returns 1 if ignorable 0 otherwise.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001717 */
1718
1719static int areBlanks(xmlParserCtxtPtr ctxt, const CHAR *str, int len) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00001720 int i, ret;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001721 xmlNodePtr lastChild;
1722
1723 for (i = 0;i < len;i++)
1724 if (!(IS_BLANK(str[i]))) return(0);
1725
1726 if (CUR != '<') return(0);
Daniel Veillard517752b1999-04-05 12:20:10 +00001727 if (ctxt->node == NULL) return(0);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001728 if (ctxt->myDoc != NULL) {
1729 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1730 if (ret == 0) return(1);
1731 if (ret == 1) return(0);
1732 }
1733 /*
1734 * heuristic
1735 */
Daniel Veillard260a68f1998-08-13 03:39:55 +00001736 lastChild = xmlGetLastChild(ctxt->node);
1737 if (lastChild == NULL) {
1738 if (ctxt->node->content != NULL) return(0);
1739 } else if (xmlNodeIsText(lastChild))
1740 return(0);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001741 else if ((ctxt->node->childs != NULL) &&
1742 (xmlNodeIsText(ctxt->node->childs)))
1743 return(0);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001744 return(1);
1745}
1746
Daniel Veillard11e00581998-10-24 18:27:49 +00001747/**
1748 * xmlHandleEntity:
1749 * @ctxt: an XML parser context
1750 * @entity: an XML entity pointer.
1751 *
1752 * Default handling of defined entities, when should we define a new input
Daniel Veillard260a68f1998-08-13 03:39:55 +00001753 * stream ? When do we just handle that as a set of chars ?
Daniel Veillardb05deb71999-08-10 19:04:08 +00001754 *
1755 * OBSOLETE: to be removed at some point.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001756 */
1757
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001758void
1759xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001760 int len;
Daniel Veillardccb09631998-10-27 06:21:04 +00001761 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001762
1763 if (entity->content == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001764 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00001765 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
Daniel Veillard260a68f1998-08-13 03:39:55 +00001766 entity->name);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001767 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001768 return;
1769 }
1770 len = xmlStrlen(entity->content);
1771 if (len <= 2) goto handle_as_char;
1772
1773 /*
1774 * Redefine its content as an input stream.
1775 */
Daniel Veillardccb09631998-10-27 06:21:04 +00001776 input = xmlNewEntityInputStream(ctxt, entity);
1777 xmlPushInput(ctxt, input);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001778 return;
1779
1780handle_as_char:
1781 /*
1782 * Just handle the content as a set of chars.
1783 */
Daniel Veillard517752b1999-04-05 12:20:10 +00001784 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00001785 ctxt->sax->characters(ctxt->userData, entity->content, len);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001786
1787}
1788
/*
 * Forward declarations needed for recursive behaviour.
 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00001792void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1793void xmlParseReference(xmlParserCtxtPtr ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001794
1795/************************************************************************
1796 * *
1797 * Extra stuff for namespace support *
1798 * Relates to http://www.w3.org/TR/WD-xml-names *
1799 * *
1800 ************************************************************************/
1801
Daniel Veillard11e00581998-10-24 18:27:49 +00001802/**
1803 * xmlNamespaceParseNCName:
1804 * @ctxt: an XML parser context
1805 *
1806 * parse an XML namespace name.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001807 *
1808 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
1809 *
1810 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
1811 * CombiningChar | Extender
Daniel Veillard1e346af1999-02-22 10:33:01 +00001812 *
1813 * Returns the namespace name or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001814 */
1815
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001816CHAR *
1817xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001818 CHAR buf[XML_MAX_NAMELEN];
1819 int len = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001820
1821 if (!IS_LETTER(CUR) && (CUR != '_')) return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001822
1823 while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1824 (CUR == '.') || (CUR == '-') ||
1825 (CUR == '_') ||
1826 (IS_COMBINING(CUR)) ||
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001827 (IS_EXTENDER(CUR))) {
1828 buf[len++] = CUR;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001829 NEXT;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001830 if (len >= XML_MAX_NAMELEN) {
1831 fprintf(stderr,
1832 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
1833 while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1834 (CUR == '.') || (CUR == '-') ||
1835 (CUR == '_') ||
1836 (IS_COMBINING(CUR)) ||
1837 (IS_EXTENDER(CUR)))
1838 NEXT;
1839 break;
1840 }
1841 }
1842 return(xmlStrndup(buf, len));
Daniel Veillard260a68f1998-08-13 03:39:55 +00001843}
1844
Daniel Veillard11e00581998-10-24 18:27:49 +00001845/**
1846 * xmlNamespaceParseQName:
1847 * @ctxt: an XML parser context
1848 * @prefix: a CHAR **
1849 *
1850 * parse an XML qualified name
Daniel Veillard260a68f1998-08-13 03:39:55 +00001851 *
1852 * [NS 5] QName ::= (Prefix ':')? LocalPart
1853 *
1854 * [NS 6] Prefix ::= NCName
1855 *
1856 * [NS 7] LocalPart ::= NCName
Daniel Veillard1e346af1999-02-22 10:33:01 +00001857 *
1858 * Returns the function returns the local part, and prefix is updated
Daniel Veillard11e00581998-10-24 18:27:49 +00001859 * to get the Prefix if any.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001860 */
1861
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001862CHAR *
1863xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, CHAR **prefix) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001864 CHAR *ret = NULL;
1865
1866 *prefix = NULL;
1867 ret = xmlNamespaceParseNCName(ctxt);
1868 if (CUR == ':') {
1869 *prefix = ret;
1870 NEXT;
1871 ret = xmlNamespaceParseNCName(ctxt);
1872 }
1873
1874 return(ret);
1875}
1876
Daniel Veillard11e00581998-10-24 18:27:49 +00001877/**
Daniel Veillard517752b1999-04-05 12:20:10 +00001878 * xmlSplitQName:
1879 * @name: an XML parser context
1880 * @prefix: a CHAR **
1881 *
1882 * parse an XML qualified name string
1883 *
1884 * [NS 5] QName ::= (Prefix ':')? LocalPart
1885 *
1886 * [NS 6] Prefix ::= NCName
1887 *
1888 * [NS 7] LocalPart ::= NCName
1889 *
1890 * Returns the function returns the local part, and prefix is updated
1891 * to get the Prefix if any.
1892 */
1893
1894CHAR *
1895xmlSplitQName(const CHAR *name, CHAR **prefix) {
1896 CHAR *ret = NULL;
1897 const CHAR *q;
1898 const CHAR *cur = name;
1899
1900 *prefix = NULL;
Daniel Veillardb96e6431999-08-29 21:02:19 +00001901
1902 /* xml: prefix is not really a namespace */
1903 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1904 (cur[2] == 'l') && (cur[3] == ':'))
1905 return(xmlStrdup(name));
1906
Daniel Veillard517752b1999-04-05 12:20:10 +00001907 if (!IS_LETTER(*cur) && (*cur != '_')) return(NULL);
1908 q = cur++;
1909
1910 while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
1911 (*cur == '.') || (*cur == '-') ||
1912 (*cur == '_') ||
1913 (IS_COMBINING(*cur)) ||
1914 (IS_EXTENDER(*cur)))
1915 cur++;
1916
1917 ret = xmlStrndup(q, cur - q);
1918
1919 if (*cur == ':') {
1920 cur++;
1921 if (!IS_LETTER(*cur) && (*cur != '_')) return(ret);
1922 *prefix = ret;
1923
1924 q = cur++;
1925
1926 while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
1927 (*cur == '.') || (*cur == '-') ||
1928 (*cur == '_') ||
1929 (IS_COMBINING(*cur)) ||
1930 (IS_EXTENDER(*cur)))
1931 cur++;
1932
1933 ret = xmlStrndup(q, cur - q);
1934 }
1935
1936 return(ret);
1937}
1938/**
Daniel Veillard11e00581998-10-24 18:27:49 +00001939 * xmlNamespaceParseNSDef:
1940 * @ctxt: an XML parser context
1941 *
1942 * parse a namespace prefix declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00001943 *
1944 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
1945 *
1946 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
Daniel Veillard1e346af1999-02-22 10:33:01 +00001947 *
1948 * Returns the namespace name
Daniel Veillard260a68f1998-08-13 03:39:55 +00001949 */
1950
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001951CHAR *
1952xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001953 CHAR *name = NULL;
1954
1955 if ((CUR == 'x') && (NXT(1) == 'm') &&
1956 (NXT(2) == 'l') && (NXT(3) == 'n') &&
1957 (NXT(4) == 's')) {
1958 SKIP(5);
1959 if (CUR == ':') {
1960 NEXT;
1961 name = xmlNamespaceParseNCName(ctxt);
1962 }
1963 }
1964 return(name);
1965}
1966
Daniel Veillard11e00581998-10-24 18:27:49 +00001967/**
1968 * xmlParseQuotedString:
1969 * @ctxt: an XML parser context
1970 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00001971 * [OLD] Parse and return a string between quotes or doublequotes
Daniel Veillardb05deb71999-08-10 19:04:08 +00001972 * To be removed at next drop of binary compatibility
Daniel Veillard1e346af1999-02-22 10:33:01 +00001973 *
1974 * Returns the string parser or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001975 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001976CHAR *
1977xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001978 CHAR *ret = NULL;
1979 const CHAR *q;
1980
1981 if (CUR == '"') {
1982 NEXT;
1983 q = CUR_PTR;
1984 while (IS_CHAR(CUR) && (CUR != '"')) NEXT;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001985 if (CUR != '"') {
1986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00001987 ctxt->sax->error(ctxt->userData, "String not closed \"%.50s\"\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001988 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001989 } else {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001990 ret = xmlStrndup(q, CUR_PTR - q);
1991 NEXT;
1992 }
1993 } else if (CUR == '\''){
1994 NEXT;
1995 q = CUR_PTR;
1996 while (IS_CHAR(CUR) && (CUR != '\'')) NEXT;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001997 if (CUR != '\'') {
1998 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00001999 ctxt->sax->error(ctxt->userData, "String not closed \"%.50s\"\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002000 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002001 } else {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002002 ret = xmlStrndup(q, CUR_PTR - q);
2003 NEXT;
2004 }
2005 }
2006 return(ret);
2007}
2008
Daniel Veillard11e00581998-10-24 18:27:49 +00002009/**
2010 * xmlParseNamespace:
2011 * @ctxt: an XML parser context
2012 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00002013 * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
2014 *
2015 * This is what the older xml-name Working Draft specified, a bunch of
2016 * other stuff may still rely on it, so support is still here as
2017 * if ot was declared on the root of the Tree:-(
Daniel Veillardb05deb71999-08-10 19:04:08 +00002018 *
2019 * To be removed at next drop of binary compatibility
Daniel Veillard260a68f1998-08-13 03:39:55 +00002020 */
2021
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002022void
2023xmlParseNamespace(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002024 CHAR *href = NULL;
2025 CHAR *prefix = NULL;
2026 int garbage = 0;
2027
2028 /*
2029 * We just skipped "namespace" or "xml:namespace"
2030 */
2031 SKIP_BLANKS;
2032
2033 while (IS_CHAR(CUR) && (CUR != '>')) {
2034 /*
2035 * We can have "ns" or "prefix" attributes
2036 * Old encoding as 'href' or 'AS' attributes is still supported
2037 */
2038 if ((CUR == 'n') && (NXT(1) == 's')) {
2039 garbage = 0;
2040 SKIP(2);
2041 SKIP_BLANKS;
2042
2043 if (CUR != '=') continue;
2044 NEXT;
2045 SKIP_BLANKS;
2046
2047 href = xmlParseQuotedString(ctxt);
2048 SKIP_BLANKS;
2049 } else if ((CUR == 'h') && (NXT(1) == 'r') &&
2050 (NXT(2) == 'e') && (NXT(3) == 'f')) {
2051 garbage = 0;
2052 SKIP(4);
2053 SKIP_BLANKS;
2054
2055 if (CUR != '=') continue;
2056 NEXT;
2057 SKIP_BLANKS;
2058
2059 href = xmlParseQuotedString(ctxt);
2060 SKIP_BLANKS;
2061 } else if ((CUR == 'p') && (NXT(1) == 'r') &&
2062 (NXT(2) == 'e') && (NXT(3) == 'f') &&
2063 (NXT(4) == 'i') && (NXT(5) == 'x')) {
2064 garbage = 0;
2065 SKIP(6);
2066 SKIP_BLANKS;
2067
2068 if (CUR != '=') continue;
2069 NEXT;
2070 SKIP_BLANKS;
2071
2072 prefix = xmlParseQuotedString(ctxt);
2073 SKIP_BLANKS;
2074 } else if ((CUR == 'A') && (NXT(1) == 'S')) {
2075 garbage = 0;
2076 SKIP(2);
2077 SKIP_BLANKS;
2078
2079 if (CUR != '=') continue;
2080 NEXT;
2081 SKIP_BLANKS;
2082
2083 prefix = xmlParseQuotedString(ctxt);
2084 SKIP_BLANKS;
2085 } else if ((CUR == '?') && (NXT(1) == '>')) {
2086 garbage = 0;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002087 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002088 } else {
2089 /*
2090 * Found garbage when parsing the namespace
2091 */
2092 if (!garbage)
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002093 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002094 ctxt->sax->error(ctxt->userData, "xmlParseNamespace found garbage\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002095 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002096 NEXT;
2097 }
2098 }
2099
2100 MOVETO_ENDTAG(CUR_PTR);
2101 NEXT;
2102
2103 /*
2104 * Register the DTD.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002105 if (href != NULL)
Daniel Veillard517752b1999-04-05 12:20:10 +00002106 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002107 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
Daniel Veillard517752b1999-04-05 12:20:10 +00002108 */
Daniel Veillard260a68f1998-08-13 03:39:55 +00002109
Daniel Veillard6454aec1999-09-02 22:04:43 +00002110 if (prefix != NULL) xmlFree(prefix);
2111 if (href != NULL) xmlFree(href);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002112}
2113
2114/************************************************************************
2115 * *
2116 * The parser itself *
2117 * Relates to http://www.w3.org/TR/REC-xml *
2118 * *
2119 ************************************************************************/
2120
Daniel Veillard11e00581998-10-24 18:27:49 +00002121/**
Daniel Veillardb05deb71999-08-10 19:04:08 +00002122 * xmlScanName:
2123 * @ctxt: an XML parser context
2124 *
2125 * Trickery: parse an XML name but without consuming the input flow
2126 * Needed for rollback cases.
2127 *
2128 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2129 * CombiningChar | Extender
2130 *
2131 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2132 *
2133 * [6] Names ::= Name (S Name)*
2134 *
2135 * Returns the Name parsed or NULL
2136 */
2137
2138CHAR *
2139xmlScanName(xmlParserCtxtPtr ctxt) {
2140 CHAR buf[XML_MAX_NAMELEN];
2141 int len = 0;
2142
2143 GROW;
2144 if (!IS_LETTER(CUR) && (CUR != '_') &&
2145 (CUR != ':')) {
2146 return(NULL);
2147 }
2148
2149 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
2150 (NXT(len) == '.') || (NXT(len) == '-') ||
2151 (NXT(len) == '_') || (NXT(len) == ':') ||
2152 (IS_COMBINING(NXT(len))) ||
2153 (IS_EXTENDER(NXT(len)))) {
2154 buf[len] = NXT(len);
2155 len++;
2156 if (len >= XML_MAX_NAMELEN) {
2157 fprintf(stderr,
2158 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
2159 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
2160 (NXT(len) == '.') || (NXT(len) == '-') ||
2161 (NXT(len) == '_') || (NXT(len) == ':') ||
2162 (IS_COMBINING(NXT(len))) ||
2163 (IS_EXTENDER(NXT(len))))
2164 len++;
2165 break;
2166 }
2167 }
2168 return(xmlStrndup(buf, len));
2169}
2170
2171/**
Daniel Veillard11e00581998-10-24 18:27:49 +00002172 * xmlParseName:
2173 * @ctxt: an XML parser context
2174 *
2175 * parse an XML name.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002176 *
2177 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2178 * CombiningChar | Extender
2179 *
2180 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2181 *
2182 * [6] Names ::= Name (S Name)*
Daniel Veillard1e346af1999-02-22 10:33:01 +00002183 *
2184 * Returns the Name parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00002185 */
2186
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002187CHAR *
2188xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002189 CHAR buf[XML_MAX_NAMELEN];
2190 int len = 0;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002191 CHAR cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002192
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002193 GROW;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002194 cur = CUR;
2195 if (!IS_LETTER(cur) && (cur != '_') &&
2196 (cur != ':')) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002197 return(NULL);
2198 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002199
Daniel Veillardb05deb71999-08-10 19:04:08 +00002200 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2201 (cur == '.') || (cur == '-') ||
2202 (cur == '_') || (cur == ':') ||
2203 (IS_COMBINING(cur)) ||
2204 (IS_EXTENDER(cur))) {
2205 buf[len++] = cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002206 NEXT;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002207 cur = CUR;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002208 if (len >= XML_MAX_NAMELEN) {
2209 fprintf(stderr,
2210 "xmlParseName: reached XML_MAX_NAMELEN limit\n");
Daniel Veillardb05deb71999-08-10 19:04:08 +00002211 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2212 (cur == '.') || (cur == '-') ||
2213 (cur == '_') || (cur == ':') ||
2214 (IS_COMBINING(cur)) ||
2215 (IS_EXTENDER(cur))) {
2216 NEXT;
2217 cur = CUR;
2218 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002219 break;
2220 }
2221 }
2222 return(xmlStrndup(buf, len));
Daniel Veillard260a68f1998-08-13 03:39:55 +00002223}
2224
Daniel Veillard11e00581998-10-24 18:27:49 +00002225/**
2226 * xmlParseNmtoken:
2227 * @ctxt: an XML parser context
2228 *
2229 * parse an XML Nmtoken.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002230 *
2231 * [7] Nmtoken ::= (NameChar)+
2232 *
2233 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
Daniel Veillard1e346af1999-02-22 10:33:01 +00002234 *
2235 * Returns the Nmtoken parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00002236 */
2237
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002238CHAR *
2239xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002240 CHAR buf[XML_MAX_NAMELEN];
2241 int len = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002242
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002243 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002244 while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
2245 (CUR == '.') || (CUR == '-') ||
2246 (CUR == '_') || (CUR == ':') ||
2247 (IS_COMBINING(CUR)) ||
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002248 (IS_EXTENDER(CUR))) {
2249 buf[len++] = CUR;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002250 NEXT;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002251 if (len >= XML_MAX_NAMELEN) {
2252 fprintf(stderr,
2253 "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
2254 while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
2255 (CUR == '.') || (CUR == '-') ||
2256 (CUR == '_') || (CUR == ':') ||
2257 (IS_COMBINING(CUR)) ||
2258 (IS_EXTENDER(CUR)))
2259 NEXT;
2260 break;
2261 }
2262 }
2263 return(xmlStrndup(buf, len));
Daniel Veillard260a68f1998-08-13 03:39:55 +00002264}
2265
Daniel Veillard11e00581998-10-24 18:27:49 +00002266/**
2267 * xmlParseEntityValue:
2268 * @ctxt: an XML parser context
Daniel Veillard011b63c1999-06-02 17:44:04 +00002269 * @orig: if non-NULL store a copy of the original entity value
Daniel Veillard11e00581998-10-24 18:27:49 +00002270 *
2271 * parse a value for ENTITY decl.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002272 *
2273 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2274 * "'" ([^%&'] | PEReference | Reference)* "'"
Daniel Veillard1e346af1999-02-22 10:33:01 +00002275 *
Daniel Veillard011b63c1999-06-02 17:44:04 +00002276 * Returns the EntityValue parsed with reference substitued or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00002277 */
2278
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002279CHAR *
Daniel Veillard011b63c1999-06-02 17:44:04 +00002280xmlParseEntityValue(xmlParserCtxtPtr ctxt, CHAR **orig) {
2281 CHAR *ret = NULL;
2282 const CHAR *org = NULL;
2283 const CHAR *tst = NULL;
2284 const CHAR *temp = NULL;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002285 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002286
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002287 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002288 if (CUR == '"') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002289 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2290 input = ctxt->input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002291 NEXT;
Daniel Veillard011b63c1999-06-02 17:44:04 +00002292 org = CUR_PTR;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002293 /*
2294 * NOTE: 4.4.5 Included in Literal
2295 * When a parameter entity reference appears in a literal entity
2296 * value, ... a single or double quote character in the replacement
2297 * text is always treated as a normal data character and will not
2298 * terminate the literal.
2299 * In practice it means we stop the loop only when back at parsing
2300 * the initial entity and the quote is found
2301 */
2302 while ((CUR != '"') || (ctxt->input != input)) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00002303 tst = CUR_PTR;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002304 /*
2305 * NOTE: 4.4.7 Bypassed
2306 * When a general entity reference appears in the EntityValue in
2307 * an entity declaration, it is bypassed and left as is.
2308 * so XML_SUBSTITUTE_REF is not set.
2309 */
2310 if (ctxt->input != input)
2311 temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2312 0, 0, 0);
2313 else
2314 temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2315 '"', 0, 0);
2316
2317 /*
2318 * Pop-up of finished entities.
2319 */
2320 while ((CUR == 0) && (ctxt->inputNr > 1))
2321 xmlPopInput(ctxt);
2322
2323 if ((temp == NULL) && (tst == CUR_PTR)) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00002324 ret = xmlStrndup((CHAR *) "", 0);
Daniel Veillardb05deb71999-08-10 19:04:08 +00002325 break;
2326 }
2327 if ((temp[0] == 0) && (tst == CUR_PTR)) {
Daniel Veillard6454aec1999-09-02 22:04:43 +00002328 xmlFree((char *)temp);
Daniel Veillardb96e6431999-08-29 21:02:19 +00002329 ret = xmlStrndup((CHAR *) "", 0);
Daniel Veillardb05deb71999-08-10 19:04:08 +00002330 break;
2331 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00002332 ret = xmlStrcat(ret, temp);
Daniel Veillard6454aec1999-09-02 22:04:43 +00002333 if (temp != NULL) xmlFree((char *)temp);
Daniel Veillardb05deb71999-08-10 19:04:08 +00002334 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002335 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00002336 if (CUR != '"') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002337 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard011b63c1999-06-02 17:44:04 +00002338 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002339 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002340 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002341 if (orig != NULL) /* !!!!!!!!! */
Daniel Veillard011b63c1999-06-02 17:44:04 +00002342 *orig = xmlStrndup(org, CUR_PTR - org);
Daniel Veillardb05deb71999-08-10 19:04:08 +00002343 if (ret == NULL)
Daniel Veillardb96e6431999-08-29 21:02:19 +00002344 ret = xmlStrndup((CHAR *) "", 0);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002345 NEXT;
Daniel Veillard011b63c1999-06-02 17:44:04 +00002346 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002347 } else if (CUR == '\'') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002348 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2349 input = ctxt->input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002350 NEXT;
Daniel Veillard011b63c1999-06-02 17:44:04 +00002351 org = CUR_PTR;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002352 /*
2353 * NOTE: 4.4.5 Included in Literal
2354 * When a parameter entity reference appears in a literal entity
2355 * value, ... a single or double quote character in the replacement
2356 * text is always treated as a normal data character and will not
2357 * terminate the literal.
2358 * In practice it means we stop the loop only when back at parsing
2359 * the initial entity and the quote is found
2360 */
2361 while ((CUR != '\'') || (ctxt->input != input)) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00002362 tst = CUR_PTR;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002363 /*
2364 * NOTE: 4.4.7 Bypassed
2365 * When a general entity reference appears in the EntityValue in
2366 * an entity declaration, it is bypassed and left as is.
2367 * so XML_SUBSTITUTE_REF is not set.
2368 */
2369 if (ctxt->input != input)
2370 temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2371 0, 0, 0);
2372 else
2373 temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2374 '\'', 0, 0);
2375
2376 /*
2377 * Pop-up of finished entities.
2378 */
2379 while ((CUR == 0) && (ctxt->inputNr > 1))
2380 xmlPopInput(ctxt);
2381
2382 if ((temp == NULL) && (tst == CUR_PTR)) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00002383 ret = xmlStrndup((CHAR *) "", 0);
Daniel Veillardb05deb71999-08-10 19:04:08 +00002384 break;
2385 }
2386 if ((temp[0] == 0) && (tst == CUR_PTR)) {
Daniel Veillard6454aec1999-09-02 22:04:43 +00002387 xmlFree((char *)temp);
Daniel Veillardb96e6431999-08-29 21:02:19 +00002388 ret = xmlStrndup((CHAR *) "", 0);
Daniel Veillardb05deb71999-08-10 19:04:08 +00002389 break;
2390 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00002391 ret = xmlStrcat(ret, temp);
Daniel Veillard6454aec1999-09-02 22:04:43 +00002392 if (temp != NULL) xmlFree((char *)temp);
Daniel Veillardb05deb71999-08-10 19:04:08 +00002393 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002394 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00002395 if (CUR != '\'') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002396 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard011b63c1999-06-02 17:44:04 +00002397 ctxt->sax->error(ctxt->userData, "EntityValue: ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002398 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002399 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002400 if (orig != NULL) /* !!!!!!!!! */
Daniel Veillard011b63c1999-06-02 17:44:04 +00002401 *orig = xmlStrndup(org, CUR_PTR - org);
Daniel Veillardb05deb71999-08-10 19:04:08 +00002402 if (ret == NULL)
Daniel Veillardb96e6431999-08-29 21:02:19 +00002403 ret = xmlStrndup((CHAR *) "", 0);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002404 NEXT;
Daniel Veillard011b63c1999-06-02 17:44:04 +00002405 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002406 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002407 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard011b63c1999-06-02 17:44:04 +00002408 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002409 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002410 }
2411
2412 return(ret);
2413}
2414
Daniel Veillard11e00581998-10-24 18:27:49 +00002415/**
2416 * xmlParseAttValue:
2417 * @ctxt: an XML parser context
2418 *
2419 * parse a value for an attribute
Daniel Veillard011b63c1999-06-02 17:44:04 +00002420 * Note: the parser won't do substitution of entities here, this
Daniel Veillardb96e6431999-08-29 21:02:19 +00002421 * will be handled later in xmlStringGetNodeList
Daniel Veillard260a68f1998-08-13 03:39:55 +00002422 *
2423 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2424 * "'" ([^<&'] | Reference)* "'"
Daniel Veillard1e346af1999-02-22 10:33:01 +00002425 *
2426 * Returns the AttValue parsed or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002427 */
2428
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002429CHAR *
2430xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00002431 CHAR *ret = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002432
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002433 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002434 if (CUR == '"') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002435 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002436 NEXT;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002437 ret = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_REF, '"', '<', 0);
Daniel Veillard011b63c1999-06-02 17:44:04 +00002438 if (CUR == '<') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002439 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard011b63c1999-06-02 17:44:04 +00002440 ctxt->sax->error(ctxt->userData,
2441 "Unescaped '<' not allowed in attributes values\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002442 ctxt->wellFormed = 0;
Daniel Veillard011b63c1999-06-02 17:44:04 +00002443 }
2444 if (CUR != '"') {
2445 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2446 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2447 ctxt->wellFormed = 0;
2448 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00002449 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002450 } else if (CUR == '\'') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002451 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002452 NEXT;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002453 ret = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_REF, '\'', '<', 0);
Daniel Veillard011b63c1999-06-02 17:44:04 +00002454 if (CUR == '<') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002455 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard011b63c1999-06-02 17:44:04 +00002456 ctxt->sax->error(ctxt->userData,
2457 "Unescaped '<' not allowed in attributes values\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002458 ctxt->wellFormed = 0;
Daniel Veillard011b63c1999-06-02 17:44:04 +00002459 }
2460 if (CUR != '\'') {
2461 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2462 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2463 ctxt->wellFormed = 0;
2464 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00002465 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002466 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002467 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002468 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002469 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002470 }
2471
2472 return(ret);
2473}
2474
Daniel Veillard11e00581998-10-24 18:27:49 +00002475/**
2476 * xmlParseSystemLiteral:
2477 * @ctxt: an XML parser context
2478 *
2479 * parse an XML Literal
Daniel Veillard260a68f1998-08-13 03:39:55 +00002480 *
2481 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
Daniel Veillard1e346af1999-02-22 10:33:01 +00002482 *
2483 * Returns the SystemLiteral parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00002484 */
2485
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002486CHAR *
2487xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002488 const CHAR *q;
2489 CHAR *ret = NULL;
2490
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002491 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002492 if (CUR == '"') {
2493 NEXT;
2494 q = CUR_PTR;
2495 while ((IS_CHAR(CUR)) && (CUR != '"'))
2496 NEXT;
2497 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002498 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002499 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002500 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002501 } else {
2502 ret = xmlStrndup(q, CUR_PTR - q);
2503 NEXT;
2504 }
2505 } else if (CUR == '\'') {
2506 NEXT;
2507 q = CUR_PTR;
2508 while ((IS_CHAR(CUR)) && (CUR != '\''))
2509 NEXT;
2510 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002511 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002512 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002513 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002514 } else {
2515 ret = xmlStrndup(q, CUR_PTR - q);
2516 NEXT;
2517 }
2518 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002519 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002520 ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002521 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002522 }
2523
2524 return(ret);
2525}
2526
Daniel Veillard11e00581998-10-24 18:27:49 +00002527/**
2528 * xmlParsePubidLiteral:
2529 * @ctxt: an XML parser context
Daniel Veillard260a68f1998-08-13 03:39:55 +00002530 *
Daniel Veillard11e00581998-10-24 18:27:49 +00002531 * parse an XML public literal
Daniel Veillard1e346af1999-02-22 10:33:01 +00002532 *
2533 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2534 *
2535 * Returns the PubidLiteral parsed or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002536 */
2537
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002538CHAR *
2539xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002540 const CHAR *q;
2541 CHAR *ret = NULL;
2542 /*
2543 * Name ::= (Letter | '_') (NameChar)*
2544 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002545 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002546 if (CUR == '"') {
2547 NEXT;
2548 q = CUR_PTR;
2549 while (IS_PUBIDCHAR(CUR)) NEXT;
2550 if (CUR != '"') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002551 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002552 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002553 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002554 } else {
2555 ret = xmlStrndup(q, CUR_PTR - q);
2556 NEXT;
2557 }
2558 } else if (CUR == '\'') {
2559 NEXT;
2560 q = CUR_PTR;
2561 while ((IS_LETTER(CUR)) && (CUR != '\''))
2562 NEXT;
2563 if (!IS_LETTER(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002564 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002565 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002566 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002567 } else {
2568 ret = xmlStrndup(q, CUR_PTR - q);
2569 NEXT;
2570 }
2571 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002572 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002573 ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002574 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002575 }
2576
2577 return(ret);
2578}
2579
Daniel Veillard11e00581998-10-24 18:27:49 +00002580/**
2581 * xmlParseCharData:
2582 * @ctxt: an XML parser context
2583 * @cdata: int indicating whether we are within a CDATA section
2584 *
2585 * parse a CharData section.
2586 * if we are within a CDATA section ']]>' marks an end of section.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002587 *
2588 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2589 */
2590
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002591void
2592xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002593 CHAR buf[1000];
2594 int nbchar = 0;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002595 CHAR cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002596
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002597 SHRINK;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002598 /*
2599 * !!!!!!!!!!!!
2600 * NOTE: NXT(0) is used here to avoid breaking on &lt; or &amp;
2601 * entities substitutions.
2602 */
2603 cur = CUR;
2604 while ((IS_CHAR(cur)) && (cur != '<') &&
2605 (cur != '&')) {
2606 if ((cur == ']') && (NXT(1) == ']') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002607 (NXT(2) == '>')) {
2608 if (cdata) break;
2609 else {
2610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002611 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002612 "Sequence ']]>' not allowed in content\n");
2613 ctxt->wellFormed = 0;
2614 }
2615 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002616 buf[nbchar++] = CUR;
2617 if (nbchar == 1000) {
2618 /*
2619 * Ok the segment is to be consumed as chars.
2620 */
2621 if (ctxt->sax != NULL) {
2622 if (areBlanks(ctxt, buf, nbchar)) {
2623 if (ctxt->sax->ignorableWhitespace != NULL)
2624 ctxt->sax->ignorableWhitespace(ctxt->userData,
2625 buf, nbchar);
2626 } else {
2627 if (ctxt->sax->characters != NULL)
2628 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2629 }
2630 }
2631 nbchar = 0;
2632 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002633 NEXT;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002634 cur = CUR;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002635 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002636 if (nbchar != 0) {
2637 /*
2638 * Ok the segment is to be consumed as chars.
2639 */
2640 if (ctxt->sax != NULL) {
2641 if (areBlanks(ctxt, buf, nbchar)) {
2642 if (ctxt->sax->ignorableWhitespace != NULL)
2643 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2644 } else {
2645 if (ctxt->sax->characters != NULL)
2646 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2647 }
2648 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002649 }
2650}
2651
Daniel Veillard11e00581998-10-24 18:27:49 +00002652/**
2653 * xmlParseExternalID:
2654 * @ctxt: an XML parser context
2655 * @publicID: a CHAR** receiving PubidLiteral
Daniel Veillard1e346af1999-02-22 10:33:01 +00002656 * @strict: indicate whether we should restrict parsing to only
2657 * production [75], see NOTE below
Daniel Veillard11e00581998-10-24 18:27:49 +00002658 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00002659 * Parse an External ID or a Public ID
2660 *
2661 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2662 * 'PUBLIC' S PubidLiteral S SystemLiteral
Daniel Veillard260a68f1998-08-13 03:39:55 +00002663 *
2664 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2665 * | 'PUBLIC' S PubidLiteral S SystemLiteral
Daniel Veillard1e346af1999-02-22 10:33:01 +00002666 *
2667 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2668 *
2669 * Returns the function returns SystemLiteral and in the second
2670 * case publicID receives PubidLiteral, is strict is off
2671 * it is possible to return NULL and have publicID set.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002672 */
2673
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002674CHAR *
Daniel Veillard1e346af1999-02-22 10:33:01 +00002675xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR **publicID, int strict) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002676 CHAR *URI = NULL;
2677
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002678 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002679 if ((CUR == 'S') && (NXT(1) == 'Y') &&
2680 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2681 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2682 SKIP(6);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002683 if (!IS_BLANK(CUR)) {
2684 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002685 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002686 "Space required after 'SYSTEM'\n");
2687 ctxt->wellFormed = 0;
2688 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002689 SKIP_BLANKS;
2690 URI = xmlParseSystemLiteral(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002691 if (URI == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002693 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00002694 "xmlParseExternalID: SYSTEM, no URI\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002695 ctxt->wellFormed = 0;
2696 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002697 } else if ((CUR == 'P') && (NXT(1) == 'U') &&
2698 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2699 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2700 SKIP(6);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002701 if (!IS_BLANK(CUR)) {
2702 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002703 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002704 "Space required after 'PUBLIC'\n");
2705 ctxt->wellFormed = 0;
2706 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002707 SKIP_BLANKS;
2708 *publicID = xmlParsePubidLiteral(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002709 if (*publicID == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002710 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002711 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00002712 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002713 ctxt->wellFormed = 0;
2714 }
Daniel Veillard1e346af1999-02-22 10:33:01 +00002715 if (strict) {
2716 /*
2717 * We don't handle [83] so "S SystemLiteral" is required.
2718 */
2719 if (!IS_BLANK(CUR)) {
2720 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002721 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00002722 "Space required after the Public Identifier\n");
2723 ctxt->wellFormed = 0;
2724 }
2725 } else {
2726 /*
2727 * We handle [83] so we return immediately, if
2728 * "S SystemLiteral" is not detected. From a purely parsing
2729 * point of view that's a nice mess.
2730 */
2731 const CHAR *ptr = CUR_PTR;
2732 if (!IS_BLANK(*ptr)) return(NULL);
2733
2734 while (IS_BLANK(*ptr)) ptr++;
2735 if ((*ptr != '\'') || (*ptr != '"')) return(NULL);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002736 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002737 SKIP_BLANKS;
2738 URI = xmlParseSystemLiteral(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002739 if (URI == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002740 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002741 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00002742 "xmlParseExternalID: PUBLIC, no URI\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002743 ctxt->wellFormed = 0;
2744 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002745 }
2746 return(URI);
2747}
2748
Daniel Veillard11e00581998-10-24 18:27:49 +00002749/**
2750 * xmlParseComment:
Daniel Veillard1e346af1999-02-22 10:33:01 +00002751 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00002752 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00002753 * Skip an XML (SGML) comment <!-- .... -->
Daniel Veillard260a68f1998-08-13 03:39:55 +00002754 * The spec says that "For compatibility, the string "--" (double-hyphen)
2755 * must not occur within comments. "
2756 *
2757 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2758 */
Daniel Veillard517752b1999-04-05 12:20:10 +00002759void
Daniel Veillardb96e6431999-08-29 21:02:19 +00002760xmlParseComment(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002761 const CHAR *q, *start;
2762 const CHAR *r;
2763 CHAR *val;
2764
2765 /*
2766 * Check that there is a comment right here.
2767 */
2768 if ((CUR != '<') || (NXT(1) != '!') ||
Daniel Veillard517752b1999-04-05 12:20:10 +00002769 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002770
Daniel Veillardb05deb71999-08-10 19:04:08 +00002771 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002772 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002773 SKIP(4);
2774 start = q = CUR_PTR;
2775 NEXT;
2776 r = CUR_PTR;
2777 NEXT;
2778 while (IS_CHAR(CUR) &&
2779 ((CUR == ':') || (CUR != '>') ||
2780 (*r != '-') || (*q != '-'))) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002781 if ((*r == '-') && (*q == '-')) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002782 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002783 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00002784 "Comment must not contain '--' (double-hyphen)`\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002785 ctxt->wellFormed = 0;
2786 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002787 NEXT;r++;q++;
2788 }
2789 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002790 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002791 ctxt->sax->error(ctxt->userData, "Comment not terminated \n<!--%.50s\n", start);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002792 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002793 } else {
2794 NEXT;
Daniel Veillardb96e6431999-08-29 21:02:19 +00002795 val = xmlStrndup(start, q - start);
2796 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL))
2797 ctxt->sax->comment(ctxt->userData, val);
Daniel Veillard6454aec1999-09-02 22:04:43 +00002798 xmlFree(val);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002799 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002800}
2801
Daniel Veillard11e00581998-10-24 18:27:49 +00002802/**
2803 * xmlParsePITarget:
2804 * @ctxt: an XML parser context
2805 *
2806 * parse the name of a PI
Daniel Veillard260a68f1998-08-13 03:39:55 +00002807 *
2808 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
Daniel Veillard1e346af1999-02-22 10:33:01 +00002809 *
2810 * Returns the PITarget name or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00002811 */
2812
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002813CHAR *
2814xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002815 CHAR *name;
2816
2817 name = xmlParseName(ctxt);
2818 if ((name != NULL) && (name[3] == 0) &&
2819 ((name[0] == 'x') || (name[0] == 'X')) &&
2820 ((name[1] == 'm') || (name[1] == 'M')) &&
2821 ((name[2] == 'l') || (name[2] == 'L'))) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002822 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002823 ctxt->sax->error(ctxt->userData, "xmlParsePItarget: invalid name prefix 'xml'\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00002824 return(NULL);
2825 }
2826 return(name);
2827}
2828
Daniel Veillard11e00581998-10-24 18:27:49 +00002829/**
2830 * xmlParsePI:
2831 * @ctxt: an XML parser context
2832 *
2833 * parse an XML Processing Instruction.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002834 *
2835 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
Daniel Veillard1e346af1999-02-22 10:33:01 +00002836 *
2837 * The processing is transfered to SAX once parsed.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002838 */
2839
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002840void
2841xmlParsePI(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002842 CHAR *target;
2843
2844 if ((CUR == '<') && (NXT(1) == '?')) {
2845 /*
2846 * this is a Processing Instruction.
2847 */
2848 SKIP(2);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002849 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002850
2851 /*
2852 * Parse the target name and check for special support like
2853 * namespace.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002854 */
2855 target = xmlParsePITarget(ctxt);
2856 if (target != NULL) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00002857 const CHAR *q;
Daniel Veillard517752b1999-04-05 12:20:10 +00002858
Daniel Veillardb96e6431999-08-29 21:02:19 +00002859 if (!IS_BLANK(CUR)) {
2860 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2861 ctxt->sax->error(ctxt->userData,
2862 "xmlParsePI: PI %s space expected\n", target);
2863 ctxt->wellFormed = 0;
2864 }
2865 SKIP_BLANKS;
2866 q = CUR_PTR;
Daniel Veillard517752b1999-04-05 12:20:10 +00002867 while (IS_CHAR(CUR) &&
2868 ((CUR != '?') || (NXT(1) != '>')))
2869 NEXT;
2870 if (!IS_CHAR(CUR)) {
2871 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002872 ctxt->sax->error(ctxt->userData,
Daniel Veillard517752b1999-04-05 12:20:10 +00002873 "xmlParsePI: PI %s never end ...\n", target);
2874 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002875 } else {
Daniel Veillard517752b1999-04-05 12:20:10 +00002876 CHAR *data;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002877
Daniel Veillard517752b1999-04-05 12:20:10 +00002878 data = xmlStrndup(q, CUR_PTR - q);
2879 SKIP(2);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002880
Daniel Veillard517752b1999-04-05 12:20:10 +00002881 /*
2882 * SAX: PI detected.
2883 */
2884 if ((ctxt->sax) &&
2885 (ctxt->sax->processingInstruction != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00002886 ctxt->sax->processingInstruction(ctxt->userData,
2887 target, data);
Daniel Veillard6454aec1999-09-02 22:04:43 +00002888 xmlFree(data);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002889 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00002890 xmlFree(target);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002891 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002892 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00002893 ctxt->sax->error(ctxt->userData,
2894 "xmlParsePI : no target name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002895 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002896 }
2897 }
2898}
2899
Daniel Veillard11e00581998-10-24 18:27:49 +00002900/**
2901 * xmlParseNotationDecl:
2902 * @ctxt: an XML parser context
2903 *
2904 * parse a notation declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00002905 *
2906 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
2907 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00002908 * Hence there is actually 3 choices:
2909 * 'PUBLIC' S PubidLiteral
2910 * 'PUBLIC' S PubidLiteral S SystemLiteral
2911 * and 'SYSTEM' S SystemLiteral
Daniel Veillard11e00581998-10-24 18:27:49 +00002912 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00002913 * See the NOTE on xmlParseExternalID().
Daniel Veillard260a68f1998-08-13 03:39:55 +00002914 */
2915
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002916void
2917xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002918 CHAR *name;
Daniel Veillard1e346af1999-02-22 10:33:01 +00002919 CHAR *Pubid;
2920 CHAR *Systemid;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002921
2922 if ((CUR == '<') && (NXT(1) == '!') &&
2923 (NXT(2) == 'N') && (NXT(3) == 'O') &&
2924 (NXT(4) == 'T') && (NXT(5) == 'A') &&
2925 (NXT(6) == 'T') && (NXT(7) == 'I') &&
Daniel Veillard1e346af1999-02-22 10:33:01 +00002926 (NXT(8) == 'O') && (NXT(9) == 'N')) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002927 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002928 SKIP(10);
Daniel Veillard1e346af1999-02-22 10:33:01 +00002929 if (!IS_BLANK(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002930 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002931 ctxt->sax->error(ctxt->userData, "Space required after '<!NOTATION'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002932 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002933 return;
2934 }
2935 SKIP_BLANKS;
Daniel Veillard1e346af1999-02-22 10:33:01 +00002936
2937 name = xmlParseName(ctxt);
2938 if (name == NULL) {
2939 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002940 ctxt->sax->error(ctxt->userData, "NOTATION: Name expected here\n");
Daniel Veillard1e346af1999-02-22 10:33:01 +00002941 ctxt->wellFormed = 0;
2942 return;
2943 }
2944 if (!IS_BLANK(CUR)) {
2945 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002946 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00002947 "Space required after the NOTATION name'\n");
2948 ctxt->wellFormed = 0;
2949 return;
2950 }
2951 SKIP_BLANKS;
2952
Daniel Veillard260a68f1998-08-13 03:39:55 +00002953 /*
Daniel Veillard1e346af1999-02-22 10:33:01 +00002954 * Parse the IDs.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002955 */
Daniel Veillard1e346af1999-02-22 10:33:01 +00002956 Systemid = xmlParseExternalID(ctxt, &Pubid, 1);
2957 SKIP_BLANKS;
2958
2959 if (CUR == '>') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002960 NEXT;
Daniel Veillard517752b1999-04-05 12:20:10 +00002961 if ((ctxt->sax != NULL) && (ctxt->sax->notationDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002962 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
Daniel Veillard1e346af1999-02-22 10:33:01 +00002963 } else {
2964 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002965 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00002966 "'>' required to close NOTATION declaration\n");
2967 ctxt->wellFormed = 0;
2968 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00002969 xmlFree(name);
2970 if (Systemid != NULL) xmlFree(Systemid);
2971 if (Pubid != NULL) xmlFree(Pubid);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002972 }
2973}
2974
Daniel Veillard11e00581998-10-24 18:27:49 +00002975/**
2976 * xmlParseEntityDecl:
2977 * @ctxt: an XML parser context
2978 *
2979 * parse <!ENTITY declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00002980 *
2981 * [70] EntityDecl ::= GEDecl | PEDecl
2982 *
2983 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
2984 *
2985 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
2986 *
2987 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
2988 *
2989 * [74] PEDef ::= EntityValue | ExternalID
2990 *
2991 * [76] NDataDecl ::= S 'NDATA' S Name
Daniel Veillardb05deb71999-08-10 19:04:08 +00002992 *
2993 * [ VC: Notation Declared ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00002994 * The Name must match the declared name of a notation.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002995 */
2996
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002997void
2998xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002999 CHAR *name = NULL;
3000 CHAR *value = NULL;
3001 CHAR *URI = NULL, *literal = NULL;
3002 CHAR *ndata = NULL;
3003 int isParameter = 0;
Daniel Veillard011b63c1999-06-02 17:44:04 +00003004 CHAR *orig = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003005
Daniel Veillardb05deb71999-08-10 19:04:08 +00003006 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003007 if ((CUR == '<') && (NXT(1) == '!') &&
3008 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3009 (NXT(4) == 'T') && (NXT(5) == 'I') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003010 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00003011 ctxt->instate = XML_PARSER_ENTITY_DECL;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003012 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003013 SKIP(8);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003014 if (!IS_BLANK(CUR)) {
3015 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003016 ctxt->sax->error(ctxt->userData, "Space required after '<!ENTITY'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003017 ctxt->wellFormed = 0;
3018 }
3019 SKIP_BLANKS;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003020
3021 if (CUR == '%') {
3022 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003023 if (!IS_BLANK(CUR)) {
3024 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003025 ctxt->sax->error(ctxt->userData, "Space required after '%'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003026 ctxt->wellFormed = 0;
3027 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003028 SKIP_BLANKS;
3029 isParameter = 1;
3030 }
3031
3032 name = xmlParseName(ctxt);
3033 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003034 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003035 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003036 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003037 return;
3038 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003039 if (!IS_BLANK(CUR)) {
3040 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003041 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003042 "Space required after the entity name\n");
3043 ctxt->wellFormed = 0;
3044 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003045 SKIP_BLANKS;
3046
3047 /*
Daniel Veillard1e346af1999-02-22 10:33:01 +00003048 * handle the various case of definitions...
Daniel Veillard260a68f1998-08-13 03:39:55 +00003049 */
3050 if (isParameter) {
3051 if ((CUR == '"') || (CUR == '\''))
Daniel Veillard011b63c1999-06-02 17:44:04 +00003052 value = xmlParseEntityValue(ctxt, &orig);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003053 if (value) {
Daniel Veillard517752b1999-04-05 12:20:10 +00003054 if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003055 ctxt->sax->entityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003056 XML_INTERNAL_PARAMETER_ENTITY,
3057 NULL, NULL, value);
3058 }
3059 else {
Daniel Veillard1e346af1999-02-22 10:33:01 +00003060 URI = xmlParseExternalID(ctxt, &literal, 1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003061 if (URI) {
Daniel Veillard517752b1999-04-05 12:20:10 +00003062 if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003063 ctxt->sax->entityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003064 XML_EXTERNAL_PARAMETER_ENTITY,
3065 literal, URI, NULL);
3066 }
3067 }
3068 } else {
3069 if ((CUR == '"') || (CUR == '\'')) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00003070 value = xmlParseEntityValue(ctxt, &orig);
Daniel Veillard517752b1999-04-05 12:20:10 +00003071 if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003072 ctxt->sax->entityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003073 XML_INTERNAL_GENERAL_ENTITY,
3074 NULL, NULL, value);
3075 } else {
Daniel Veillard1e346af1999-02-22 10:33:01 +00003076 URI = xmlParseExternalID(ctxt, &literal, 1);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003077 if ((CUR != '>') && (!IS_BLANK(CUR))) {
3078 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003079 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003080 "Space required before 'NDATA'\n");
3081 ctxt->wellFormed = 0;
3082 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003083 SKIP_BLANKS;
3084 if ((CUR == 'N') && (NXT(1) == 'D') &&
3085 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3086 (NXT(4) == 'A')) {
3087 SKIP(5);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003088 if (!IS_BLANK(CUR)) {
3089 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003090 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003091 "Space required after 'NDATA'\n");
3092 ctxt->wellFormed = 0;
3093 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003094 SKIP_BLANKS;
3095 ndata = xmlParseName(ctxt);
Daniel Veillardb96e6431999-08-29 21:02:19 +00003096 if ((ctxt->sax != NULL) &&
3097 (ctxt->sax->unparsedEntityDecl != NULL))
3098 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003099 literal, URI, ndata);
3100 } else {
Daniel Veillard517752b1999-04-05 12:20:10 +00003101 if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003102 ctxt->sax->entityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003103 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3104 literal, URI, NULL);
3105 }
3106 }
3107 }
3108 SKIP_BLANKS;
3109 if (CUR != '>') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003110 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003111 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003112 "xmlParseEntityDecl: entity %s not terminated\n", name);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003113 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003114 } else
3115 NEXT;
Daniel Veillard011b63c1999-06-02 17:44:04 +00003116 if (orig != NULL) {
3117 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00003118 * Ugly mechanism to save the raw entity value.
Daniel Veillard011b63c1999-06-02 17:44:04 +00003119 */
3120 xmlEntityPtr cur = NULL;
3121
Daniel Veillardb05deb71999-08-10 19:04:08 +00003122 if (isParameter) {
3123 if ((ctxt->sax != NULL) &&
3124 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillardc08a2c61999-09-08 21:35:25 +00003125 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003126 } else {
3127 if ((ctxt->sax != NULL) &&
3128 (ctxt->sax->getEntity != NULL))
Daniel Veillardc08a2c61999-09-08 21:35:25 +00003129 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003130 }
3131 if (cur != NULL) {
3132 if (cur->orig != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00003133 xmlFree(orig);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003134 else
3135 cur->orig = orig;
3136 } else
Daniel Veillard6454aec1999-09-02 22:04:43 +00003137 xmlFree(orig);
Daniel Veillard011b63c1999-06-02 17:44:04 +00003138 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00003139 if (name != NULL) xmlFree(name);
3140 if (value != NULL) xmlFree(value);
3141 if (URI != NULL) xmlFree(URI);
3142 if (literal != NULL) xmlFree(literal);
3143 if (ndata != NULL) xmlFree(ndata);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003144 }
3145}
3146
Daniel Veillard11e00581998-10-24 18:27:49 +00003147/**
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003148 * xmlParseDefaultDecl:
3149 * @ctxt: an XML parser context
3150 * @value: Receive a possible fixed default value for the attribute
3151 *
3152 * Parse an attribute default declaration
3153 *
3154 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3155 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003156 * [ VC: Required Attribute ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00003157 * if the default declaration is the keyword #REQUIRED, then the
3158 * attribute must be specified for all elements of the type in the
3159 * attribute-list declaration.
Daniel Veillardb05deb71999-08-10 19:04:08 +00003160 *
3161 * [ VC: Attribute Default Legal ]
3162 * The declared default value must meet the lexical constraints of
3163 * the declared attribute type c.f. xmlValidateAttributeDecl()
3164 *
3165 * [ VC: Fixed Attribute Default ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00003166 * if an attribute has a default value declared with the #FIXED
3167 * keyword, instances of that attribute must match the default value.
Daniel Veillardb05deb71999-08-10 19:04:08 +00003168 *
3169 * [ WFC: No < in Attribute Values ]
3170 * handled in xmlParseAttValue()
3171 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003172 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3173 * or XML_ATTRIBUTE_FIXED.
3174 */
3175
3176int
3177xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, CHAR **value) {
3178 int val;
3179 CHAR *ret;
3180
3181 *value = NULL;
3182 if ((CUR == '#') && (NXT(1) == 'R') &&
3183 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3184 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3185 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3186 (NXT(8) == 'D')) {
3187 SKIP(9);
3188 return(XML_ATTRIBUTE_REQUIRED);
3189 }
3190 if ((CUR == '#') && (NXT(1) == 'I') &&
3191 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3192 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3193 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3194 SKIP(8);
3195 return(XML_ATTRIBUTE_IMPLIED);
3196 }
3197 val = XML_ATTRIBUTE_NONE;
3198 if ((CUR == '#') && (NXT(1) == 'F') &&
3199 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3200 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3201 SKIP(6);
3202 val = XML_ATTRIBUTE_FIXED;
3203 if (!IS_BLANK(CUR)) {
3204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003205 ctxt->sax->error(ctxt->userData, "Space required after '#FIXED'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003206 ctxt->wellFormed = 0;
3207 }
3208 SKIP_BLANKS;
3209 }
3210 ret = xmlParseAttValue(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003211 ctxt->instate = XML_PARSER_DTD;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003212 if (ret == NULL) {
3213 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003214 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003215 "Attribute default value declaration error\n");
3216 ctxt->wellFormed = 0;
3217 } else
3218 *value = ret;
3219 return(val);
3220}
3221
3222/**
Daniel Veillard1e346af1999-02-22 10:33:01 +00003223 * xmlParseNotationType:
3224 * @ctxt: an XML parser context
3225 *
3226 * parse an Notation attribute type.
3227 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003228 * Note: the leading 'NOTATION' S part has already being parsed...
3229 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003230 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3231 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003232 * [ VC: Notation Attributes ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00003233 * Values of this type must match one of the notation names included
Daniel Veillardb05deb71999-08-10 19:04:08 +00003234 * in the declaration; all notation names in the declaration must be declared.
Daniel Veillard1e346af1999-02-22 10:33:01 +00003235 *
3236 * Returns: the notation attribute tree built while parsing
3237 */
3238
3239xmlEnumerationPtr
3240xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3241 CHAR *name;
3242 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3243
3244 if (CUR != '(') {
3245 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003246 ctxt->sax->error(ctxt->userData, "'(' required to start 'NOTATION'\n");
Daniel Veillard1e346af1999-02-22 10:33:01 +00003247 ctxt->wellFormed = 0;
3248 return(NULL);
3249 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003250 SHRINK;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003251 do {
3252 NEXT;
3253 SKIP_BLANKS;
3254 name = xmlParseName(ctxt);
3255 if (name == NULL) {
3256 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003257 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00003258 "Name expected in NOTATION declaration\n");
3259 ctxt->wellFormed = 0;
3260 return(ret);
3261 }
3262 cur = xmlCreateEnumeration(name);
Daniel Veillard6454aec1999-09-02 22:04:43 +00003263 xmlFree(name);
Daniel Veillard1e346af1999-02-22 10:33:01 +00003264 if (cur == NULL) return(ret);
3265 if (last == NULL) ret = last = cur;
3266 else {
3267 last->next = cur;
3268 last = cur;
3269 }
3270 SKIP_BLANKS;
3271 } while (CUR == '|');
3272 if (CUR != ')') {
3273 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003274 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00003275 "')' required to finish NOTATION declaration\n");
3276 ctxt->wellFormed = 0;
3277 return(ret);
3278 }
3279 NEXT;
3280 return(ret);
3281}
3282
3283/**
3284 * xmlParseEnumerationType:
3285 * @ctxt: an XML parser context
3286 *
3287 * parse an Enumeration attribute type.
3288 *
3289 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3290 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003291 * [ VC: Enumeration ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00003292 * Values of this type must match one of the Nmtoken tokens in
Daniel Veillardb05deb71999-08-10 19:04:08 +00003293 * the declaration
3294 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003295 * Returns: the enumeration attribute tree built while parsing
3296 */
3297
3298xmlEnumerationPtr
3299xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3300 CHAR *name;
3301 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3302
3303 if (CUR != '(') {
3304 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003305 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00003306 "'(' required to start ATTLIST enumeration\n");
3307 ctxt->wellFormed = 0;
3308 return(NULL);
3309 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003310 SHRINK;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003311 do {
3312 NEXT;
3313 SKIP_BLANKS;
3314 name = xmlParseNmtoken(ctxt);
3315 if (name == NULL) {
3316 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003317 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00003318 "NmToken expected in ATTLIST enumeration\n");
3319 ctxt->wellFormed = 0;
3320 return(ret);
3321 }
3322 cur = xmlCreateEnumeration(name);
Daniel Veillard6454aec1999-09-02 22:04:43 +00003323 xmlFree(name);
Daniel Veillard1e346af1999-02-22 10:33:01 +00003324 if (cur == NULL) return(ret);
3325 if (last == NULL) ret = last = cur;
3326 else {
3327 last->next = cur;
3328 last = cur;
3329 }
3330 SKIP_BLANKS;
3331 } while (CUR == '|');
3332 if (CUR != ')') {
3333 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003334 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00003335 "')' required to finish ATTLIST enumeration\n");
3336 ctxt->wellFormed = 0;
3337 return(ret);
3338 }
3339 NEXT;
3340 return(ret);
3341}
3342
3343/**
Daniel Veillard11e00581998-10-24 18:27:49 +00003344 * xmlParseEnumeratedType:
3345 * @ctxt: an XML parser context
Daniel Veillard1e346af1999-02-22 10:33:01 +00003346 * @tree: the enumeration tree built while parsing
Daniel Veillard11e00581998-10-24 18:27:49 +00003347 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003348 * parse an Enumerated attribute type.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003349 *
3350 * [57] EnumeratedType ::= NotationType | Enumeration
3351 *
3352 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3353 *
Daniel Veillard11e00581998-10-24 18:27:49 +00003354 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003355 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
Daniel Veillard260a68f1998-08-13 03:39:55 +00003356 */
3357
Daniel Veillard1e346af1999-02-22 10:33:01 +00003358int
3359xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3360 if ((CUR == 'N') && (NXT(1) == 'O') &&
3361 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3362 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3363 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3364 SKIP(8);
3365 if (!IS_BLANK(CUR)) {
3366 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003367 ctxt->sax->error(ctxt->userData, "Space required after 'NOTATION'\n");
Daniel Veillard1e346af1999-02-22 10:33:01 +00003368 ctxt->wellFormed = 0;
3369 return(0);
3370 }
3371 SKIP_BLANKS;
3372 *tree = xmlParseNotationType(ctxt);
3373 if (*tree == NULL) return(0);
3374 return(XML_ATTRIBUTE_NOTATION);
3375 }
3376 *tree = xmlParseEnumerationType(ctxt);
3377 if (*tree == NULL) return(0);
3378 return(XML_ATTRIBUTE_ENUMERATION);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003379}
3380
Daniel Veillard11e00581998-10-24 18:27:49 +00003381/**
3382 * xmlParseAttributeType:
3383 * @ctxt: an XML parser context
Daniel Veillard1e346af1999-02-22 10:33:01 +00003384 * @tree: the enumeration tree built while parsing
Daniel Veillard11e00581998-10-24 18:27:49 +00003385 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003386 * parse the Attribute list def for an element
Daniel Veillard260a68f1998-08-13 03:39:55 +00003387 *
3388 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3389 *
3390 * [55] StringType ::= 'CDATA'
3391 *
3392 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3393 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
Daniel Veillard11e00581998-10-24 18:27:49 +00003394 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003395 * Validity constraints for attribute values syntax are checked in
3396 * xmlValidateAttributeValue()
3397 *
3398 * [ VC: ID ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00003399 * Values of type ID must match the Name production. A name must not
Daniel Veillardb05deb71999-08-10 19:04:08 +00003400 * appear more than once in an XML document as a value of this type;
3401 * i.e., ID values must uniquely identify the elements which bear them.
3402 *
3403 * [ VC: One ID per Element Type ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00003404 * No element type may have more than one ID attribute specified.
Daniel Veillardb05deb71999-08-10 19:04:08 +00003405 *
3406 * [ VC: ID Attribute Default ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00003407 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
Daniel Veillardb05deb71999-08-10 19:04:08 +00003408 *
3409 * [ VC: IDREF ]
3410 * Values of type IDREF must match the Name production, and values
Daniel Veillardb96e6431999-08-29 21:02:19 +00003411 * of type IDREFS must match Names; TODO each IDREF Name must match the value
3412 * of an ID attribute on some element in the XML document; i.e. IDREF
Daniel Veillardb05deb71999-08-10 19:04:08 +00003413 * values must match the value of some ID attribute.
3414 *
3415 * [ VC: Entity Name ]
3416 * Values of type ENTITY must match the Name production, values
Daniel Veillardb96e6431999-08-29 21:02:19 +00003417 * of type ENTITIES must match Names; TODO each Entity Name must match the
3418 * name of an unparsed entity declared in the DTD.
Daniel Veillardb05deb71999-08-10 19:04:08 +00003419 *
3420 * [ VC: Name Token ]
3421 * Values of type NMTOKEN must match the Nmtoken production; values
3422 * of type NMTOKENS must match Nmtokens.
3423 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003424 * Returns the attribute type
Daniel Veillard260a68f1998-08-13 03:39:55 +00003425 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003426int
Daniel Veillard1e346af1999-02-22 10:33:01 +00003427xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003428 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003429 if ((CUR == 'C') && (NXT(1) == 'D') &&
3430 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3431 (NXT(4) == 'A')) {
3432 SKIP(5);
Daniel Veillard1e346af1999-02-22 10:33:01 +00003433 return(XML_ATTRIBUTE_CDATA);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003434 } else if ((CUR == 'I') && (NXT(1) == 'D') &&
3435 (NXT(2) == 'R') && (NXT(3) == 'E') &&
Daniel Veillardb05deb71999-08-10 19:04:08 +00003436 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3437 SKIP(6);
3438 return(XML_ATTRIBUTE_IDREFS);
3439 } else if ((CUR == 'I') && (NXT(1) == 'D') &&
3440 (NXT(2) == 'R') && (NXT(3) == 'E') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00003441 (NXT(4) == 'F')) {
3442 SKIP(5);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003443 return(XML_ATTRIBUTE_IDREF);
Daniel Veillard1e346af1999-02-22 10:33:01 +00003444 } else if ((CUR == 'I') && (NXT(1) == 'D')) {
3445 SKIP(2);
3446 return(XML_ATTRIBUTE_ID);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003447 } else if ((CUR == 'E') && (NXT(1) == 'N') &&
3448 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3449 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3450 SKIP(6);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003451 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003452 } else if ((CUR == 'E') && (NXT(1) == 'N') &&
3453 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3454 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3455 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3456 SKIP(8);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003457 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003458 } else if ((CUR == 'N') && (NXT(1) == 'M') &&
3459 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3460 (NXT(4) == 'K') && (NXT(5) == 'E') &&
Daniel Veillard1e346af1999-02-22 10:33:01 +00003461 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3462 SKIP(8);
3463 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003464 } else if ((CUR == 'N') && (NXT(1) == 'M') &&
3465 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3466 (NXT(4) == 'K') && (NXT(5) == 'E') &&
Daniel Veillard1e346af1999-02-22 10:33:01 +00003467 (NXT(6) == 'N')) {
3468 SKIP(7);
3469 return(XML_ATTRIBUTE_NMTOKEN);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003470 }
Daniel Veillard1e346af1999-02-22 10:33:01 +00003471 return(xmlParseEnumeratedType(ctxt, tree));
Daniel Veillard260a68f1998-08-13 03:39:55 +00003472}
3473
Daniel Veillard11e00581998-10-24 18:27:49 +00003474/**
3475 * xmlParseAttributeListDecl:
3476 * @ctxt: an XML parser context
3477 *
3478 * : parse the Attribute list def for an element
Daniel Veillard260a68f1998-08-13 03:39:55 +00003479 *
3480 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3481 *
3482 * [53] AttDef ::= S Name S AttType S DefaultDecl
Daniel Veillard11e00581998-10-24 18:27:49 +00003483 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00003484 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003485void
3486xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003487 CHAR *elemName;
3488 CHAR *attrName;
Daniel Veillardb05deb71999-08-10 19:04:08 +00003489 xmlEnumerationPtr tree;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003490
Daniel Veillard260a68f1998-08-13 03:39:55 +00003491 if ((CUR == '<') && (NXT(1) == '!') &&
3492 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3493 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3494 (NXT(6) == 'I') && (NXT(7) == 'S') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003495 (NXT(8) == 'T')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003496 SKIP(9);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003497 if (!IS_BLANK(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003498 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003499 ctxt->sax->error(ctxt->userData, "Space required after '<!ATTLIST'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003500 ctxt->wellFormed = 0;
3501 }
3502 SKIP_BLANKS;
3503 elemName = xmlParseName(ctxt);
3504 if (elemName == NULL) {
3505 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003506 ctxt->sax->error(ctxt->userData, "ATTLIST: no name for Element\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003507 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003508 return;
3509 }
3510 SKIP_BLANKS;
3511 while (CUR != '>') {
3512 const CHAR *check = CUR_PTR;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003513 int type;
3514 int def;
3515 CHAR *defaultValue = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003516
Daniel Veillardb05deb71999-08-10 19:04:08 +00003517 tree = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003518 attrName = xmlParseName(ctxt);
3519 if (attrName == NULL) {
3520 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003521 ctxt->sax->error(ctxt->userData, "ATTLIST: no name for Attribute\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003522 ctxt->wellFormed = 0;
3523 break;
3524 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00003525 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003526 if (!IS_BLANK(CUR)) {
3527 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003528 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003529 "Space required after the attribute name\n");
3530 ctxt->wellFormed = 0;
3531 break;
3532 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003533 SKIP_BLANKS;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003534
Daniel Veillard1e346af1999-02-22 10:33:01 +00003535 type = xmlParseAttributeType(ctxt, &tree);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003536 if (type <= 0) break;
3537
Daniel Veillardb05deb71999-08-10 19:04:08 +00003538 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003539 if (!IS_BLANK(CUR)) {
3540 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003541 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003542 "Space required after the attribute type\n");
3543 ctxt->wellFormed = 0;
3544 break;
3545 }
3546 SKIP_BLANKS;
3547
3548 def = xmlParseDefaultDecl(ctxt, &defaultValue);
3549 if (def <= 0) break;
3550
Daniel Veillardb05deb71999-08-10 19:04:08 +00003551 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003552 if (CUR != '>') {
3553 if (!IS_BLANK(CUR)) {
3554 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003555 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003556 "Space required after the attribute default value\n");
3557 ctxt->wellFormed = 0;
3558 break;
3559 }
3560 SKIP_BLANKS;
3561 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003562 if (check == CUR_PTR) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003563 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003564 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003565 "xmlParseAttributeListDecl: detected internal error\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00003566 break;
3567 }
Daniel Veillard517752b1999-04-05 12:20:10 +00003568 if ((ctxt->sax != NULL) && (ctxt->sax->attributeDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003569 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
Daniel Veillard1e346af1999-02-22 10:33:01 +00003570 type, def, defaultValue, tree);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003571 if (attrName != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00003572 xmlFree(attrName);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003573 if (defaultValue != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00003574 xmlFree(defaultValue);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003575 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003576 }
3577 if (CUR == '>')
3578 NEXT;
3579
Daniel Veillard6454aec1999-09-02 22:04:43 +00003580 xmlFree(elemName);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003581 }
3582}
3583
Daniel Veillard11e00581998-10-24 18:27:49 +00003584/**
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003585 * xmlParseElementMixedContentDecl:
Daniel Veillard11e00581998-10-24 18:27:49 +00003586 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00003587 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003588 * parse the declaration for a Mixed Element content
3589 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillard260a68f1998-08-13 03:39:55 +00003590 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003591 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3592 * '(' S? '#PCDATA' S? ')'
3593 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003594 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3595 *
3596 * [ VC: No Duplicate Types ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00003597 * The same name must not appear more than once in a single
3598 * mixed-content declaration.
Daniel Veillardb05deb71999-08-10 19:04:08 +00003599 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003600 * returns: the list of the xmlElementContentPtr describing the element choices
3601 */
3602xmlElementContentPtr
3603xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard1899e851999-02-01 12:18:54 +00003604 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003605 CHAR *elem = NULL;
3606
Daniel Veillardb05deb71999-08-10 19:04:08 +00003607 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003608 if ((CUR == '#') && (NXT(1) == 'P') &&
3609 (NXT(2) == 'C') && (NXT(3) == 'D') &&
3610 (NXT(4) == 'A') && (NXT(5) == 'T') &&
3611 (NXT(6) == 'A')) {
3612 SKIP(7);
3613 SKIP_BLANKS;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003614 SHRINK;
Daniel Veillard3b9def11999-01-31 22:15:06 +00003615 if (CUR == ')') {
3616 NEXT;
3617 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3618 return(ret);
3619 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003620 if ((CUR == '(') || (CUR == '|')) {
3621 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3622 if (ret == NULL) return(NULL);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003623 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003624 while (CUR == '|') {
Daniel Veillard1899e851999-02-01 12:18:54 +00003625 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003626 if (elem == NULL) {
3627 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3628 if (ret == NULL) return(NULL);
3629 ret->c1 = cur;
Daniel Veillard1899e851999-02-01 12:18:54 +00003630 cur = ret;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003631 } else {
Daniel Veillard1899e851999-02-01 12:18:54 +00003632 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3633 if (n == NULL) return(NULL);
3634 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
3635 cur->c2 = n;
3636 cur = n;
Daniel Veillard6454aec1999-09-02 22:04:43 +00003637 xmlFree(elem);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003638 }
3639 SKIP_BLANKS;
3640 elem = xmlParseName(ctxt);
3641 if (elem == NULL) {
3642 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003643 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003644 "xmlParseElementMixedContentDecl : Name expected\n");
3645 ctxt->wellFormed = 0;
3646 xmlFreeElementContent(cur);
3647 return(NULL);
3648 }
3649 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00003650 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003651 }
Daniel Veillard3b9def11999-01-31 22:15:06 +00003652 if ((CUR == ')') && (NXT(1) == '*')) {
Daniel Veillard1e346af1999-02-22 10:33:01 +00003653 if (elem != NULL) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003654 cur->c2 = xmlNewElementContent(elem,
3655 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard6454aec1999-09-02 22:04:43 +00003656 xmlFree(elem);
Daniel Veillard1e346af1999-02-22 10:33:01 +00003657 }
Daniel Veillard1899e851999-02-01 12:18:54 +00003658 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3659 SKIP(2);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003660 } else {
Daniel Veillard6454aec1999-09-02 22:04:43 +00003661 if (elem != NULL) xmlFree(elem);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003662 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003663 ctxt->sax->error(ctxt->userData,
Daniel Veillard3b9def11999-01-31 22:15:06 +00003664 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003665 ctxt->wellFormed = 0;
3666 xmlFreeElementContent(ret);
3667 return(NULL);
3668 }
3669
3670 } else {
3671 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003672 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003673 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
3674 ctxt->wellFormed = 0;
3675 }
3676 return(ret);
3677}
3678
3679/**
3680 * xmlParseElementChildrenContentDecl:
3681 * @ctxt: an XML parser context
3682 *
3683 * parse the declaration for a Mixed Element content
3684 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3685 *
3686 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00003687 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
3688 *
3689 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
3690 *
3691 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
3692 *
3693 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
3694 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003695 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
3696 * TODO Parameter-entity replacement text must be properly nested
3697 * with parenthetized groups. That is to say, if either of the
3698 * opening or closing parentheses in a choice, seq, or Mixed
3699 * construct is contained in the replacement text for a parameter
3700 * entity, both must be contained in the same replacement text. For
3701 * interoperability, if a parameter-entity reference appears in a
3702 * choice, seq, or Mixed construct, its replacement text should not
3703 * be empty, and neither the first nor last non-blank character of
3704 * the replacement text should be a connector (| or ,).
3705 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003706 * returns: the tree of xmlElementContentPtr describing the element
3707 * hierarchy.
3708 */
3709xmlElementContentPtr
3710xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
3711 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
3712 CHAR *elem;
3713 CHAR type = 0;
3714
3715 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00003716 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003717 if (CUR == '(') {
3718 /* Recurse on first child */
3719 NEXT;
3720 SKIP_BLANKS;
3721 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
3722 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00003723 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003724 } else {
3725 elem = xmlParseName(ctxt);
3726 if (elem == NULL) {
3727 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003728 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003729 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
3730 ctxt->wellFormed = 0;
3731 return(NULL);
3732 }
3733 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003734 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003735 if (CUR == '?') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00003736 cur->ocur = XML_ELEMENT_CONTENT_OPT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003737 NEXT;
3738 } else if (CUR == '*') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00003739 cur->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003740 NEXT;
3741 } else if (CUR == '+') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00003742 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003743 NEXT;
3744 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00003745 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003746 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00003747 xmlFree(elem);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003748 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003749 }
3750 SKIP_BLANKS;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003751 SHRINK;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003752 while (CUR != ')') {
3753 /*
3754 * Each loop we parse one separator and one element.
3755 */
3756 if (CUR == ',') {
3757 if (type == 0) type = CUR;
3758
3759 /*
3760 * Detect "Name | Name , Name" error
3761 */
3762 else if (type != CUR) {
3763 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003764 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003765 "xmlParseElementChildrenContentDecl : '%c' expected\n",
3766 type);
3767 ctxt->wellFormed = 0;
3768 xmlFreeElementContent(ret);
3769 return(NULL);
3770 }
Daniel Veillard1899e851999-02-01 12:18:54 +00003771 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003772
3773 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
3774 if (op == NULL) {
3775 xmlFreeElementContent(ret);
3776 return(NULL);
3777 }
3778 if (last == NULL) {
3779 op->c1 = ret;
3780 ret = cur = op;
3781 } else {
3782 cur->c2 = op;
3783 op->c1 = last;
3784 cur =op;
Daniel Veillard1899e851999-02-01 12:18:54 +00003785 last = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003786 }
3787 } else if (CUR == '|') {
3788 if (type == 0) type = CUR;
3789
3790 /*
3791 * Detect "Name , Name | Name" error
3792 */
3793 else if (type != CUR) {
3794 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003795 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003796 "xmlParseElementChildrenContentDecl : '%c' expected\n",
3797 type);
3798 ctxt->wellFormed = 0;
3799 xmlFreeElementContent(ret);
3800 return(NULL);
3801 }
Daniel Veillard1899e851999-02-01 12:18:54 +00003802 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003803
3804 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3805 if (op == NULL) {
3806 xmlFreeElementContent(ret);
3807 return(NULL);
3808 }
3809 if (last == NULL) {
3810 op->c1 = ret;
3811 ret = cur = op;
3812 } else {
3813 cur->c2 = op;
3814 op->c1 = last;
3815 cur =op;
Daniel Veillard1899e851999-02-01 12:18:54 +00003816 last = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003817 }
3818 } else {
3819 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003820 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003821 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
3822 ctxt->wellFormed = 0;
3823 xmlFreeElementContent(ret);
3824 return(NULL);
3825 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00003826 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003827 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00003828 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003829 if (CUR == '(') {
3830 /* Recurse on second child */
3831 NEXT;
3832 SKIP_BLANKS;
Daniel Veillard1899e851999-02-01 12:18:54 +00003833 last = xmlParseElementChildrenContentDecl(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003834 SKIP_BLANKS;
3835 } else {
3836 elem = xmlParseName(ctxt);
3837 if (elem == NULL) {
3838 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003839 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003840 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
3841 ctxt->wellFormed = 0;
3842 return(NULL);
3843 }
Daniel Veillard1899e851999-02-01 12:18:54 +00003844 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard6454aec1999-09-02 22:04:43 +00003845 xmlFree(elem);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003846 if (CUR == '?') {
3847 last->ocur = XML_ELEMENT_CONTENT_OPT;
3848 NEXT;
3849 } else if (CUR == '*') {
3850 last->ocur = XML_ELEMENT_CONTENT_MULT;
3851 NEXT;
3852 } else if (CUR == '+') {
3853 last->ocur = XML_ELEMENT_CONTENT_PLUS;
3854 NEXT;
3855 } else {
3856 last->ocur = XML_ELEMENT_CONTENT_ONCE;
3857 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003858 }
3859 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00003860 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003861 }
Daniel Veillard1899e851999-02-01 12:18:54 +00003862 if ((cur != NULL) && (last != NULL)) {
3863 cur->c2 = last;
3864 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003865 NEXT;
3866 if (CUR == '?') {
3867 ret->ocur = XML_ELEMENT_CONTENT_OPT;
3868 NEXT;
3869 } else if (CUR == '*') {
3870 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3871 NEXT;
3872 } else if (CUR == '+') {
3873 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
3874 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003875 }
3876 return(ret);
3877}
3878
3879/**
3880 * xmlParseElementContentDecl:
3881 * @ctxt: an XML parser context
3882 * @name: the name of the element being defined.
3883 * @result: the Element Content pointer will be stored here if any
Daniel Veillard260a68f1998-08-13 03:39:55 +00003884 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003885 * parse the declaration for an Element content either Mixed or Children,
3886 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
3887 *
3888 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
Daniel Veillard11e00581998-10-24 18:27:49 +00003889 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003890 * returns: the type of element content XML_ELEMENT_TYPE_xxx
Daniel Veillard260a68f1998-08-13 03:39:55 +00003891 */
3892
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003893int
3894xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, CHAR *name,
3895 xmlElementContentPtr *result) {
3896
3897 xmlElementContentPtr tree = NULL;
3898 int res;
3899
3900 *result = NULL;
3901
3902 if (CUR != '(') {
3903 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003904 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003905 "xmlParseElementContentDecl : '(' expected\n");
3906 ctxt->wellFormed = 0;
3907 return(-1);
3908 }
3909 NEXT;
Daniel Veillardb05deb71999-08-10 19:04:08 +00003910 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003911 SKIP_BLANKS;
3912 if ((CUR == '#') && (NXT(1) == 'P') &&
3913 (NXT(2) == 'C') && (NXT(3) == 'D') &&
3914 (NXT(4) == 'A') && (NXT(5) == 'T') &&
3915 (NXT(6) == 'A')) {
3916 tree = xmlParseElementMixedContentDecl(ctxt);
3917 res = XML_ELEMENT_TYPE_MIXED;
3918 } else {
3919 tree = xmlParseElementChildrenContentDecl(ctxt);
3920 res = XML_ELEMENT_TYPE_ELEMENT;
3921 }
3922 SKIP_BLANKS;
3923 /****************************
3924 if (CUR != ')') {
3925 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003926 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003927 "xmlParseElementContentDecl : ')' expected\n");
3928 ctxt->wellFormed = 0;
3929 return(-1);
3930 }
3931 ****************************/
Daniel Veillard3b9def11999-01-31 22:15:06 +00003932 *result = tree;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003933 return(res);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003934}
3935
Daniel Veillard11e00581998-10-24 18:27:49 +00003936/**
3937 * xmlParseElementDecl:
3938 * @ctxt: an XML parser context
3939 *
3940 * parse an Element declaration.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003941 *
3942 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
3943 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003944 * [ VC: Unique Element Type Declaration ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00003945 * No element type may be declared more than once
Daniel Veillard1e346af1999-02-22 10:33:01 +00003946 *
3947 * Returns the type of the element, or -1 in case of error
Daniel Veillard260a68f1998-08-13 03:39:55 +00003948 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003949int
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003950xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003951 CHAR *name;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003952 int ret = -1;
3953 xmlElementContentPtr content = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003954
Daniel Veillardb05deb71999-08-10 19:04:08 +00003955 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003956 if ((CUR == '<') && (NXT(1) == '!') &&
3957 (NXT(2) == 'E') && (NXT(3) == 'L') &&
3958 (NXT(4) == 'E') && (NXT(5) == 'M') &&
3959 (NXT(6) == 'E') && (NXT(7) == 'N') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003960 (NXT(8) == 'T')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003961 SKIP(9);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003962 if (!IS_BLANK(CUR)) {
3963 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003964 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003965 "Space required after 'ELEMENT'\n");
3966 ctxt->wellFormed = 0;
3967 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003968 SKIP_BLANKS;
3969 name = xmlParseName(ctxt);
3970 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003971 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003972 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003973 "xmlParseElementDecl: no name for Element\n");
3974 ctxt->wellFormed = 0;
3975 return(-1);
3976 }
3977 if (!IS_BLANK(CUR)) {
3978 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003979 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003980 "Space required after the element name\n");
3981 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003982 }
3983 SKIP_BLANKS;
3984 if ((CUR == 'E') && (NXT(1) == 'M') &&
3985 (NXT(2) == 'P') && (NXT(3) == 'T') &&
3986 (NXT(4) == 'Y')) {
3987 SKIP(5);
3988 /*
3989 * Element must always be empty.
3990 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003991 ret = XML_ELEMENT_TYPE_EMPTY;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003992 } else if ((CUR == 'A') && (NXT(1) == 'N') &&
3993 (NXT(2) == 'Y')) {
3994 SKIP(3);
3995 /*
3996 * Element is a generic container.
3997 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003998 ret = XML_ELEMENT_TYPE_ANY;
3999 } else if (CUR == '(') {
4000 ret = xmlParseElementContentDecl(ctxt, name, &content);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004001 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004002 /*
4003 * [ WFC: PEs in Internal Subset ] error handling.
4004 */
4005 if ((CUR == '%') && (ctxt->external == 0) &&
4006 (ctxt->inputNr == 1)) {
4007 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4008 ctxt->sax->error(ctxt->userData,
4009 "PEReference: forbidden within markup decl in internal subset\n");
4010 } else {
4011 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4012 ctxt->sax->error(ctxt->userData,
4013 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4014 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004015 ctxt->wellFormed = 0;
Daniel Veillard6454aec1999-09-02 22:04:43 +00004016 if (name != NULL) xmlFree(name);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004017 return(-1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004018 }
4019 SKIP_BLANKS;
4020 if (CUR != '>') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004022 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00004023 "xmlParseElementDecl: expected '>' at the end\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004024 ctxt->wellFormed = 0;
4025 } else {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004026 NEXT;
Daniel Veillard517752b1999-04-05 12:20:10 +00004027 if ((ctxt->sax != NULL) && (ctxt->sax->elementDecl != NULL))
Daniel Veillard011b63c1999-06-02 17:44:04 +00004028 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4029 content);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004030 }
Daniel Veillard14fff061999-06-22 21:49:07 +00004031 if (content != NULL) {
4032 xmlFreeElementContent(content);
4033 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004034 if (name != NULL) {
Daniel Veillard6454aec1999-09-02 22:04:43 +00004035 xmlFree(name);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004036 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004037 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004038 return(ret);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004039}
4040
Daniel Veillard11e00581998-10-24 18:27:49 +00004041/**
4042 * xmlParseMarkupDecl:
4043 * @ctxt: an XML parser context
4044 *
4045 * parse Markup declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00004046 *
4047 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4048 * NotationDecl | PI | Comment
4049 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00004050 * [ VC: Proper Declaration/PE Nesting ]
4051 * TODO Parameter-entity replacement text must be properly nested with
4052 * markup declarations. That is to say, if either the first character
4053 * or the last character of a markup declaration (markupdecl above) is
4054 * contained in the replacement text for a parameter-entity reference,
4055 * both must be contained in the same replacement text.
4056 *
4057 * [ WFC: PEs in Internal Subset ]
4058 * In the internal DTD subset, parameter-entity references can occur
4059 * only where markup declarations can occur, not within markup declarations.
4060 * (This does not apply to references that occur in external parameter
4061 * entities or to the external subset.)
Daniel Veillard260a68f1998-08-13 03:39:55 +00004062 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004063void
4064xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004065 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004066 xmlParseElementDecl(ctxt);
4067 xmlParseAttributeListDecl(ctxt);
4068 xmlParseEntityDecl(ctxt);
4069 xmlParseNotationDecl(ctxt);
4070 xmlParsePI(ctxt);
Daniel Veillardb96e6431999-08-29 21:02:19 +00004071 xmlParseComment(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00004072 /*
4073 * This is only for internal subset. On external entities,
4074 * the replacement is done before parsing stage
4075 */
4076 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4077 xmlParsePEReference(ctxt);
4078 ctxt->instate = XML_PARSER_DTD;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004079}
4080
Daniel Veillard11e00581998-10-24 18:27:49 +00004081/**
Daniel Veillard011b63c1999-06-02 17:44:04 +00004082 * xmlParseTextDecl:
4083 * @ctxt: an XML parser context
4084 *
4085 * parse an XML declaration header for external entities
4086 *
4087 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4088 *
4089 * Returns the only valuable info for an external parsed entity, the encoding
4090 */
4091
4092CHAR *
4093xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4094 CHAR *version;
4095 CHAR *encoding = NULL;
4096
4097 /*
4098 * We know that '<?xml' is here.
4099 */
4100 SKIP(5);
4101
4102 if (!IS_BLANK(CUR)) {
4103 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4104 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
4105 ctxt->wellFormed = 0;
4106 }
4107 SKIP_BLANKS;
4108
4109 /*
4110 * We may have the VersionInfo here.
4111 */
4112 version = xmlParseVersionInfo(ctxt);
Daniel Veillard011b63c1999-06-02 17:44:04 +00004113 if (version == NULL)
4114 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4115 ctxt->version = xmlStrdup(version);
Daniel Veillard6454aec1999-09-02 22:04:43 +00004116 xmlFree(version);
Daniel Veillard011b63c1999-06-02 17:44:04 +00004117
4118 /*
4119 * We must have the encoding declaration
4120 */
4121 if (!IS_BLANK(CUR)) {
4122 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4123 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
4124 ctxt->wellFormed = 0;
4125 }
4126 encoding = xmlParseEncodingDecl(ctxt);
4127
4128 SKIP_BLANKS;
4129 if ((CUR == '?') && (NXT(1) == '>')) {
4130 SKIP(2);
4131 } else if (CUR == '>') {
4132 /* Deprecated old WD ... */
4133 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4134 ctxt->sax->error(ctxt->userData, "XML declaration must end-up with '?>'\n");
4135 ctxt->wellFormed = 0;
4136 NEXT;
4137 } else {
4138 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4139 ctxt->sax->error(ctxt->userData, "parsing XML declaration: '?>' expected\n");
4140 ctxt->wellFormed = 0;
4141 MOVETO_ENDTAG(CUR_PTR);
4142 NEXT;
4143 }
4144 return(encoding);
4145}
4146
4147/*
4148 * xmlParseConditionalSections
4149 * @ctxt: an XML parser context
4150 *
4151 * TODO : Conditionnal section are not yet supported !
4152 *
4153 * [61] conditionalSect ::= includeSect | ignoreSect
4154 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4155 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4156 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4157 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4158 */
4159
4160void
4161xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4162 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4163 ctxt->sax->warning(ctxt->userData,
4164 "XML conditional section not supported\n");
4165 /*
4166 * Skip up to the end of the conditionnal section.
4167 */
4168 while ((CUR != 0) && ((CUR != ']') || (NXT(1) != ']') || (NXT(2) != '>')))
4169 NEXT;
4170 if (CUR == 0) {
4171 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4172 ctxt->sax->error(ctxt->userData,
4173 "XML conditional section not closed\n");
4174 ctxt->wellFormed = 0;
4175 }
4176}
4177
4178/**
4179 * xmlParseExternalSubset
4180 * @ctxt: an XML parser context
4181 *
4182 * parse Markup declarations from an external subset
4183 *
4184 * [30] extSubset ::= textDecl? extSubsetDecl
4185 *
4186 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
Daniel Veillard011b63c1999-06-02 17:44:04 +00004187 */
4188void
4189xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const CHAR *ExternalID,
4190 const CHAR *SystemID) {
4191 if ((CUR == '<') && (NXT(1) == '?') &&
4192 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4193 (NXT(4) == 'l')) {
4194 xmlParseTextDecl(ctxt);
4195 }
4196 if (ctxt->myDoc == NULL) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00004197 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillard011b63c1999-06-02 17:44:04 +00004198 }
4199 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4200 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4201
Daniel Veillardb05deb71999-08-10 19:04:08 +00004202 ctxt->instate = XML_PARSER_DTD;
4203 ctxt->external = 1;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004204 while (((CUR == '<') && (NXT(1) == '?')) ||
4205 ((CUR == '<') && (NXT(1) == '!')) ||
4206 IS_BLANK(CUR)) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00004207 const CHAR *check = CUR_PTR;
4208 int cons = ctxt->input->consumed;
4209
Daniel Veillard011b63c1999-06-02 17:44:04 +00004210 if ((CUR == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4211 xmlParseConditionalSections(ctxt);
4212 } else if (IS_BLANK(CUR)) {
4213 NEXT;
4214 } else if (CUR == '%') {
4215 xmlParsePEReference(ctxt);
4216 } else
4217 xmlParseMarkupDecl(ctxt);
4218
4219 /*
4220 * Pop-up of finished entities.
4221 */
4222 while ((CUR == 0) && (ctxt->inputNr > 1))
4223 xmlPopInput(ctxt);
4224
Daniel Veillardb96e6431999-08-29 21:02:19 +00004225 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
4226 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4227 ctxt->sax->error(ctxt->userData,
4228 "Content error in the external subset\n");
4229 ctxt->wellFormed = 0;
4230 break;
4231 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00004232 }
4233
4234 if (CUR != 0) {
4235 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4236 ctxt->sax->error(ctxt->userData,
4237 "Extra content at the end of the document\n");
4238 ctxt->wellFormed = 0;
4239 }
4240
4241}
4242
4243/**
Daniel Veillard011b63c1999-06-02 17:44:04 +00004244 * xmlParseReference:
4245 * @ctxt: an XML parser context
4246 *
4247 * parse and handle entity references in content, depending on the SAX
4248 * interface, this may end-up in a call to character() if this is a
4249 * CharRef, a predefined entity, if there is no reference() callback.
4250 * or if the parser was asked to switch to that mode.
4251 *
4252 * [67] Reference ::= EntityRef | CharRef
4253 */
4254void
4255xmlParseReference(xmlParserCtxtPtr ctxt) {
4256 xmlEntityPtr ent;
4257 CHAR *val;
4258 if (CUR != '&') return;
4259
Daniel Veillardb96e6431999-08-29 21:02:19 +00004260 if (ctxt->inputNr > 1) {
4261 CHAR cur[2] = { '&' , 0 } ;
4262
4263 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4264 ctxt->sax->characters(ctxt->userData, cur, 1);
4265 if (ctxt->token == '&')
4266 ctxt->token = 0;
4267 else {
4268 SKIP(1);
4269 }
4270 return;
4271 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00004272 if (NXT(1) == '#') {
4273 CHAR out[2];
4274 int val = xmlParseCharRef(ctxt);
Daniel Veillardb96e6431999-08-29 21:02:19 +00004275 /* invalid for UTF-8 variable encoding !!!!! */
Daniel Veillard011b63c1999-06-02 17:44:04 +00004276 out[0] = val;
4277 out[1] = 0;
4278 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4279 ctxt->sax->characters(ctxt->userData, out, 1);
4280 } else {
4281 ent = xmlParseEntityRef(ctxt);
4282 if (ent == NULL) return;
4283 if ((ent->name != NULL) &&
Daniel Veillardb96e6431999-08-29 21:02:19 +00004284 (ent->type != XML_INTERNAL_PREDEFINED_ENTITY)) {
4285 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4286 (ctxt->replaceEntities == 0)) {
4287 /*
4288 * Create a node.
4289 */
4290 ctxt->sax->reference(ctxt->userData, ent->name);
4291 return;
4292 } else if (ctxt->replaceEntities) {
4293 xmlParserInputPtr input;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004294
Daniel Veillardb96e6431999-08-29 21:02:19 +00004295 input = xmlNewEntityInputStream(ctxt, ent);
4296 xmlPushInput(ctxt, input);
4297 return;
4298 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00004299 }
4300 val = ent->content;
4301 if (val == NULL) return;
4302 /*
4303 * inline the entity.
4304 */
4305 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4306 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
4307 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004308}
4309
Daniel Veillard11e00581998-10-24 18:27:49 +00004310/**
4311 * xmlParseEntityRef:
4312 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00004313 *
4314 * parse ENTITY references declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00004315 *
4316 * [68] EntityRef ::= '&' Name ';'
Daniel Veillard1e346af1999-02-22 10:33:01 +00004317 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00004318 * [ WFC: Entity Declared ]
4319 * In a document without any DTD, a document with only an internal DTD
4320 * subset which contains no parameter entity references, or a document
4321 * with "standalone='yes'", the Name given in the entity reference
4322 * must match that in an entity declaration, except that well-formed
4323 * documents need not declare any of the following entities: amp, lt,
4324 * gt, apos, quot. The declaration of a parameter entity must precede
4325 * any reference to it. Similarly, the declaration of a general entity
4326 * must precede any reference to it which appears in a default value in an
4327 * attribute-list declaration. Note that if entities are declared in the
4328 * external subset or in external parameter entities, a non-validating
4329 * processor is not obligated to read and process their declarations;
4330 * for such documents, the rule that an entity must be declared is a
4331 * well-formedness constraint only if standalone='yes'.
4332 *
4333 * [ WFC: Parsed Entity ]
4334 * An entity reference must not contain the name of an unparsed entity
4335 *
Daniel Veillard011b63c1999-06-02 17:44:04 +00004336 * Returns the xmlEntityPtr if found, or NULL otherwise.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004337 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00004338xmlEntityPtr
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004339xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004340 CHAR *name;
Daniel Veillard517752b1999-04-05 12:20:10 +00004341 xmlEntityPtr ent = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004342
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004343 GROW;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004344
Daniel Veillard260a68f1998-08-13 03:39:55 +00004345 if (CUR == '&') {
4346 NEXT;
4347 name = xmlParseName(ctxt);
4348 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00004350 ctxt->sax->error(ctxt->userData,
4351 "xmlParseEntityRef: no name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004352 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004353 } else {
4354 if (CUR == ';') {
4355 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004356 /*
Daniel Veillard011b63c1999-06-02 17:44:04 +00004357 * Ask first SAX for entity resolution, otherwise try the
4358 * predefined set.
4359 */
4360 if (ctxt->sax != NULL) {
4361 if (ctxt->sax->getEntity != NULL)
4362 ent = ctxt->sax->getEntity(ctxt->userData, name);
4363 if (ent == NULL)
4364 ent = xmlGetPredefinedEntity(name);
4365 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00004366 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00004367 * [ WFC: Entity Declared ]
4368 * In a document without any DTD, a document with only an
4369 * internal DTD subset which contains no parameter entity
4370 * references, or a document with "standalone='yes'", the
4371 * Name given in the entity reference must match that in an
4372 * entity declaration, except that well-formed documents
4373 * need not declare any of the following entities: amp, lt,
4374 * gt, apos, quot.
4375 * The declaration of a parameter entity must precede any
4376 * reference to it.
4377 * Similarly, the declaration of a general entity must
4378 * precede any reference to it which appears in a default
4379 * value in an attribute-list declaration. Note that if
4380 * entities are declared in the external subset or in
4381 * external parameter entities, a non-validating processor
4382 * is not obligated to read and process their declarations;
4383 * for such documents, the rule that an entity must be
4384 * declared is a well-formedness constraint only if
4385 * standalone='yes'.
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004386 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00004387 if (ent == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004388 if ((ctxt->standalone == 1) ||
4389 ((ctxt->hasExternalSubset == 0) &&
4390 (ctxt->hasPErefs == 0))) {
4391 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard011b63c1999-06-02 17:44:04 +00004392 ctxt->sax->error(ctxt->userData,
4393 "Entity '%s' not defined\n", name);
4394 ctxt->wellFormed = 0;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004395 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004396 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4397 ctxt->sax->warning(ctxt->userData,
4398 "Entity '%s' not defined\n", name);
Daniel Veillard011b63c1999-06-02 17:44:04 +00004399 }
4400 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004401
4402 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00004403 * [ WFC: Parsed Entity ]
4404 * An entity reference must not contain the name of an
4405 * unparsed entity
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004406 */
Daniel Veillardb05deb71999-08-10 19:04:08 +00004407 else if (ent->type == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
4408 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4409 ctxt->sax->error(ctxt->userData,
4410 "Entity reference to unparsed entity %s\n", name);
4411 ctxt->wellFormed = 0;
4412 }
4413
4414 /*
4415 * [ WFC: No External Entity References ]
4416 * Attribute values cannot contain direct or indirect
4417 * entity references to external entities.
4418 */
4419 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
4420 (ent->type == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
4421 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4422 ctxt->sax->error(ctxt->userData,
4423 "Attribute references external entity '%s'\n", name);
4424 ctxt->wellFormed = 0;
4425 }
4426 /*
4427 * [ WFC: No < in Attribute Values ]
4428 * The replacement text of any entity referred to directly or
4429 * indirectly in an attribute value (other than "&lt;") must
4430 * not contain a <.
4431 */
4432 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardb96e6431999-08-29 21:02:19 +00004433 (ent != NULL) &&
4434 (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
Daniel Veillardb05deb71999-08-10 19:04:08 +00004435 (ent->content != NULL) &&
4436 (xmlStrchr(ent->content, '<'))) {
4437 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4438 ctxt->sax->error(ctxt->userData,
4439 "'<' in entity '%s' is not allowed in attributes values\n", name);
4440 ctxt->wellFormed = 0;
4441 }
4442
4443 /*
4444 * Internal check, no parameter entities here ...
4445 */
4446 else {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004447 switch (ent->type) {
4448 case XML_INTERNAL_PARAMETER_ENTITY:
4449 case XML_EXTERNAL_PARAMETER_ENTITY:
4450 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004451 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004452 "Attempt to reference the parameter entity '%s'\n", name);
4453 ctxt->wellFormed = 0;
4454 break;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004455 }
4456 }
4457
4458 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00004459 * [ WFC: No Recursion ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00004460 * TODO A parsed entity must not contain a recursive reference
4461 * to itself, either directly or indirectly.
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004462 */
Daniel Veillard260a68f1998-08-13 03:39:55 +00004463
Daniel Veillard011b63c1999-06-02 17:44:04 +00004464 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004465 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004466 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004467 "xmlParseEntityRef: expecting ';'\n");
4468 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004469 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00004470 xmlFree(name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004471 }
4472 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00004473 return(ent);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004474}
4475
Daniel Veillard11e00581998-10-24 18:27:49 +00004476/**
4477 * xmlParsePEReference:
4478 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00004479 *
4480 * parse PEReference declarations
Daniel Veillard011b63c1999-06-02 17:44:04 +00004481 * The entity content is handled directly by pushing it's content as
4482 * a new input stream.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004483 *
4484 * [69] PEReference ::= '%' Name ';'
Daniel Veillard1e346af1999-02-22 10:33:01 +00004485 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00004486 * [ WFC: No Recursion ]
4487 * TODO A parsed entity must not contain a recursive
4488 * reference to itself, either directly or indirectly.
4489 *
4490 * [ WFC: Entity Declared ]
4491 * In a document without any DTD, a document with only an internal DTD
4492 * subset which contains no parameter entity references, or a document
4493 * with "standalone='yes'", ... ... The declaration of a parameter
4494 * entity must precede any reference to it...
4495 *
4496 * [ VC: Entity Declared ]
4497 * In a document with an external subset or external parameter entities
4498 * with "standalone='no'", ... ... The declaration of a parameter entity
4499 * must precede any reference to it...
4500 *
4501 * [ WFC: In DTD ]
4502 * Parameter-entity references may only appear in the DTD.
4503 * NOTE: misleading but this is handled.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004504 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00004505void
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004506xmlParsePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004507 CHAR *name;
Daniel Veillard517752b1999-04-05 12:20:10 +00004508 xmlEntityPtr entity = NULL;
Daniel Veillardccb09631998-10-27 06:21:04 +00004509 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004510
4511 if (CUR == '%') {
4512 NEXT;
4513 name = xmlParseName(ctxt);
4514 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004515 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004516 ctxt->sax->error(ctxt->userData, "xmlParsePEReference: no name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004517 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004518 } else {
4519 if (CUR == ';') {
4520 NEXT;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004521 if ((ctxt->sax != NULL) &&
4522 (ctxt->sax->getParameterEntity != NULL))
4523 entity = ctxt->sax->getParameterEntity(ctxt->userData,
4524 name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004525 if (entity == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004526 /*
4527 * [ WFC: Entity Declared ]
4528 * In a document without any DTD, a document with only an
4529 * internal DTD subset which contains no parameter entity
4530 * references, or a document with "standalone='yes'", ...
4531 * ... The declaration of a parameter entity must precede
4532 * any reference to it...
4533 */
4534 if ((ctxt->standalone == 1) ||
4535 ((ctxt->hasExternalSubset == 0) &&
4536 (ctxt->hasPErefs == 0))) {
4537 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4538 ctxt->sax->error(ctxt->userData,
4539 "PEReference: %%%s; not found\n", name);
4540 ctxt->wellFormed = 0;
4541 } else {
4542 /*
4543 * [ VC: Entity Declared ]
4544 * In a document with an external subset or external
4545 * parameter entities with "standalone='no'", ...
4546 * ... The declaration of a parameter entity must precede
4547 * any reference to it...
4548 */
4549 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4550 ctxt->sax->warning(ctxt->userData,
4551 "PEReference: %%%s; not found\n", name);
4552 ctxt->valid = 0;
4553 }
Daniel Veillardccb09631998-10-27 06:21:04 +00004554 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004555 /*
4556 * Internal checking in case the entity quest barfed
4557 */
4558 if ((entity->type != XML_INTERNAL_PARAMETER_ENTITY) &&
4559 (entity->type != XML_EXTERNAL_PARAMETER_ENTITY)) {
4560 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4561 ctxt->sax->warning(ctxt->userData,
4562 "Internal: %%%s; is not a parameter entity\n", name);
4563 } else {
4564 input = xmlNewEntityInputStream(ctxt, entity);
4565 xmlPushInput(ctxt, input);
4566 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004567 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00004568 ctxt->hasPErefs = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004569 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004570 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004571 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004572 "xmlParsePEReference: expecting ';'\n");
4573 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004574 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00004575 xmlFree(name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004576 }
4577 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004578}
4579
Daniel Veillard11e00581998-10-24 18:27:49 +00004580/**
4581 * xmlParseDocTypeDecl :
4582 * @ctxt: an XML parser context
4583 *
4584 * parse a DOCTYPE declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00004585 *
4586 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
4587 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
Daniel Veillardb05deb71999-08-10 19:04:08 +00004588 *
4589 * [ VC: Root Element Type ]
4590 * The Name in the document type declaration must match the element
4591 * type of the root element.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004592 */
4593
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004594void
4595xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004596 CHAR *name;
4597 CHAR *ExternalID = NULL;
4598 CHAR *URI = NULL;
4599
4600 /*
4601 * We know that '<!DOCTYPE' has been detected.
4602 */
4603 SKIP(9);
4604
4605 SKIP_BLANKS;
4606
4607 /*
4608 * Parse the DOCTYPE name.
4609 */
4610 name = xmlParseName(ctxt);
4611 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004612 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004613 ctxt->sax->error(ctxt->userData, "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004614 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004615 }
4616
4617 SKIP_BLANKS;
4618
4619 /*
4620 * Check for SystemID and ExternalID
4621 */
Daniel Veillard1e346af1999-02-22 10:33:01 +00004622 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
Daniel Veillardb05deb71999-08-10 19:04:08 +00004623
4624 if ((URI != NULL) || (ExternalID != NULL)) {
4625 ctxt->hasExternalSubset = 1;
4626 }
4627
Daniel Veillard260a68f1998-08-13 03:39:55 +00004628 SKIP_BLANKS;
4629
Daniel Veillard011b63c1999-06-02 17:44:04 +00004630 /*
4631 * NOTE: the SAX callback may try to fetch the external subset
4632 * entity and fill it up !
4633 */
Daniel Veillard517752b1999-04-05 12:20:10 +00004634 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004635 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004636
4637 /*
4638 * Is there any DTD definition ?
4639 */
4640 if (CUR == '[') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004641 ctxt->instate = XML_PARSER_DTD;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004642 NEXT;
4643 /*
4644 * Parse the succession of Markup declarations and
4645 * PEReferences.
4646 * Subsequence (markupdecl | PEReference | S)*
4647 */
4648 while (CUR != ']') {
4649 const CHAR *check = CUR_PTR;
Daniel Veillardb96e6431999-08-29 21:02:19 +00004650 int cons = ctxt->input->consumed;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004651
4652 SKIP_BLANKS;
4653 xmlParseMarkupDecl(ctxt);
Daniel Veillardccb09631998-10-27 06:21:04 +00004654 xmlParsePEReference(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004655
Daniel Veillard011b63c1999-06-02 17:44:04 +00004656 /*
4657 * Pop-up of finished entities.
4658 */
4659 while ((CUR == 0) && (ctxt->inputNr > 1))
4660 xmlPopInput(ctxt);
4661
Daniel Veillardc26087b1999-08-30 11:23:51 +00004662 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00004663 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4664 ctxt->sax->error(ctxt->userData,
4665 "xmlParseDocTypeDecl: error detected in Markup declaration\n");
4666 ctxt->wellFormed = 0;
4667 break;
4668 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004669 }
4670 if (CUR == ']') NEXT;
4671 }
4672
4673 /*
4674 * We should be at the end of the DOCTYPE declaration.
4675 */
4676 if (CUR != '>') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004677 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004678 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004679 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004680 /* We shouldn't try to resynchronize ... */
4681 }
4682 NEXT;
4683
4684 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00004685 * Cleanup
Daniel Veillard260a68f1998-08-13 03:39:55 +00004686 */
Daniel Veillard6454aec1999-09-02 22:04:43 +00004687 if (URI != NULL) xmlFree(URI);
4688 if (ExternalID != NULL) xmlFree(ExternalID);
4689 if (name != NULL) xmlFree(name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004690}
4691
Daniel Veillard11e00581998-10-24 18:27:49 +00004692/**
4693 * xmlParseAttribute:
4694 * @ctxt: an XML parser context
Daniel Veillard517752b1999-04-05 12:20:10 +00004695 * @value: a CHAR ** used to store the value of the attribute
Daniel Veillard11e00581998-10-24 18:27:49 +00004696 *
4697 * parse an attribute
Daniel Veillard260a68f1998-08-13 03:39:55 +00004698 *
4699 * [41] Attribute ::= Name Eq AttValue
4700 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00004701 * [ WFC: No External Entity References ]
4702 * Attribute values cannot contain direct or indirect entity references
4703 * to external entities.
4704 *
4705 * [ WFC: No < in Attribute Values ]
4706 * The replacement text of any entity referred to directly or indirectly in
4707 * an attribute value (other than "&lt;") must not contain a <.
4708 *
4709 * [ VC: Attribute Value Type ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00004710 * The attribute must have been declared; the value must be of the type
Daniel Veillardb05deb71999-08-10 19:04:08 +00004711 * declared for it.
4712 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00004713 * [25] Eq ::= S? '=' S?
4714 *
4715 * With namespace:
4716 *
4717 * [NS 11] Attribute ::= QName Eq AttValue
4718 *
4719 * Also the case QName == xmlns:??? is handled independently as a namespace
4720 * definition.
Daniel Veillard1e346af1999-02-22 10:33:01 +00004721 *
Daniel Veillard517752b1999-04-05 12:20:10 +00004722 * Returns the attribute name, and the value in *value.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004723 */
4724
Daniel Veillard517752b1999-04-05 12:20:10 +00004725CHAR *
4726xmlParseAttribute(xmlParserCtxtPtr ctxt, CHAR **value) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004727 CHAR *name, *val;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004728
Daniel Veillard517752b1999-04-05 12:20:10 +00004729 *value = NULL;
4730 name = xmlParseName(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004731 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004732 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004733 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004734 ctxt->wellFormed = 0;
Daniel Veillardccb09631998-10-27 06:21:04 +00004735 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004736 }
4737
4738 /*
4739 * read the value
4740 */
4741 SKIP_BLANKS;
4742 if (CUR == '=') {
4743 NEXT;
4744 SKIP_BLANKS;
Daniel Veillard517752b1999-04-05 12:20:10 +00004745 val = xmlParseAttValue(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00004746 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004747 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004748 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004749 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004750 "Specification mandate value for attribute %s\n", name);
4751 ctxt->wellFormed = 0;
Daniel Veillardccb09631998-10-27 06:21:04 +00004752 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004753 }
4754
Daniel Veillard517752b1999-04-05 12:20:10 +00004755 *value = val;
4756 return(name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004757}
4758
Daniel Veillard11e00581998-10-24 18:27:49 +00004759/**
4760 * xmlParseStartTag:
4761 * @ctxt: an XML parser context
4762 *
4763 * parse a start of tag either for rule element or
4764 * EmptyElement. In both case we don't parse the tag closing chars.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004765 *
4766 * [40] STag ::= '<' Name (S Attribute)* S? '>'
4767 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00004768 * [ WFC: Unique Att Spec ]
4769 * No attribute name may appear more than once in the same start-tag or
4770 * empty-element tag.
4771 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00004772 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
4773 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00004774 * [ WFC: Unique Att Spec ]
4775 * No attribute name may appear more than once in the same start-tag or
4776 * empty-element tag.
4777 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00004778 * With namespace:
4779 *
4780 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
4781 *
4782 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
Daniel Veillard14fff061999-06-22 21:49:07 +00004783 *
4784 * Returns the element name parsed
Daniel Veillard260a68f1998-08-13 03:39:55 +00004785 */
4786
Daniel Veillard14fff061999-06-22 21:49:07 +00004787CHAR *
Daniel Veillard1e346af1999-02-22 10:33:01 +00004788xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard517752b1999-04-05 12:20:10 +00004789 CHAR *name;
4790 CHAR *attname;
4791 CHAR *attvalue;
4792 const CHAR **atts = NULL;
4793 int nbatts = 0;
4794 int maxatts = 0;
4795 int i;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004796
Daniel Veillard14fff061999-06-22 21:49:07 +00004797 if (CUR != '<') return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004798 NEXT;
4799
Daniel Veillard517752b1999-04-05 12:20:10 +00004800 name = xmlParseName(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004801 if (name == NULL) {
4802 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004803 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004804 "xmlParseStartTag: invalid element name\n");
4805 ctxt->wellFormed = 0;
Daniel Veillard14fff061999-06-22 21:49:07 +00004806 return(NULL);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004807 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004808
4809 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00004810 * Now parse the attributes, it ends up with the ending
4811 *
4812 * (S Attribute)* S?
4813 */
4814 SKIP_BLANKS;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004815 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004816 while ((IS_CHAR(CUR)) &&
4817 (CUR != '>') &&
4818 ((CUR != '/') || (NXT(1) != '>'))) {
4819 const CHAR *q = CUR_PTR;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004820 int cons = ctxt->input->consumed;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004821
Daniel Veillard517752b1999-04-05 12:20:10 +00004822 attname = xmlParseAttribute(ctxt, &attvalue);
4823 if ((attname != NULL) && (attvalue != NULL)) {
4824 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00004825 * [ WFC: Unique Att Spec ]
4826 * No attribute name may appear more than once in the same
4827 * start-tag or empty-element tag.
Daniel Veillard517752b1999-04-05 12:20:10 +00004828 */
4829 for (i = 0; i < nbatts;i += 2) {
4830 if (!xmlStrcmp(atts[i], attname)) {
4831 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00004832 ctxt->sax->error(ctxt->userData,
4833 "Attribute %s redefined\n",
4834 attname);
Daniel Veillard517752b1999-04-05 12:20:10 +00004835 ctxt->wellFormed = 0;
Daniel Veillard6454aec1999-09-02 22:04:43 +00004836 xmlFree(attname);
4837 xmlFree(attvalue);
Daniel Veillardb05deb71999-08-10 19:04:08 +00004838 goto failed;
Daniel Veillard517752b1999-04-05 12:20:10 +00004839 }
4840 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004841
Daniel Veillard517752b1999-04-05 12:20:10 +00004842 /*
4843 * Add the pair to atts
4844 */
4845 if (atts == NULL) {
4846 maxatts = 10;
Daniel Veillard6454aec1999-09-02 22:04:43 +00004847 atts = (const CHAR **) xmlMalloc(maxatts * sizeof(CHAR *));
Daniel Veillard517752b1999-04-05 12:20:10 +00004848 if (atts == NULL) {
Daniel Veillardbe70ff71999-07-05 16:50:46 +00004849 fprintf(stderr, "malloc of %ld byte failed\n",
4850 maxatts * (long)sizeof(CHAR *));
Daniel Veillard14fff061999-06-22 21:49:07 +00004851 return(NULL);
Daniel Veillard517752b1999-04-05 12:20:10 +00004852 }
4853 } else if (nbatts + 2 < maxatts) {
4854 maxatts *= 2;
Daniel Veillard6454aec1999-09-02 22:04:43 +00004855 atts = (const CHAR **) xmlRealloc(atts, maxatts * sizeof(CHAR *));
Daniel Veillard517752b1999-04-05 12:20:10 +00004856 if (atts == NULL) {
Daniel Veillardbe70ff71999-07-05 16:50:46 +00004857 fprintf(stderr, "realloc of %ld byte failed\n",
4858 maxatts * (long)sizeof(CHAR *));
Daniel Veillard14fff061999-06-22 21:49:07 +00004859 return(NULL);
Daniel Veillard517752b1999-04-05 12:20:10 +00004860 }
4861 }
4862 atts[nbatts++] = attname;
4863 atts[nbatts++] = attvalue;
4864 atts[nbatts] = NULL;
4865 atts[nbatts + 1] = NULL;
4866 }
4867
Daniel Veillardb96e6431999-08-29 21:02:19 +00004868failed:
Daniel Veillard517752b1999-04-05 12:20:10 +00004869 SKIP_BLANKS;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004870 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004871 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004872 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00004873 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004874 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004875 break;
4876 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004877 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004878 }
4879
4880 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00004881 * SAX: Start of Element !
4882 */
Daniel Veillard517752b1999-04-05 12:20:10 +00004883 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004884 ctxt->sax->startElement(ctxt->userData, name, atts);
Daniel Veillard517752b1999-04-05 12:20:10 +00004885
Daniel Veillard517752b1999-04-05 12:20:10 +00004886 if (atts != NULL) {
Daniel Veillard6454aec1999-09-02 22:04:43 +00004887 for (i = 0;i < nbatts;i++) xmlFree((CHAR *) atts[i]);
4888 xmlFree(atts);
Daniel Veillard517752b1999-04-05 12:20:10 +00004889 }
Daniel Veillard14fff061999-06-22 21:49:07 +00004890 return(name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004891}
4892
Daniel Veillard11e00581998-10-24 18:27:49 +00004893/**
4894 * xmlParseEndTag:
4895 * @ctxt: an XML parser context
Daniel Veillard14fff061999-06-22 21:49:07 +00004896 * @tagname: the tag name as parsed in the opening tag.
Daniel Veillard11e00581998-10-24 18:27:49 +00004897 *
4898 * parse an end of tag
Daniel Veillard260a68f1998-08-13 03:39:55 +00004899 *
4900 * [42] ETag ::= '</' Name S? '>'
4901 *
4902 * With namespace
4903 *
Daniel Veillard517752b1999-04-05 12:20:10 +00004904 * [NS 9] ETag ::= '</' QName S? '>'
Daniel Veillard260a68f1998-08-13 03:39:55 +00004905 */
4906
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004907void
Daniel Veillard14fff061999-06-22 21:49:07 +00004908xmlParseEndTag(xmlParserCtxtPtr ctxt, CHAR *tagname) {
Daniel Veillard517752b1999-04-05 12:20:10 +00004909 CHAR *name;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004910
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004911 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004912 if ((CUR != '<') || (NXT(1) != '/')) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004913 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004914 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004915 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004916 return;
4917 }
4918 SKIP(2);
4919
Daniel Veillard517752b1999-04-05 12:20:10 +00004920 name = xmlParseName(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004921
4922 /*
4923 * We should definitely be at the ending "S? '>'" part
4924 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004925 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004926 SKIP_BLANKS;
4927 if ((!IS_CHAR(CUR)) || (CUR != '>')) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004928 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004929 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004930 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004931 } else
4932 NEXT;
4933
Daniel Veillard517752b1999-04-05 12:20:10 +00004934 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00004935 * [ WFC: Element Type Match ]
4936 * The Name in an element's end-tag must match the element type in the
4937 * start-tag.
4938 *
Daniel Veillard14fff061999-06-22 21:49:07 +00004939 */
4940 if (xmlStrcmp(name, tagname)) {
4941 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4942 ctxt->sax->error(ctxt->userData,
4943 "Opening and ending tag mismatch: %s and %s\n", tagname, name);
4944 ctxt->wellFormed = 0;
4945 }
4946
4947 /*
Daniel Veillard517752b1999-04-05 12:20:10 +00004948 * SAX: End of Tag
4949 */
4950 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004951 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard517752b1999-04-05 12:20:10 +00004952
4953 if (name != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00004954 xmlFree(name);
Daniel Veillard517752b1999-04-05 12:20:10 +00004955
Daniel Veillard260a68f1998-08-13 03:39:55 +00004956 return;
4957}
4958
Daniel Veillard11e00581998-10-24 18:27:49 +00004959/**
4960 * xmlParseCDSect:
4961 * @ctxt: an XML parser context
4962 *
4963 * Parse escaped pure raw content.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004964 *
4965 * [18] CDSect ::= CDStart CData CDEnd
4966 *
4967 * [19] CDStart ::= '<![CDATA['
4968 *
4969 * [20] Data ::= (Char* - (Char* ']]>' Char*))
4970 *
4971 * [21] CDEnd ::= ']]>'
4972 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004973void
4974xmlParseCDSect(xmlParserCtxtPtr ctxt) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004975 const CHAR *base;
4976 CHAR r, s;
4977 CHAR cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004978
Daniel Veillardb05deb71999-08-10 19:04:08 +00004979 if ((NXT(0) == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00004980 (NXT(2) == '[') && (NXT(3) == 'C') &&
4981 (NXT(4) == 'D') && (NXT(5) == 'A') &&
4982 (NXT(6) == 'T') && (NXT(7) == 'A') &&
4983 (NXT(8) == '[')) {
4984 SKIP(9);
4985 } else
4986 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004987
4988 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004989 base = CUR_PTR;
4990 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004991 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004992 ctxt->sax->error(ctxt->userData, "CData section not finished\n%.50s\n", base);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004993 ctxt->wellFormed = 0;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004994 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004995 return;
4996 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00004997 r = CUR;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004998 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004999 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005000 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005001 ctxt->sax->error(ctxt->userData, "CData section not finished\n%.50s\n", base);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005002 ctxt->wellFormed = 0;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005003 ctxt->instate = XML_PARSER_CONTENT;
5004 return;
5005 }
5006 s = CUR;
5007 NEXT;
5008 cur = CUR;
5009 while (IS_CHAR(cur) &&
5010 ((r != ']') || (s != ']') || (cur != '>'))) {
5011 r = s;
5012 s = cur;
5013 NEXT;
5014 cur = CUR;
5015 }
5016 ctxt->instate = XML_PARSER_CONTENT;
5017 if (!IS_CHAR(CUR)) {
5018 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5019 ctxt->sax->error(ctxt->userData, "CData section not finished\n%.50s\n", base);
5020 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005021 return;
5022 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005023 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005024
5025 /*
5026 * Ok the segment [base CUR_PTR] is to be consumed as chars.
5027 */
5028 if (ctxt->sax != NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00005029 if (ctxt->sax->cdataBlock != NULL)
5030 ctxt->sax->cdataBlock(ctxt->userData, base, (CUR_PTR - base) - 3);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005031 }
5032}
5033
Daniel Veillard11e00581998-10-24 18:27:49 +00005034/**
5035 * xmlParseContent:
5036 * @ctxt: an XML parser context
5037 *
5038 * Parse a content:
Daniel Veillard260a68f1998-08-13 03:39:55 +00005039 *
5040 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
5041 */
5042
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005043void
5044xmlParseContent(xmlParserCtxtPtr ctxt) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00005045 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005046 while ((CUR != '<') || (NXT(1) != '/')) {
5047 const CHAR *test = CUR_PTR;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005048 int cons = ctxt->input->consumed;
Daniel Veillardb96e6431999-08-29 21:02:19 +00005049 CHAR tok = ctxt->token;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005050
5051 /*
5052 * First case : a Processing Instruction.
5053 */
5054 if ((CUR == '<') && (NXT(1) == '?')) {
5055 xmlParsePI(ctxt);
5056 }
Daniel Veillard517752b1999-04-05 12:20:10 +00005057
Daniel Veillard260a68f1998-08-13 03:39:55 +00005058 /*
5059 * Second case : a CDSection
5060 */
5061 else if ((CUR == '<') && (NXT(1) == '!') &&
5062 (NXT(2) == '[') && (NXT(3) == 'C') &&
5063 (NXT(4) == 'D') && (NXT(5) == 'A') &&
5064 (NXT(6) == 'T') && (NXT(7) == 'A') &&
5065 (NXT(8) == '[')) {
5066 xmlParseCDSect(ctxt);
5067 }
Daniel Veillard517752b1999-04-05 12:20:10 +00005068
Daniel Veillard260a68f1998-08-13 03:39:55 +00005069 /*
5070 * Third case : a comment
5071 */
5072 else if ((CUR == '<') && (NXT(1) == '!') &&
5073 (NXT(2) == '-') && (NXT(3) == '-')) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00005074 xmlParseComment(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00005075 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005076 }
Daniel Veillard517752b1999-04-05 12:20:10 +00005077
Daniel Veillard260a68f1998-08-13 03:39:55 +00005078 /*
5079 * Fourth case : a sub-element.
5080 */
5081 else if (CUR == '<') {
Daniel Veillard517752b1999-04-05 12:20:10 +00005082 xmlParseElement(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005083 }
Daniel Veillard517752b1999-04-05 12:20:10 +00005084
Daniel Veillard260a68f1998-08-13 03:39:55 +00005085 /*
Daniel Veillardccb09631998-10-27 06:21:04 +00005086 * Fifth case : a reference. If if has not been resolved,
5087 * parsing returns it's Name, create the node
Daniel Veillard260a68f1998-08-13 03:39:55 +00005088 */
Daniel Veillardb05deb71999-08-10 19:04:08 +00005089
Daniel Veillard260a68f1998-08-13 03:39:55 +00005090 else if (CUR == '&') {
Daniel Veillard011b63c1999-06-02 17:44:04 +00005091 xmlParseReference(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005092 }
Daniel Veillard517752b1999-04-05 12:20:10 +00005093
Daniel Veillard260a68f1998-08-13 03:39:55 +00005094 /*
5095 * Last case, text. Note that References are handled directly.
5096 */
5097 else {
5098 xmlParseCharData(ctxt, 0);
5099 }
5100
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005101 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005102 /*
5103 * Pop-up of finished entities.
5104 */
Daniel Veillardbc50b591999-03-01 12:28:53 +00005105 while ((CUR == 0) && (ctxt->inputNr > 1))
5106 xmlPopInput(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005107
Daniel Veillardb96e6431999-08-29 21:02:19 +00005108 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
5109 (tok == ctxt->token)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005110 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005111 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005112 "detected an error in element content\n");
5113 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005114 break;
5115 }
5116 }
5117}
5118
Daniel Veillard11e00581998-10-24 18:27:49 +00005119/**
5120 * xmlParseElement:
5121 * @ctxt: an XML parser context
5122 *
5123 * parse an XML element, this is highly recursive
Daniel Veillard260a68f1998-08-13 03:39:55 +00005124 *
5125 * [39] element ::= EmptyElemTag | STag content ETag
5126 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005127 * [ WFC: Element Type Match ]
5128 * The Name in an element's end-tag must match the element type in the
5129 * start-tag.
5130 *
5131 * [ VC: Element Valid ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00005132 * An element is valid if there is a declaration matching elementdecl
Daniel Veillardb05deb71999-08-10 19:04:08 +00005133 * where the Name matches the element type and one of the following holds:
5134 * - The declaration matches EMPTY and the element has no content.
5135 * - The declaration matches children and the sequence of child elements
5136 * belongs to the language generated by the regular expression in the
5137 * content model, with optional white space (characters matching the
5138 * nonterminal S) between each pair of child elements.
5139 * - The declaration matches Mixed and the content consists of character
5140 * data and child elements whose types match names in the content model.
5141 * - The declaration matches ANY, and the types of any child elements have
5142 * been declared.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005143 */
5144
Daniel Veillard517752b1999-04-05 12:20:10 +00005145void
Daniel Veillard1e346af1999-02-22 10:33:01 +00005146xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00005147 const CHAR *openTag = CUR_PTR;
Daniel Veillard14fff061999-06-22 21:49:07 +00005148 CHAR *name;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005149 xmlParserNodeInfo node_info;
Daniel Veillardc26087b1999-08-30 11:23:51 +00005150 xmlNodePtr ret;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005151
5152 /* Capture start position */
Daniel Veillardc26087b1999-08-30 11:23:51 +00005153 if (ctxt->record_info) {
5154 node_info.begin_pos = ctxt->input->consumed +
5155 (CUR_PTR - ctxt->input->base);
5156 node_info.begin_line = ctxt->input->line;
5157 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005158
Daniel Veillard14fff061999-06-22 21:49:07 +00005159 name = xmlParseStartTag(ctxt);
5160 if (name == NULL) {
5161 return;
5162 }
Daniel Veillardc26087b1999-08-30 11:23:51 +00005163 ret = ctxt->node;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005164
5165 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00005166 * [ VC: Root Element Type ]
5167 * The Name in the document type declaration must match the element
5168 * type of the root element.
5169 */
5170 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
5171 ctxt->node && (ctxt->node == ctxt->myDoc->root))
5172 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
5173
5174 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00005175 * Check for an Empty Element.
5176 */
5177 if ((CUR == '/') && (NXT(1) == '>')) {
5178 SKIP(2);
Daniel Veillard517752b1999-04-05 12:20:10 +00005179 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
Daniel Veillard14fff061999-06-22 21:49:07 +00005180 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard6454aec1999-09-02 22:04:43 +00005181 xmlFree(name);
Daniel Veillard517752b1999-04-05 12:20:10 +00005182 return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005183 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005184 if (CUR == '>') {
5185 NEXT;
5186 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005187 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005188 ctxt->sax->error(ctxt->userData, "Couldn't find end of Start Tag\n%.30s\n",
Daniel Veillard242590e1998-11-13 18:04:35 +00005189 openTag);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005190 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005191
5192 /*
5193 * end of parsing of this node.
5194 */
5195 nodePop(ctxt);
Daniel Veillard6454aec1999-09-02 22:04:43 +00005196 xmlFree(name);
Daniel Veillardc26087b1999-08-30 11:23:51 +00005197
5198 /*
5199 * Capture end position and add node
5200 */
5201 if ( ret != NULL && ctxt->record_info ) {
5202 node_info.end_pos = ctxt->input->consumed +
5203 (CUR_PTR - ctxt->input->base);
5204 node_info.end_line = ctxt->input->line;
5205 node_info.node = ret;
5206 xmlParserAddNodeInfo(ctxt, &node_info);
5207 }
Daniel Veillard517752b1999-04-05 12:20:10 +00005208 return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005209 }
5210
5211 /*
5212 * Parse the content of the element:
5213 */
5214 xmlParseContent(ctxt);
5215 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005216 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005217 ctxt->sax->error(ctxt->userData,
Daniel Veillard242590e1998-11-13 18:04:35 +00005218 "Premature end of data in tag %.30s\n", openTag);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005219 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005220
5221 /*
5222 * end of parsing of this node.
5223 */
5224 nodePop(ctxt);
Daniel Veillard6454aec1999-09-02 22:04:43 +00005225 xmlFree(name);
Daniel Veillard517752b1999-04-05 12:20:10 +00005226 return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005227 }
5228
5229 /*
5230 * parse the end of tag: '</' should be here.
5231 */
Daniel Veillard14fff061999-06-22 21:49:07 +00005232 xmlParseEndTag(ctxt, name);
Daniel Veillard6454aec1999-09-02 22:04:43 +00005233 xmlFree(name);
Daniel Veillardc26087b1999-08-30 11:23:51 +00005234
5235 /*
5236 * Capture end position and add node
5237 */
5238 if ( ret != NULL && ctxt->record_info ) {
5239 node_info.end_pos = ctxt->input->consumed +
5240 (CUR_PTR - ctxt->input->base);
5241 node_info.end_line = ctxt->input->line;
5242 node_info.node = ret;
5243 xmlParserAddNodeInfo(ctxt, &node_info);
5244 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005245}
5246
Daniel Veillard11e00581998-10-24 18:27:49 +00005247/**
5248 * xmlParseVersionNum:
5249 * @ctxt: an XML parser context
5250 *
5251 * parse the XML version value.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005252 *
5253 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
Daniel Veillard1e346af1999-02-22 10:33:01 +00005254 *
5255 * Returns the string giving the XML version number, or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00005256 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005257CHAR *
5258xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00005259 const CHAR *q = CUR_PTR;
5260 CHAR *ret;
5261
5262 while (IS_CHAR(CUR) &&
5263 (((CUR >= 'a') && (CUR <= 'z')) ||
5264 ((CUR >= 'A') && (CUR <= 'Z')) ||
5265 ((CUR >= '0') && (CUR <= '9')) ||
5266 (CUR == '_') || (CUR == '.') ||
5267 (CUR == ':') || (CUR == '-'))) NEXT;
5268 ret = xmlStrndup(q, CUR_PTR - q);
5269 return(ret);
5270}
5271
Daniel Veillard11e00581998-10-24 18:27:49 +00005272/**
5273 * xmlParseVersionInfo:
5274 * @ctxt: an XML parser context
5275 *
5276 * parse the XML version.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005277 *
5278 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
5279 *
5280 * [25] Eq ::= S? '=' S?
Daniel Veillard11e00581998-10-24 18:27:49 +00005281 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005282 * Returns the version string, e.g. "1.0"
Daniel Veillard260a68f1998-08-13 03:39:55 +00005283 */
5284
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005285CHAR *
5286xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00005287 CHAR *version = NULL;
5288 const CHAR *q;
5289
5290 if ((CUR == 'v') && (NXT(1) == 'e') &&
5291 (NXT(2) == 'r') && (NXT(3) == 's') &&
5292 (NXT(4) == 'i') && (NXT(5) == 'o') &&
5293 (NXT(6) == 'n')) {
5294 SKIP(7);
5295 SKIP_BLANKS;
5296 if (CUR != '=') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005297 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005298 ctxt->sax->error(ctxt->userData, "xmlParseVersionInfo : expected '='\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005299 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005300 return(NULL);
5301 }
5302 NEXT;
5303 SKIP_BLANKS;
5304 if (CUR == '"') {
5305 NEXT;
5306 q = CUR_PTR;
5307 version = xmlParseVersionNum(ctxt);
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005308 if (CUR != '"') {
5309 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005310 ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005311 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005312 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00005313 NEXT;
5314 } else if (CUR == '\''){
5315 NEXT;
5316 q = CUR_PTR;
5317 version = xmlParseVersionNum(ctxt);
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005318 if (CUR != '\'') {
5319 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005320 ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005321 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005322 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00005323 NEXT;
5324 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005325 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005326 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005327 "xmlParseVersionInfo : expected ' or \"\n");
5328 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005329 }
5330 }
5331 return(version);
5332}
5333
Daniel Veillard11e00581998-10-24 18:27:49 +00005334/**
5335 * xmlParseEncName:
5336 * @ctxt: an XML parser context
5337 *
5338 * parse the XML encoding name
Daniel Veillard260a68f1998-08-13 03:39:55 +00005339 *
5340 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
Daniel Veillard11e00581998-10-24 18:27:49 +00005341 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005342 * Returns the encoding name value or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00005343 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005344CHAR *
5345xmlParseEncName(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00005346 const CHAR *q = CUR_PTR;
5347 CHAR *ret = NULL;
5348
5349 if (((CUR >= 'a') && (CUR <= 'z')) ||
5350 ((CUR >= 'A') && (CUR <= 'Z'))) {
5351 NEXT;
5352 while (IS_CHAR(CUR) &&
5353 (((CUR >= 'a') && (CUR <= 'z')) ||
5354 ((CUR >= 'A') && (CUR <= 'Z')) ||
5355 ((CUR >= '0') && (CUR <= '9')) ||
5356 (CUR == '-'))) NEXT;
5357 ret = xmlStrndup(q, CUR_PTR - q);
5358 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005359 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005360 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005361 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005362 }
5363 return(ret);
5364}
5365
Daniel Veillard11e00581998-10-24 18:27:49 +00005366/**
5367 * xmlParseEncodingDecl:
5368 * @ctxt: an XML parser context
5369 *
5370 * parse the XML encoding declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00005371 *
5372 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
Daniel Veillard11e00581998-10-24 18:27:49 +00005373 *
5374 * TODO: this should setup the conversion filters.
5375 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005376 * Returns the encoding value or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00005377 */
5378
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005379CHAR *
5380xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00005381 CHAR *encoding = NULL;
5382 const CHAR *q;
5383
5384 SKIP_BLANKS;
5385 if ((CUR == 'e') && (NXT(1) == 'n') &&
5386 (NXT(2) == 'c') && (NXT(3) == 'o') &&
5387 (NXT(4) == 'd') && (NXT(5) == 'i') &&
5388 (NXT(6) == 'n') && (NXT(7) == 'g')) {
5389 SKIP(8);
5390 SKIP_BLANKS;
5391 if (CUR != '=') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005392 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005393 ctxt->sax->error(ctxt->userData, "xmlParseEncodingDecl : expected '='\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005394 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005395 return(NULL);
5396 }
5397 NEXT;
5398 SKIP_BLANKS;
5399 if (CUR == '"') {
5400 NEXT;
5401 q = CUR_PTR;
5402 encoding = xmlParseEncName(ctxt);
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005403 if (CUR != '"') {
5404 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005405 ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005406 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005407 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00005408 NEXT;
5409 } else if (CUR == '\''){
5410 NEXT;
5411 q = CUR_PTR;
5412 encoding = xmlParseEncName(ctxt);
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005413 if (CUR != '\'') {
5414 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005415 ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005416 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005417 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00005418 NEXT;
5419 } else if (CUR == '"'){
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005420 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005421 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005422 "xmlParseEncodingDecl : expected ' or \"\n");
5423 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005424 }
5425 }
5426 return(encoding);
5427}
5428
Daniel Veillard11e00581998-10-24 18:27:49 +00005429/**
5430 * xmlParseSDDecl:
5431 * @ctxt: an XML parser context
5432 *
5433 * parse the XML standalone declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00005434 *
5435 * [32] SDDecl ::= S 'standalone' Eq
5436 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Daniel Veillard1e346af1999-02-22 10:33:01 +00005437 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005438 * [ VC: Standalone Document Declaration ]
5439 * TODO The standalone document declaration must have the value "no"
5440 * if any external markup declarations contain declarations of:
5441 * - attributes with default values, if elements to which these
5442 * attributes apply appear in the document without specifications
5443 * of values for these attributes, or
5444 * - entities (other than amp, lt, gt, apos, quot), if references
5445 * to those entities appear in the document, or
5446 * - attributes with values subject to normalization, where the
5447 * attribute appears in the document with a value which will change
5448 * as a result of normalization, or
5449 * - element types with element content, if white space occurs directly
5450 * within any instance of those types.
5451 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005452 * Returns 1 if standalone, 0 otherwise
Daniel Veillard260a68f1998-08-13 03:39:55 +00005453 */
5454
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005455int
5456xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00005457 int standalone = -1;
5458
5459 SKIP_BLANKS;
5460 if ((CUR == 's') && (NXT(1) == 't') &&
5461 (NXT(2) == 'a') && (NXT(3) == 'n') &&
5462 (NXT(4) == 'd') && (NXT(5) == 'a') &&
5463 (NXT(6) == 'l') && (NXT(7) == 'o') &&
5464 (NXT(8) == 'n') && (NXT(9) == 'e')) {
5465 SKIP(10);
Daniel Veillard011b63c1999-06-02 17:44:04 +00005466 SKIP_BLANKS;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005467 if (CUR != '=') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005468 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005469 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005470 "XML standalone declaration : expected '='\n");
5471 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005472 return(standalone);
5473 }
5474 NEXT;
5475 SKIP_BLANKS;
5476 if (CUR == '\''){
5477 NEXT;
5478 if ((CUR == 'n') && (NXT(1) == 'o')) {
5479 standalone = 0;
5480 SKIP(2);
5481 } else if ((CUR == 'y') && (NXT(1) == 'e') &&
5482 (NXT(2) == 's')) {
5483 standalone = 1;
5484 SKIP(3);
5485 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005486 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005487 ctxt->sax->error(ctxt->userData, "standalone accepts only 'yes' or 'no'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005488 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005489 }
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005490 if (CUR != '\'') {
5491 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005492 ctxt->sax->error(ctxt->userData, "String not closed\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005493 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005494 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00005495 NEXT;
5496 } else if (CUR == '"'){
5497 NEXT;
5498 if ((CUR == 'n') && (NXT(1) == 'o')) {
5499 standalone = 0;
5500 SKIP(2);
5501 } else if ((CUR == 'y') && (NXT(1) == 'e') &&
5502 (NXT(2) == 's')) {
5503 standalone = 1;
5504 SKIP(3);
5505 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005506 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005507 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005508 "standalone accepts only 'yes' or 'no'\n");
5509 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005510 }
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005511 if (CUR != '"') {
5512 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005513 ctxt->sax->error(ctxt->userData, "String not closed\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005514 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005515 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00005516 NEXT;
5517 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005518 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005519 ctxt->sax->error(ctxt->userData, "Standalone value not found\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005520 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005521 }
5522 }
5523 return(standalone);
5524}
5525
Daniel Veillard11e00581998-10-24 18:27:49 +00005526/**
5527 * xmlParseXMLDecl:
5528 * @ctxt: an XML parser context
5529 *
5530 * parse an XML declaration header
Daniel Veillard260a68f1998-08-13 03:39:55 +00005531 *
5532 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
5533 */
5534
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005535void
5536xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00005537 CHAR *version;
5538
5539 /*
5540 * We know that '<?xml' is here.
5541 */
5542 SKIP(5);
5543
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005544 if (!IS_BLANK(CUR)) {
5545 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005546 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005547 ctxt->wellFormed = 0;
5548 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005549 SKIP_BLANKS;
5550
5551 /*
5552 * We should have the VersionInfo here.
5553 */
5554 version = xmlParseVersionInfo(ctxt);
5555 if (version == NULL)
5556 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard517752b1999-04-05 12:20:10 +00005557 ctxt->version = xmlStrdup(version);
Daniel Veillard6454aec1999-09-02 22:04:43 +00005558 xmlFree(version);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005559
5560 /*
5561 * We may have the encoding declaration
5562 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005563 if (!IS_BLANK(CUR)) {
5564 if ((CUR == '?') && (NXT(1) == '>')) {
5565 SKIP(2);
5566 return;
5567 }
5568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005569 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005570 ctxt->wellFormed = 0;
5571 }
Daniel Veillard517752b1999-04-05 12:20:10 +00005572 ctxt->encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005573
5574 /*
5575 * We may have the standalone status.
5576 */
Daniel Veillard517752b1999-04-05 12:20:10 +00005577 if ((ctxt->encoding != NULL) && (!IS_BLANK(CUR))) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005578 if ((CUR == '?') && (NXT(1) == '>')) {
5579 SKIP(2);
5580 return;
5581 }
5582 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005583 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005584 ctxt->wellFormed = 0;
5585 }
5586 SKIP_BLANKS;
Daniel Veillard517752b1999-04-05 12:20:10 +00005587 ctxt->standalone = xmlParseSDDecl(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005588
5589 SKIP_BLANKS;
5590 if ((CUR == '?') && (NXT(1) == '>')) {
5591 SKIP(2);
5592 } else if (CUR == '>') {
5593 /* Deprecated old WD ... */
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005594 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005595 ctxt->sax->error(ctxt->userData, "XML declaration must end-up with '?>'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005596 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005597 NEXT;
5598 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005599 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005600 ctxt->sax->error(ctxt->userData, "parsing XML declaration: '?>' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005601 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005602 MOVETO_ENDTAG(CUR_PTR);
5603 NEXT;
5604 }
5605}
5606
Daniel Veillard11e00581998-10-24 18:27:49 +00005607/**
5608 * xmlParseMisc:
5609 * @ctxt: an XML parser context
5610 *
5611 * parse an XML Misc* optionnal field.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005612 *
5613 * [27] Misc ::= Comment | PI | S
5614 */
5615
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005616void
5617xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00005618 while (((CUR == '<') && (NXT(1) == '?')) ||
5619 ((CUR == '<') && (NXT(1) == '!') &&
5620 (NXT(2) == '-') && (NXT(3) == '-')) ||
5621 IS_BLANK(CUR)) {
5622 if ((CUR == '<') && (NXT(1) == '?')) {
5623 xmlParsePI(ctxt);
5624 } else if (IS_BLANK(CUR)) {
5625 NEXT;
5626 } else
Daniel Veillardb96e6431999-08-29 21:02:19 +00005627 xmlParseComment(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005628 }
5629}
5630
Daniel Veillard11e00581998-10-24 18:27:49 +00005631/**
5632 * xmlParseDocument :
5633 * @ctxt: an XML parser context
5634 *
5635 * parse an XML document (and build a tree if using the standard SAX
5636 * interface).
Daniel Veillard260a68f1998-08-13 03:39:55 +00005637 *
5638 * [1] document ::= prolog element Misc*
5639 *
5640 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
Daniel Veillard11e00581998-10-24 18:27:49 +00005641 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005642 * Returns 0, -1 in case of error. the parser context is augmented
Daniel Veillard11e00581998-10-24 18:27:49 +00005643 * as a result of the parsing.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005644 */
5645
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005646int
5647xmlParseDocument(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00005648 xmlDefaultSAXHandlerInit();
5649
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005650 GROW;
5651
Daniel Veillard260a68f1998-08-13 03:39:55 +00005652 /*
5653 * SAX: beginning of the document processing.
5654 */
Daniel Veillard517752b1999-04-05 12:20:10 +00005655 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
Daniel Veillard27d88741999-05-29 11:51:49 +00005656 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005657
5658 /*
Daniel Veillardb96e6431999-08-29 21:02:19 +00005659 * TODO We should check for encoding here and plug-in some
5660 * conversion code !!!!
Daniel Veillard260a68f1998-08-13 03:39:55 +00005661 */
5662
5663 /*
5664 * Wipe out everything which is before the first '<'
5665 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005666 if (IS_BLANK(CUR)) {
5667 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005668 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005669 "Extra spaces at the beginning of the document are not allowed\n");
5670 ctxt->wellFormed = 0;
5671 SKIP_BLANKS;
5672 }
5673
5674 if (CUR == 0) {
5675 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005676 ctxt->sax->error(ctxt->userData, "Document is empty\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005677 ctxt->wellFormed = 0;
5678 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005679
5680 /*
5681 * Check for the XMLDecl in the Prolog.
5682 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005683 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005684 if ((CUR == '<') && (NXT(1) == '?') &&
5685 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5686 (NXT(4) == 'l')) {
5687 xmlParseXMLDecl(ctxt);
5688 /* SKIP_EOL(cur); */
5689 SKIP_BLANKS;
5690 } else if ((CUR == '<') && (NXT(1) == '?') &&
5691 (NXT(2) == 'X') && (NXT(3) == 'M') &&
5692 (NXT(4) == 'L')) {
5693 /*
5694 * The first drafts were using <?XML and the final W3C REC
5695 * now use <?xml ...
5696 */
5697 xmlParseXMLDecl(ctxt);
5698 /* SKIP_EOL(cur); */
5699 SKIP_BLANKS;
5700 } else {
Daniel Veillard517752b1999-04-05 12:20:10 +00005701 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005702 }
Daniel Veillard517752b1999-04-05 12:20:10 +00005703 if ((ctxt->sax) && (ctxt->sax->startDocument))
Daniel Veillard27d88741999-05-29 11:51:49 +00005704 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005705
5706 /*
5707 * The Misc part of the Prolog
5708 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005709 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005710 xmlParseMisc(ctxt);
5711
5712 /*
5713 * Then possibly doc type declaration(s) and more Misc
5714 * (doctypedecl Misc*)?
5715 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005716 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005717 if ((CUR == '<') && (NXT(1) == '!') &&
5718 (NXT(2) == 'D') && (NXT(3) == 'O') &&
5719 (NXT(4) == 'C') && (NXT(5) == 'T') &&
5720 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
5721 (NXT(8) == 'E')) {
5722 xmlParseDocTypeDecl(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00005723 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005724 xmlParseMisc(ctxt);
5725 }
5726
5727 /*
5728 * Time to start parsing the tree itself
5729 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005730 GROW;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005731 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard517752b1999-04-05 12:20:10 +00005732 xmlParseElement(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00005733 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005734
5735 /*
5736 * The Misc part at the end
5737 */
5738 xmlParseMisc(ctxt);
5739
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005740 if (CUR != 0) {
5741 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005742 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005743 "Extra content at the end of the document\n");
5744 ctxt->wellFormed = 0;
5745 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00005746 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005747
Daniel Veillard260a68f1998-08-13 03:39:55 +00005748 /*
5749 * SAX: end of the document processing.
5750 */
Daniel Veillard517752b1999-04-05 12:20:10 +00005751 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005752 ctxt->sax->endDocument(ctxt->userData);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005753 if (! ctxt->wellFormed) return(-1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005754 return(0);
5755}
5756
Daniel Veillardb05deb71999-08-10 19:04:08 +00005757/************************************************************************
5758 * *
5759 * I/O front end functions to the parser *
5760 * *
5761 ************************************************************************/
5762
Daniel Veillard11e00581998-10-24 18:27:49 +00005763/**
Daniel Veillardbe70ff71999-07-05 16:50:46 +00005764 * xmlCreateDocParserCtxt :
Daniel Veillardd692aa41999-02-28 21:54:31 +00005765 * @cur: a pointer to an array of CHAR
5766 *
5767 * Create a parser context for an XML in-memory document.
5768 *
5769 * Returns the new parser context or NULL
5770 */
5771xmlParserCtxtPtr
5772xmlCreateDocParserCtxt(CHAR *cur) {
5773 xmlParserCtxtPtr ctxt;
5774 xmlParserInputPtr input;
Daniel Veillard27d88741999-05-29 11:51:49 +00005775 xmlCharEncoding enc;
Daniel Veillardd692aa41999-02-28 21:54:31 +00005776
Daniel Veillardb05deb71999-08-10 19:04:08 +00005777 ctxt = xmlNewParserCtxt();
Daniel Veillardd692aa41999-02-28 21:54:31 +00005778 if (ctxt == NULL) {
Daniel Veillardd692aa41999-02-28 21:54:31 +00005779 return(NULL);
5780 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00005781 input = xmlNewInputStream(ctxt);
Daniel Veillardd692aa41999-02-28 21:54:31 +00005782 if (input == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00005783 xmlFreeParserCtxt(ctxt);
Daniel Veillardd692aa41999-02-28 21:54:31 +00005784 return(NULL);
5785 }
5786
Daniel Veillard27d88741999-05-29 11:51:49 +00005787 /*
5788 * plug some encoding conversion routines here. !!!
5789 */
5790 enc = xmlDetectCharEncoding(cur);
5791 xmlSwitchEncoding(ctxt, enc);
5792
Daniel Veillardd692aa41999-02-28 21:54:31 +00005793 input->base = cur;
5794 input->cur = cur;
Daniel Veillardd692aa41999-02-28 21:54:31 +00005795
5796 inputPush(ctxt, input);
5797 return(ctxt);
5798}
5799
5800/**
Daniel Veillard42dc9b31998-11-09 01:17:21 +00005801 * xmlSAXParseDoc :
5802 * @sax: the SAX handler block
Daniel Veillard11e00581998-10-24 18:27:49 +00005803 * @cur: a pointer to an array of CHAR
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005804 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
5805 * documents
Daniel Veillard11e00581998-10-24 18:27:49 +00005806 *
5807 * parse an XML in-memory document and build a tree.
Daniel Veillard42dc9b31998-11-09 01:17:21 +00005808 * It use the given SAX function block to handle the parsing callback.
5809 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillard11e00581998-10-24 18:27:49 +00005810 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005811 * Returns the resulting document tree
Daniel Veillard260a68f1998-08-13 03:39:55 +00005812 */
5813
Daniel Veillard1e346af1999-02-22 10:33:01 +00005814xmlDocPtr
5815xmlSAXParseDoc(xmlSAXHandlerPtr sax, CHAR *cur, int recovery) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00005816 xmlDocPtr ret;
5817 xmlParserCtxtPtr ctxt;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005818
5819 if (cur == NULL) return(NULL);
5820
Daniel Veillardd692aa41999-02-28 21:54:31 +00005821
5822 ctxt = xmlCreateDocParserCtxt(cur);
5823 if (ctxt == NULL) return(NULL);
Daniel Veillard27d88741999-05-29 11:51:49 +00005824 if (sax != NULL) {
5825 ctxt->sax = sax;
5826 ctxt->userData = NULL;
5827 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005828
5829 xmlParseDocument(ctxt);
Daniel Veillard517752b1999-04-05 12:20:10 +00005830 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005831 else {
5832 ret = NULL;
Daniel Veillard517752b1999-04-05 12:20:10 +00005833 xmlFreeDoc(ctxt->myDoc);
5834 ctxt->myDoc = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005835 }
Daniel Veillard97fea181999-06-26 23:07:37 +00005836 if (sax != NULL)
5837 ctxt->sax = NULL;
Daniel Veillardd692aa41999-02-28 21:54:31 +00005838 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005839
5840 return(ret);
5841}
5842
Daniel Veillard11e00581998-10-24 18:27:49 +00005843/**
Daniel Veillard42dc9b31998-11-09 01:17:21 +00005844 * xmlParseDoc :
5845 * @cur: a pointer to an array of CHAR
5846 *
5847 * parse an XML in-memory document and build a tree.
5848 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005849 * Returns the resulting document tree
Daniel Veillard42dc9b31998-11-09 01:17:21 +00005850 */
5851
Daniel Veillard1e346af1999-02-22 10:33:01 +00005852xmlDocPtr
5853xmlParseDoc(CHAR *cur) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005854 return(xmlSAXParseDoc(NULL, cur, 0));
5855}
5856
5857/**
Daniel Veillard011b63c1999-06-02 17:44:04 +00005858 * xmlSAXParseDTD :
5859 * @sax: the SAX handler block
5860 * @ExternalID: a NAME* containing the External ID of the DTD
5861 * @SystemID: a NAME* containing the URL to the DTD
5862 *
5863 * Load and parse an external subset.
5864 *
5865 * Returns the resulting xmlDtdPtr or NULL in case of error.
5866 */
5867
5868xmlDtdPtr
5869xmlSAXParseDTD(xmlSAXHandlerPtr sax, const CHAR *ExternalID,
5870 const CHAR *SystemID) {
5871 xmlDtdPtr ret = NULL;
5872 xmlParserCtxtPtr ctxt;
Daniel Veillard14fff061999-06-22 21:49:07 +00005873 xmlParserInputPtr input = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +00005874 xmlCharEncoding enc;
5875
5876 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
5877
Daniel Veillardb05deb71999-08-10 19:04:08 +00005878 ctxt = xmlNewParserCtxt();
Daniel Veillard011b63c1999-06-02 17:44:04 +00005879 if (ctxt == NULL) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00005880 return(NULL);
5881 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00005882
5883 /*
5884 * Set-up the SAX context
5885 */
5886 if (ctxt == NULL) return(NULL);
5887 if (sax != NULL) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005888 if (ctxt->sax != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00005889 xmlFree(ctxt->sax);
Daniel Veillard011b63c1999-06-02 17:44:04 +00005890 ctxt->sax = sax;
5891 ctxt->userData = NULL;
5892 }
5893
5894 /*
5895 * Ask the Entity resolver to load the damn thing
5896 */
5897
5898 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
5899 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
5900 if (input == NULL) {
Daniel Veillard97fea181999-06-26 23:07:37 +00005901 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +00005902 xmlFreeParserCtxt(ctxt);
5903 return(NULL);
5904 }
5905
5906 /*
5907 * plug some encoding conversion routines here. !!!
5908 */
5909 xmlPushInput(ctxt, input);
5910 enc = xmlDetectCharEncoding(ctxt->input->cur);
5911 xmlSwitchEncoding(ctxt, enc);
5912
Daniel Veillardb05deb71999-08-10 19:04:08 +00005913 if (input->filename == NULL)
Daniel Veillardb96e6431999-08-29 21:02:19 +00005914 input->filename = (char *) xmlStrdup(SystemID); /* !!!!!!! */
Daniel Veillard011b63c1999-06-02 17:44:04 +00005915 input->line = 1;
5916 input->col = 1;
5917 input->base = ctxt->input->cur;
5918 input->cur = ctxt->input->cur;
5919 input->free = NULL;
5920
5921 /*
5922 * let's parse that entity knowing it's an external subset.
5923 */
5924 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
5925
5926 if (ctxt->myDoc != NULL) {
5927 if (ctxt->wellFormed) {
5928 ret = ctxt->myDoc->intSubset;
5929 ctxt->myDoc->intSubset = NULL;
5930 } else {
5931 ret = NULL;
5932 }
5933 xmlFreeDoc(ctxt->myDoc);
5934 ctxt->myDoc = NULL;
5935 }
Daniel Veillard97fea181999-06-26 23:07:37 +00005936 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +00005937 xmlFreeParserCtxt(ctxt);
5938
5939 return(ret);
5940}
5941
5942/**
5943 * xmlParseDTD :
5944 * @ExternalID: a NAME* containing the External ID of the DTD
5945 * @SystemID: a NAME* containing the URL to the DTD
5946 *
5947 * Load and parse an external subset.
5948 *
5949 * Returns the resulting xmlDtdPtr or NULL in case of error.
5950 */
5951
5952xmlDtdPtr
5953xmlParseDTD(const CHAR *ExternalID, const CHAR *SystemID) {
5954 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
5955}
5956
5957/**
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005958 * xmlRecoverDoc :
5959 * @cur: a pointer to an array of CHAR
5960 *
5961 * parse an XML in-memory document and build a tree.
5962 * In the case the document is not Well Formed, a tree is built anyway
5963 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005964 * Returns the resulting document tree
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005965 */
5966
Daniel Veillard1e346af1999-02-22 10:33:01 +00005967xmlDocPtr
5968xmlRecoverDoc(CHAR *cur) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005969 return(xmlSAXParseDoc(NULL, cur, 1));
Daniel Veillard42dc9b31998-11-09 01:17:21 +00005970}
5971
5972/**
Daniel Veillardd692aa41999-02-28 21:54:31 +00005973 * xmlCreateFileParserCtxt :
Daniel Veillard11e00581998-10-24 18:27:49 +00005974 * @filename: the filename
5975 *
Daniel Veillardd692aa41999-02-28 21:54:31 +00005976 * Create a parser context for a file content.
5977 * Automatic support for ZLIB/Compress compressed document is provided
5978 * by default if found at compile-time.
Daniel Veillard11e00581998-10-24 18:27:49 +00005979 *
Daniel Veillardd692aa41999-02-28 21:54:31 +00005980 * Returns the new parser context or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00005981 */
Daniel Veillardd692aa41999-02-28 21:54:31 +00005982xmlParserCtxtPtr
5983xmlCreateFileParserCtxt(const char *filename)
5984{
5985 xmlParserCtxtPtr ctxt;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005986 xmlParserInputPtr inputStream;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005987 xmlParserInputBufferPtr buf;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005988 char *directory = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005989
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005990 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
5991 if (buf == NULL) return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005992
Daniel Veillardb05deb71999-08-10 19:04:08 +00005993 ctxt = xmlNewParserCtxt();
Daniel Veillard260a68f1998-08-13 03:39:55 +00005994 if (ctxt == NULL) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00005995 return(NULL);
5996 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00005997
5998 inputStream = xmlNewInputStream(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005999 if (inputStream == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00006000 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006001 return(NULL);
6002 }
6003
Daniel Veillard6454aec1999-09-02 22:04:43 +00006004 inputStream->filename = xmlMemStrdup(filename);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00006005 inputStream->buf = buf;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00006006 inputStream->base = inputStream->buf->buffer->content;
6007 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006008
6009 inputPush(ctxt, inputStream);
Daniel Veillardb05deb71999-08-10 19:04:08 +00006010 if ((ctxt->directory == NULL) && (directory == NULL))
6011 directory = xmlParserGetDirectory(filename);
6012 if ((ctxt->directory == NULL) && (directory != NULL))
6013 ctxt->directory = directory;
6014
Daniel Veillardd692aa41999-02-28 21:54:31 +00006015 return(ctxt);
6016}
6017
6018/**
6019 * xmlSAXParseFile :
6020 * @sax: the SAX handler block
6021 * @filename: the filename
6022 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
6023 * documents
6024 *
6025 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
6026 * compressed document is provided by default if found at compile-time.
6027 * It use the given SAX function block to handle the parsing callback.
6028 * If sax is NULL, fallback to the default DOM tree building routines.
6029 *
6030 * Returns the resulting document tree
6031 */
6032
Daniel Veillard011b63c1999-06-02 17:44:04 +00006033xmlDocPtr
6034xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
Daniel Veillardd692aa41999-02-28 21:54:31 +00006035 int recovery) {
6036 xmlDocPtr ret;
6037 xmlParserCtxtPtr ctxt;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006038 char *directory = NULL;
Daniel Veillardd692aa41999-02-28 21:54:31 +00006039
6040 ctxt = xmlCreateFileParserCtxt(filename);
6041 if (ctxt == NULL) return(NULL);
Daniel Veillard27d88741999-05-29 11:51:49 +00006042 if (sax != NULL) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00006043 if (ctxt->sax != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00006044 xmlFree(ctxt->sax);
Daniel Veillard27d88741999-05-29 11:51:49 +00006045 ctxt->sax = sax;
6046 ctxt->userData = NULL;
6047 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00006048
Daniel Veillardb05deb71999-08-10 19:04:08 +00006049 if ((ctxt->directory == NULL) && (directory == NULL))
6050 directory = xmlParserGetDirectory(filename);
6051 if ((ctxt->directory == NULL) && (directory != NULL))
Daniel Veillardb96e6431999-08-29 21:02:19 +00006052 ctxt->directory = (char *) xmlStrdup((CHAR *) directory); /* !!!!!!! */
Daniel Veillardb05deb71999-08-10 19:04:08 +00006053
Daniel Veillard260a68f1998-08-13 03:39:55 +00006054 xmlParseDocument(ctxt);
6055
Daniel Veillard517752b1999-04-05 12:20:10 +00006056 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006057 else {
6058 ret = NULL;
Daniel Veillard517752b1999-04-05 12:20:10 +00006059 xmlFreeDoc(ctxt->myDoc);
6060 ctxt->myDoc = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006061 }
Daniel Veillard97fea181999-06-26 23:07:37 +00006062 if (sax != NULL)
6063 ctxt->sax = NULL;
Daniel Veillardd692aa41999-02-28 21:54:31 +00006064 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006065
6066 return(ret);
6067}
6068
Daniel Veillard42dc9b31998-11-09 01:17:21 +00006069/**
6070 * xmlParseFile :
6071 * @filename: the filename
6072 *
6073 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
6074 * compressed document is provided by default if found at compile-time.
6075 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00006076 * Returns the resulting document tree
Daniel Veillard42dc9b31998-11-09 01:17:21 +00006077 */
6078
Daniel Veillard011b63c1999-06-02 17:44:04 +00006079xmlDocPtr
6080xmlParseFile(const char *filename) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006081 return(xmlSAXParseFile(NULL, filename, 0));
6082}
6083
6084/**
6085 * xmlRecoverFile :
6086 * @filename: the filename
6087 *
6088 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
6089 * compressed document is provided by default if found at compile-time.
6090 * In the case the document is not Well Formed, a tree is built anyway
6091 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00006092 * Returns the resulting document tree
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006093 */
6094
Daniel Veillard011b63c1999-06-02 17:44:04 +00006095xmlDocPtr
6096xmlRecoverFile(const char *filename) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006097 return(xmlSAXParseFile(NULL, filename, 1));
Daniel Veillard42dc9b31998-11-09 01:17:21 +00006098}
Daniel Veillard260a68f1998-08-13 03:39:55 +00006099
Daniel Veillard11e00581998-10-24 18:27:49 +00006100/**
Daniel Veillardd692aa41999-02-28 21:54:31 +00006101 * xmlCreateMemoryParserCtxt :
Daniel Veillard1e346af1999-02-22 10:33:01 +00006102 * @buffer: an pointer to a char array
Daniel Veillard11e00581998-10-24 18:27:49 +00006103 * @size: the siwe of the array
6104 *
Daniel Veillardd692aa41999-02-28 21:54:31 +00006105 * Create a parser context for an XML in-memory document.
Daniel Veillard11e00581998-10-24 18:27:49 +00006106 *
Daniel Veillardd692aa41999-02-28 21:54:31 +00006107 * Returns the new parser context or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00006108 */
Daniel Veillardd692aa41999-02-28 21:54:31 +00006109xmlParserCtxtPtr
6110xmlCreateMemoryParserCtxt(char *buffer, int size) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006111 xmlParserCtxtPtr ctxt;
6112 xmlParserInputPtr input;
Daniel Veillard27d88741999-05-29 11:51:49 +00006113 xmlCharEncoding enc;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006114
6115 buffer[size - 1] = '\0';
6116
Daniel Veillardb05deb71999-08-10 19:04:08 +00006117 ctxt = xmlNewParserCtxt();
Daniel Veillard260a68f1998-08-13 03:39:55 +00006118 if (ctxt == NULL) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006119 return(NULL);
6120 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00006121
6122 input = xmlNewInputStream(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006123 if (input == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00006124 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006125 return(NULL);
6126 }
6127
6128 input->filename = NULL;
6129 input->line = 1;
6130 input->col = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006131 input->buf = NULL;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00006132 input->consumed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006133
6134 /*
Daniel Veillard27d88741999-05-29 11:51:49 +00006135 * plug some encoding conversion routines here. !!!
Daniel Veillard260a68f1998-08-13 03:39:55 +00006136 */
Daniel Veillardb96e6431999-08-29 21:02:19 +00006137 enc = xmlDetectCharEncoding(BAD_CAST buffer);
Daniel Veillard27d88741999-05-29 11:51:49 +00006138 xmlSwitchEncoding(ctxt, enc);
6139
Daniel Veillardb96e6431999-08-29 21:02:19 +00006140 input->base = BAD_CAST buffer;
6141 input->cur = BAD_CAST buffer;
Daniel Veillardd692aa41999-02-28 21:54:31 +00006142 input->free = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006143
6144 inputPush(ctxt, input);
Daniel Veillardd692aa41999-02-28 21:54:31 +00006145 return(ctxt);
6146}
6147
6148/**
6149 * xmlSAXParseMemory :
6150 * @sax: the SAX handler block
6151 * @buffer: an pointer to a char array
6152 * @size: the siwe of the array
6153 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
6154 * documents
6155 *
6156 * parse an XML in-memory block and use the given SAX function block
6157 * to handle the parsing callback. If sax is NULL, fallback to the default
6158 * DOM tree building routines.
6159 *
Daniel Veillardd692aa41999-02-28 21:54:31 +00006160 * Returns the resulting document tree
6161 */
6162xmlDocPtr
6163xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
6164 xmlDocPtr ret;
6165 xmlParserCtxtPtr ctxt;
6166
6167 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
6168 if (ctxt == NULL) return(NULL);
Daniel Veillard27d88741999-05-29 11:51:49 +00006169 if (sax != NULL) {
6170 ctxt->sax = sax;
6171 ctxt->userData = NULL;
6172 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00006173
6174 xmlParseDocument(ctxt);
6175
Daniel Veillard517752b1999-04-05 12:20:10 +00006176 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006177 else {
6178 ret = NULL;
Daniel Veillard517752b1999-04-05 12:20:10 +00006179 xmlFreeDoc(ctxt->myDoc);
6180 ctxt->myDoc = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006181 }
Daniel Veillard97fea181999-06-26 23:07:37 +00006182 if (sax != NULL)
6183 ctxt->sax = NULL;
Daniel Veillardd692aa41999-02-28 21:54:31 +00006184 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006185
6186 return(ret);
6187}
6188
Daniel Veillard42dc9b31998-11-09 01:17:21 +00006189/**
6190 * xmlParseMemory :
Daniel Veillard1e346af1999-02-22 10:33:01 +00006191 * @buffer: an pointer to a char array
Daniel Veillard42dc9b31998-11-09 01:17:21 +00006192 * @size: the size of the array
6193 *
6194 * parse an XML in-memory block and build a tree.
6195 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00006196 * Returns the resulting document tree
Daniel Veillard42dc9b31998-11-09 01:17:21 +00006197 */
6198
6199xmlDocPtr xmlParseMemory(char *buffer, int size) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006200 return(xmlSAXParseMemory(NULL, buffer, size, 0));
6201}
6202
6203/**
6204 * xmlRecoverMemory :
Daniel Veillard1e346af1999-02-22 10:33:01 +00006205 * @buffer: an pointer to a char array
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006206 * @size: the size of the array
6207 *
6208 * parse an XML in-memory block and build a tree.
6209 * In the case the document is not Well Formed, a tree is built anyway
6210 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00006211 * Returns the resulting document tree
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006212 */
6213
6214xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
6215 return(xmlSAXParseMemory(NULL, buffer, size, 1));
Daniel Veillard42dc9b31998-11-09 01:17:21 +00006216}
Daniel Veillard260a68f1998-08-13 03:39:55 +00006217
Daniel Veillard260a68f1998-08-13 03:39:55 +00006218
Daniel Veillard11e00581998-10-24 18:27:49 +00006219/**
6220 * xmlSetupParserForBuffer:
6221 * @ctxt: an XML parser context
6222 * @buffer: a CHAR * buffer
6223 * @filename: a file name
6224 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00006225 * Setup the parser context to parse a new buffer; Clears any prior
6226 * contents from the parser context. The buffer parameter must not be
6227 * NULL, but the filename parameter can be
6228 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006229void
6230xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
Daniel Veillard260a68f1998-08-13 03:39:55 +00006231 const char* filename)
6232{
Daniel Veillardb05deb71999-08-10 19:04:08 +00006233 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006234
Daniel Veillardb05deb71999-08-10 19:04:08 +00006235 input = xmlNewInputStream(ctxt);
6236 if (input == NULL) {
6237 perror("malloc");
Daniel Veillard6454aec1999-09-02 22:04:43 +00006238 xmlFree(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00006239 exit(1);
6240 }
6241
6242 xmlClearParserCtxt(ctxt);
6243 if (filename != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00006244 input->filename = xmlMemStrdup(filename);
Daniel Veillardb05deb71999-08-10 19:04:08 +00006245 input->base = buffer;
6246 input->cur = buffer;
6247 inputPush(ctxt, input);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006248}
6249
6250
/************************************************************************
 *									*
 * 				Miscellaneous				*
 *									*
 ************************************************************************/
6256
6257
Daniel Veillard11e00581998-10-24 18:27:49 +00006258/**
6259 * xmlParserFindNodeInfo:
6260 * @ctxt: an XML parser context
6261 * @node: an XML node within the tree
6262 *
6263 * Find the parser node info struct for a given node
6264 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00006265 * Returns an xmlParserNodeInfo block pointer or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00006266 */
6267const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
6268 const xmlNode* node)
6269{
6270 unsigned long pos;
6271
6272 /* Find position where node should be at */
6273 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
6274 if ( ctx->node_seq.buffer[pos].node == node )
6275 return &ctx->node_seq.buffer[pos];
6276 else
6277 return NULL;
6278}
6279
6280
Daniel Veillard11e00581998-10-24 18:27:49 +00006281/**
6282 * xmlInitNodeInfoSeq :
6283 * @seq: a node info sequence pointer
6284 *
6285 * -- Initialize (set to initial state) node info sequence
Daniel Veillard260a68f1998-08-13 03:39:55 +00006286 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006287void
6288xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
Daniel Veillard260a68f1998-08-13 03:39:55 +00006289{
6290 seq->length = 0;
6291 seq->maximum = 0;
6292 seq->buffer = NULL;
6293}
6294
Daniel Veillard11e00581998-10-24 18:27:49 +00006295/**
6296 * xmlClearNodeInfoSeq :
6297 * @seq: a node info sequence pointer
6298 *
6299 * -- Clear (release memory and reinitialize) node
Daniel Veillard260a68f1998-08-13 03:39:55 +00006300 * info sequence
6301 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006302void
6303xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
Daniel Veillard260a68f1998-08-13 03:39:55 +00006304{
6305 if ( seq->buffer != NULL )
Daniel Veillard6454aec1999-09-02 22:04:43 +00006306 xmlFree(seq->buffer);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006307 xmlInitNodeInfoSeq(seq);
6308}
6309
6310
Daniel Veillard11e00581998-10-24 18:27:49 +00006311/**
6312 * xmlParserFindNodeInfoIndex:
6313 * @seq: a node info sequence pointer
6314 * @node: an XML node pointer
6315 *
6316 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00006317 * xmlParserFindNodeInfoIndex : Find the index that the info record for
6318 * the given node is or should be at in a sorted sequence
Daniel Veillard1164e751999-02-16 16:29:17 +00006319 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00006320 * Returns a long indicating the position of the record
Daniel Veillard260a68f1998-08-13 03:39:55 +00006321 */
6322unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
6323 const xmlNode* node)
6324{
6325 unsigned long upper, lower, middle;
6326 int found = 0;
6327
6328 /* Do a binary search for the key */
6329 lower = 1;
6330 upper = seq->length;
6331 middle = 0;
6332 while ( lower <= upper && !found) {
6333 middle = lower + (upper - lower) / 2;
6334 if ( node == seq->buffer[middle - 1].node )
6335 found = 1;
6336 else if ( node < seq->buffer[middle - 1].node )
6337 upper = middle - 1;
6338 else
6339 lower = middle + 1;
6340 }
6341
6342 /* Return position */
6343 if ( middle == 0 || seq->buffer[middle - 1].node < node )
6344 return middle;
6345 else
6346 return middle - 1;
6347}
6348
6349
Daniel Veillard11e00581998-10-24 18:27:49 +00006350/**
6351 * xmlParserAddNodeInfo:
6352 * @ctxt: an XML parser context
Daniel Veillard1e346af1999-02-22 10:33:01 +00006353 * @info: a node info sequence pointer
Daniel Veillard11e00581998-10-24 18:27:49 +00006354 *
6355 * Insert node info record into the sorted sequence
Daniel Veillard260a68f1998-08-13 03:39:55 +00006356 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006357void
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006358xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
Daniel Veillard1e346af1999-02-22 10:33:01 +00006359 const xmlParserNodeInfo* info)
Daniel Veillard260a68f1998-08-13 03:39:55 +00006360{
6361 unsigned long pos;
6362 static unsigned int block_size = 5;
6363
6364 /* Find pos and check to see if node is already in the sequence */
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006365 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
6366 if ( pos < ctxt->node_seq.length
6367 && ctxt->node_seq.buffer[pos].node == info->node ) {
6368 ctxt->node_seq.buffer[pos] = *info;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006369 }
6370
6371 /* Otherwise, we need to add new node to buffer */
6372 else {
6373 /* Expand buffer by 5 if needed */
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006374 if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006375 xmlParserNodeInfo* tmp_buffer;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006376 unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
6377 *(ctxt->node_seq.maximum + block_size));
Daniel Veillard260a68f1998-08-13 03:39:55 +00006378
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006379 if ( ctxt->node_seq.buffer == NULL )
Daniel Veillard6454aec1999-09-02 22:04:43 +00006380 tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006381 else
Daniel Veillard6454aec1999-09-02 22:04:43 +00006382 tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006383
6384 if ( tmp_buffer == NULL ) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006385 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006386 ctxt->sax->error(ctxt->userData, "Out of memory\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00006387 return;
6388 }
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006389 ctxt->node_seq.buffer = tmp_buffer;
6390 ctxt->node_seq.maximum += block_size;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006391 }
6392
6393 /* If position is not at end, move elements out of the way */
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006394 if ( pos != ctxt->node_seq.length ) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006395 unsigned long i;
6396
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006397 for ( i = ctxt->node_seq.length; i > pos; i-- )
6398 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
Daniel Veillard260a68f1998-08-13 03:39:55 +00006399 }
6400
6401 /* Copy element and increase length */
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006402 ctxt->node_seq.buffer[pos] = *info;
6403 ctxt->node_seq.length++;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006404 }
6405}
Daniel Veillard011b63c1999-06-02 17:44:04 +00006406
6407
Daniel Veillardb05deb71999-08-10 19:04:08 +00006408/**
6409 * xmlSubstituteEntitiesDefault :
6410 * @val: int 0 or 1
6411 *
6412 * Set and return the previous value for default entity support.
6413 * Initially the parser always keep entity references instead of substituting
6414 * entity values in the output. This function has to be used to change the
6415 * default parser behaviour
6416 * SAX::subtituteEntities() has to be used for changing that on a file by
6417 * file basis.
6418 *
6419 * Returns the last value for 0 for no substitution, 1 for substitution.
6420 */
6421
6422int
6423xmlSubstituteEntitiesDefault(int val) {
6424 int old = xmlSubstituteEntitiesDefaultValue;
6425
6426 xmlSubstituteEntitiesDefaultValue = val;
6427 return(old);
6428}
6429