blob: 8fd479778b5bb7e7face7394d7f9e27abf199bd0 [file] [log] [blame]
Daniel Veillard260a68f1998-08-13 03:39:55 +00001/*
2 * parser.c : an XML 1.0 non-verifying parser
3 *
4 * See Copyright for the status of this software.
5 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006 * Daniel.Veillard@w3.org
Daniel Veillard260a68f1998-08-13 03:39:55 +00007 */
8
9#ifdef WIN32
10#define HAVE_FCNTL_H
11#include <io.h>
12#else
13#include <config.h>
14#endif
15#include <stdio.h>
16#include <ctype.h>
17#include <string.h> /* for memset() only */
Seth Alvese7f12e61998-10-01 20:51:15 +000018#include <stdlib.h>
Daniel Veillard260a68f1998-08-13 03:39:55 +000019#include <sys/stat.h>
20#ifdef HAVE_FCNTL_H
21#include <fcntl.h>
22#endif
23#ifdef HAVE_UNISTD_H
24#include <unistd.h>
25#endif
26#ifdef HAVE_ZLIB_H
27#include <zlib.h>
28#endif
29
30#include "tree.h"
31#include "parser.h"
32#include "entities.h"
Daniel Veillard27d88741999-05-29 11:51:49 +000033#include "encoding.h"
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000034#include "valid.h"
Daniel Veillard1e346af1999-02-22 10:33:01 +000035#include "parserInternals.h"
Daniel Veillarde2d034d1999-07-27 19:52:06 +000036#include "xmlIO.h"
Daniel Veillard260a68f1998-08-13 03:39:55 +000037
Daniel Veillard14fff061999-06-22 21:49:07 +000038const char *xmlParserVersion = LIBXML_VERSION;
39
Daniel Veillarde2d034d1999-07-27 19:52:06 +000040#define XML_MAX_NAMELEN 1000
41
42/************************************************************************
43 * *
44 * Input handling functions for progressive parsing *
45 * *
46 ************************************************************************/
47
48/* #define DEBUG_INPUT */
49
Daniel Veillardb05deb71999-08-10 19:04:08 +000050#define INPUT_CHUNK 250
51/* we need to keep enough input to show errors in context */
52#define LINE_LEN 80
Daniel Veillarde2d034d1999-07-27 19:52:06 +000053
54#ifdef DEBUG_INPUT
55#define CHECK_BUFFER(in) check_buffer(in)
Daniel Veillarde2d034d1999-07-27 19:52:06 +000056
57void check_buffer(xmlParserInputPtr in) {
58 if (in->base != in->buf->buffer->content) {
59 fprintf(stderr, "xmlParserInput: base mismatch problem\n");
60 }
61 if (in->cur < in->base) {
62 fprintf(stderr, "xmlParserInput: cur < base problem\n");
63 }
64 if (in->cur > in->base + in->buf->buffer->use) {
65 fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
66 }
67 fprintf(stderr,"buffer %x : content %x, cur %d, use %d, size %d\n",
68 (int) in, (int) in->buf->buffer->content, in->cur - in->base,
69 in->buf->buffer->use, in->buf->buffer->size);
70}
71
Daniel Veillardb05deb71999-08-10 19:04:08 +000072#else
73#define CHECK_BUFFER(in)
74#endif
75
Daniel Veillarde2d034d1999-07-27 19:52:06 +000076
77/**
78 * xmlParserInputRead:
79 * @in: an XML parser input
80 * @len: an indicative size for the lookahead
81 *
82 * This function refresh the input for the parser. It doesn't try to
83 * preserve pointers to the input buffer, and discard already read data
84 *
85 * Returns the number of CHARs read, or -1 in case of error, 0 indicate the
86 * end of this entity
87 */
88int
89xmlParserInputRead(xmlParserInputPtr in, int len) {
90 int ret;
91 int used;
92 int index;
93
94#ifdef DEBUG_INPUT
95 fprintf(stderr, "Read\n");
96#endif
97 if (in->buf == NULL) return(-1);
98 if (in->base == NULL) return(-1);
99 if (in->cur == NULL) return(-1);
100 if (in->buf->buffer == NULL) return(-1);
101
102 CHECK_BUFFER(in);
103
104 used = in->cur - in->buf->buffer->content;
105 ret = xmlBufferShrink(in->buf->buffer, used);
106 if (ret > 0) {
107 in->cur -= ret;
108 in->consumed += ret;
109 }
110 ret = xmlParserInputBufferRead(in->buf, len);
111 if (in->base != in->buf->buffer->content) {
112 /*
113 * the buffer has been realloced
114 */
115 index = in->cur - in->base;
116 in->base = in->buf->buffer->content;
117 in->cur = &in->buf->buffer->content[index];
118 }
119
120 CHECK_BUFFER(in);
121
122 return(ret);
123}
124
125/**
126 * xmlParserInputGrow:
127 * @in: an XML parser input
128 * @len: an indicative size for the lookahead
129 *
130 * This function increase the input for the parser. It tries to
131 * preserve pointers to the input buffer, and keep already read data
132 *
133 * Returns the number of CHARs read, or -1 in case of error, 0 indicate the
134 * end of this entity
135 */
136int
137xmlParserInputGrow(xmlParserInputPtr in, int len) {
138 int ret;
139 int index;
140
141#ifdef DEBUG_INPUT
142 fprintf(stderr, "Grow\n");
143#endif
144 if (in->buf == NULL) return(-1);
145 if (in->base == NULL) return(-1);
146 if (in->cur == NULL) return(-1);
147 if (in->buf->buffer == NULL) return(-1);
148
149 CHECK_BUFFER(in);
150
151 index = in->cur - in->base;
152 if (in->buf->buffer->use > index + INPUT_CHUNK) {
153
154 CHECK_BUFFER(in);
155
156 return(0);
157 }
158 ret = xmlParserInputBufferGrow(in->buf, len);
159 if (in->base != in->buf->buffer->content) {
160 /*
161 * the buffer has been realloced
162 */
163 index = in->cur - in->base;
164 in->base = in->buf->buffer->content;
165 in->cur = &in->buf->buffer->content[index];
166 }
167
168 CHECK_BUFFER(in);
169
170 return(ret);
171}
172
173/**
174 * xmlParserInputShrink:
175 * @in: an XML parser input
176 *
177 * This function removes used input for the parser.
178 */
179void
180xmlParserInputShrink(xmlParserInputPtr in) {
181 int used;
182 int ret;
183 int index;
184
185#ifdef DEBUG_INPUT
186 fprintf(stderr, "Shrink\n");
187#endif
188 if (in->buf == NULL) return;
189 if (in->base == NULL) return;
190 if (in->cur == NULL) return;
191 if (in->buf->buffer == NULL) return;
192
193 CHECK_BUFFER(in);
194
195 used = in->cur - in->buf->buffer->content;
196 if (used > INPUT_CHUNK) {
Daniel Veillardb05deb71999-08-10 19:04:08 +0000197 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000198 if (ret > 0) {
199 in->cur -= ret;
200 in->consumed += ret;
201 }
202 }
203
204 CHECK_BUFFER(in);
205
206 if (in->buf->buffer->use > INPUT_CHUNK) {
207 return;
208 }
209 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
210 if (in->base != in->buf->buffer->content) {
211 /*
212 * the buffer has been realloced
213 */
214 index = in->cur - in->base;
215 in->base = in->buf->buffer->content;
216 in->cur = &in->buf->buffer->content[index];
217 }
218
219 CHECK_BUFFER(in);
220}
221
Daniel Veillard260a68f1998-08-13 03:39:55 +0000222/************************************************************************
223 * *
224 * Parser stacks related functions and macros *
225 * *
226 ************************************************************************/
Daniel Veillard011b63c1999-06-02 17:44:04 +0000227
/* Initial value given to ctxt->replaceEntities by xmlInitParserCtxt(). */
int xmlSubstituteEntitiesDefaultValue = 0;
/* Initial value given to ctxt->validate by xmlInitParserCtxt(). */
int xmlDoValidityCheckingDefaultValue = 0;
Daniel Veillard011b63c1999-06-02 17:44:04 +0000230
Daniel Veillard260a68f1998-08-13 03:39:55 +0000231/*
232 * Generic function for accessing stacks in the Parser Context
233 */
234
235#define PUSH_AND_POP(type, name) \
Daniel Veillard517752b1999-04-05 12:20:10 +0000236extern int name##Push(xmlParserCtxtPtr ctxt, type value) { \
Daniel Veillard260a68f1998-08-13 03:39:55 +0000237 if (ctxt->name##Nr >= ctxt->name##Max) { \
238 ctxt->name##Max *= 2; \
239 ctxt->name##Tab = (void *) realloc(ctxt->name##Tab, \
240 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
241 if (ctxt->name##Tab == NULL) { \
242 fprintf(stderr, "realloc failed !\n"); \
243 exit(1); \
244 } \
245 } \
246 ctxt->name##Tab[ctxt->name##Nr] = value; \
247 ctxt->name = value; \
248 return(ctxt->name##Nr++); \
249} \
Daniel Veillard517752b1999-04-05 12:20:10 +0000250extern type name##Pop(xmlParserCtxtPtr ctxt) { \
Daniel Veillardd692aa41999-02-28 21:54:31 +0000251 type ret; \
Daniel Veillard260a68f1998-08-13 03:39:55 +0000252 if (ctxt->name##Nr <= 0) return(0); \
253 ctxt->name##Nr--; \
Daniel Veillardccb09631998-10-27 06:21:04 +0000254 if (ctxt->name##Nr > 0) \
255 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
256 else \
257 ctxt->name = NULL; \
Daniel Veillardd692aa41999-02-28 21:54:31 +0000258 ret = ctxt->name##Tab[ctxt->name##Nr]; \
259 ctxt->name##Tab[ctxt->name##Nr] = 0; \
260 return(ret); \
Daniel Veillard260a68f1998-08-13 03:39:55 +0000261} \
262
263PUSH_AND_POP(xmlParserInputPtr, input)
264PUSH_AND_POP(xmlNodePtr, node)
265
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. They all expect a parser context variable named ctxt
 * to be in scope.
 *
 * Dirty macros, i.e. one need to make assumption on the context to use them
 *
 *   CUR_PTR return the current pointer to the CHAR to be parsed.
 *   CUR     returns the pending ctxt->token if there is one, otherwise the
 *           current CHAR value. This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   NXT(n)  returns the n'th next CHAR. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n CHAR, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *   SHRINK  drop already parsed data from the current input, then pop the
 *           input if it is exhausted and cannot be grown.
 *   GROW    try to get more lookahead for the current input, then pop the
 *           input if it is exhausted and cannot be grown.
 *
 * Clean macros, not dependent of an ASCII context
 *
 *   CURRENT Returns the CHAR at the current input position (the pending
 *           token is NOT taken into account).
 *   NEXT    Consume the pending token if any, otherwise skip to the next
 *           CHAR, updating line and column, and pop exhausted inputs on the
 *           fly. Gives the reference handlers a chance to run when the new
 *           current CHAR is '%' or '&'.
 *   SKIP_BLANKS
 *           Skip blank CHARs, letting the reference handlers run when a '%'
 *           or '&' follows, for as long as blanks keep showing up.
 *
 * SKIP, NXT and CUR_PTR are fully parenthesized so that they combine safely
 * with surrounding operators, and SHRINK/GROW are wrapped in do/while(0) so
 * that "if (cond) GROW;" guards the whole macro and not only its first
 * statement. NEXT (a bare block) and SKIP_BLANKS (which carries its own
 * trailing semicolon) keep their historical shape since callers rely on it.
 */

#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define SKIP(val) (ctxt->input->cur += (val))
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR (ctxt->input->cur)
#define SHRINK do {							\
    xmlParserInputShrink(ctxt->input);					\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)

#define GROW do {							\
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)

#define SKIP_BLANKS 							\
    do { 								\
        while (IS_BLANK(CUR)) NEXT;					\
        if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
        if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);	\
    } while (IS_BLANK(CUR));

#define CURRENT (*ctxt->input->cur)
#define NEXT {								\
    if (ctxt->token != 0) ctxt->token = 0;				\
    else {								\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {		\
            xmlPopInput(ctxt);						\
    } else {								\
        if (*(ctxt->input->cur) == '\n') {				\
            ctxt->input->line++; ctxt->input->col = 1;			\
        } else ctxt->input->col++;					\
        ctxt->input->cur++;						\
        if (*ctxt->input->cur == 0)					\
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);	\
}}
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000331
Daniel Veillard260a68f1998-08-13 03:39:55 +0000332
Daniel Veillardb05deb71999-08-10 19:04:08 +0000333/************************************************************************
334 * *
335 * Commodity functions to handle entities processing *
336 * *
337 ************************************************************************/
Daniel Veillard260a68f1998-08-13 03:39:55 +0000338
Daniel Veillard11e00581998-10-24 18:27:49 +0000339/**
340 * xmlPopInput:
341 * @ctxt: an XML parser context
342 *
Daniel Veillard260a68f1998-08-13 03:39:55 +0000343 * xmlPopInput: the current input pointed by ctxt->input came to an end
344 * pop it and return the next char.
345 *
Daniel Veillard1e346af1999-02-22 10:33:01 +0000346 * Returns the current CHAR in the parser context
Daniel Veillard260a68f1998-08-13 03:39:55 +0000347 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000348CHAR
349xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000350 if (ctxt->inputNr == 1) return(0); /* End of main Input */
Daniel Veillardbc50b591999-03-01 12:28:53 +0000351 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillardb05deb71999-08-10 19:04:08 +0000352 if ((*ctxt->input->cur == 0) &&
353 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
354 return(xmlPopInput(ctxt));
Daniel Veillard260a68f1998-08-13 03:39:55 +0000355 return(CUR);
356}
357
Daniel Veillard11e00581998-10-24 18:27:49 +0000358/**
359 * xmlPushInput:
360 * @ctxt: an XML parser context
361 * @input: an XML parser input fragment (entity, XML fragment ...).
362 *
Daniel Veillard260a68f1998-08-13 03:39:55 +0000363 * xmlPushInput: switch to a new input stream which is stacked on top
364 * of the previous one(s).
365 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000366void
367xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000368 if (input == NULL) return;
369 inputPush(ctxt, input);
370}
371
Daniel Veillard11e00581998-10-24 18:27:49 +0000372/**
Daniel Veillardd692aa41999-02-28 21:54:31 +0000373 * xmlFreeInputStream:
Daniel Veillardb05deb71999-08-10 19:04:08 +0000374 * @input: an xmlP arserInputPtr
Daniel Veillardd692aa41999-02-28 21:54:31 +0000375 *
376 * Free up an input stream.
377 */
378void
379xmlFreeInputStream(xmlParserInputPtr input) {
380 if (input == NULL) return;
381
Daniel Veillardbc50b591999-03-01 12:28:53 +0000382 if (input->filename != NULL) free((char *) input->filename);
Daniel Veillardb05deb71999-08-10 19:04:08 +0000383 if (input->directory != NULL) free((char *) input->directory);
Daniel Veillardd692aa41999-02-28 21:54:31 +0000384 if ((input->free != NULL) && (input->base != NULL))
385 input->free((char *) input->base);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000386 if (input->buf != NULL)
387 xmlFreeParserInputBuffer(input->buf);
Daniel Veillardd692aa41999-02-28 21:54:31 +0000388 memset(input, -1, sizeof(xmlParserInput));
389 free(input);
390}
391
392/**
Daniel Veillardb05deb71999-08-10 19:04:08 +0000393 * xmlNewInputStream:
394 * @ctxt: an XML parser context
395 *
396 * Create a new input stream structure
397 * Returns the new input stream or NULL
398 */
399xmlParserInputPtr
400xmlNewInputStream(xmlParserCtxtPtr ctxt) {
401 xmlParserInputPtr input;
402
403 input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
404 if (input == NULL) {
405 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
406 ctxt->sax->error(ctxt->userData, "malloc: couldn't allocate a new input stream\n");
407 return(NULL);
408 }
409 input->filename = NULL;
410 input->directory = NULL;
411 input->base = NULL;
412 input->cur = NULL;
413 input->buf = NULL;
414 input->line = 1;
415 input->col = 1;
416 input->buf = NULL;
417 input->free = NULL;
418 input->consumed = 0;
419 return(input);
420}
421
422/**
Daniel Veillard11e00581998-10-24 18:27:49 +0000423 * xmlNewEntityInputStream:
424 * @ctxt: an XML parser context
425 * @entity: an Entity pointer
426 *
Daniel Veillard011b63c1999-06-02 17:44:04 +0000427 * Create a new input stream based on an xmlEntityPtr
Daniel Veillard1e346af1999-02-22 10:33:01 +0000428 * Returns the new input stream
Daniel Veillard260a68f1998-08-13 03:39:55 +0000429 */
Daniel Veillardccb09631998-10-27 06:21:04 +0000430xmlParserInputPtr
431xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000432 xmlParserInputPtr input;
433
434 if (entity == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +0000435 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +0000436 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +0000437 "internal: xmlNewEntityInputStream entity = NULL\n");
Daniel Veillardccb09631998-10-27 06:21:04 +0000438 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000439 }
440 if (entity->content == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +0000441 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +0000442 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +0000443 "internal: xmlNewEntityInputStream entity->input = NULL\n");
Daniel Veillardccb09631998-10-27 06:21:04 +0000444 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000445 }
Daniel Veillardb05deb71999-08-10 19:04:08 +0000446 input = xmlNewInputStream(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000447 if (input == NULL) {
Daniel Veillardccb09631998-10-27 06:21:04 +0000448 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000449 }
450 input->filename = entity->SystemID; /* TODO !!! char <- CHAR */
451 input->base = entity->content;
452 input->cur = entity->content;
Daniel Veillardccb09631998-10-27 06:21:04 +0000453 return(input);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000454}
455
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000456/**
457 * xmlNewStringInputStream:
458 * @ctxt: an XML parser context
Daniel Veillardb05deb71999-08-10 19:04:08 +0000459 * @buffer: an memory buffer
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000460 *
461 * Create a new input stream based on a memory buffer.
Daniel Veillard1e346af1999-02-22 10:33:01 +0000462 * Returns the new input stream
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000463 */
464xmlParserInputPtr
Daniel Veillardb05deb71999-08-10 19:04:08 +0000465xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const CHAR *buffer) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000466 xmlParserInputPtr input;
467
Daniel Veillardb05deb71999-08-10 19:04:08 +0000468 if (buffer == NULL) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000469 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +0000470 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000471 "internal: xmlNewStringInputStream string = NULL\n");
472 return(NULL);
473 }
Daniel Veillardb05deb71999-08-10 19:04:08 +0000474 input = xmlNewInputStream(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000475 if (input == NULL) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000476 return(NULL);
477 }
Daniel Veillardb05deb71999-08-10 19:04:08 +0000478 input->base = buffer;
479 input->cur = buffer;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000480 return(input);
481}
482
Daniel Veillard011b63c1999-06-02 17:44:04 +0000483/**
484 * xmlNewInputFromFile:
485 * @ctxt: an XML parser context
486 * @filename: the filename to use as entity
487 *
488 * Create a new input stream based on a file.
489 *
490 * Returns the new input stream or NULL in case of error
491 */
492xmlParserInputPtr
493xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000494 xmlParserInputBufferPtr buf;
Daniel Veillard011b63c1999-06-02 17:44:04 +0000495 xmlParserInputPtr inputStream;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000496 char *directory = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +0000497
Daniel Veillardb05deb71999-08-10 19:04:08 +0000498 if (ctxt == NULL) return(NULL);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000499 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
Daniel Veillardb05deb71999-08-10 19:04:08 +0000500 if (buf == NULL) {
501 char name[1024];
Daniel Veillard011b63c1999-06-02 17:44:04 +0000502
Daniel Veillardb05deb71999-08-10 19:04:08 +0000503 if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
504#ifdef WIN32
505 sprintf(name, "%s\\%s", ctxt->input->directory, filename);
506#else
507 sprintf(name, "%s/%s", ctxt->input->directory, filename);
508#endif
509 buf = xmlParserInputBufferCreateFilename(name,
510 XML_CHAR_ENCODING_NONE);
511 if (buf != NULL)
512 directory = strdup(ctxt->input->directory);
513 }
514 if ((buf == NULL) && (ctxt->directory != NULL)) {
515#ifdef WIN32
516 sprintf(name, "%s\\%s", ctxt->directory, filename);
517#else
518 sprintf(name, "%s/%s", ctxt->directory, filename);
519#endif
520 buf = xmlParserInputBufferCreateFilename(name,
521 XML_CHAR_ENCODING_NONE);
522 if (buf != NULL)
523 directory = strdup(ctxt->directory);
524 }
525 if (buf == NULL)
526 return(NULL);
527 }
528 if (directory == NULL)
529 directory = xmlParserGetDirectory(filename);
530
531 inputStream = xmlNewInputStream(ctxt);
Daniel Veillard011b63c1999-06-02 17:44:04 +0000532 if (inputStream == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +0000533 if (directory != NULL) free((char *) directory);
Daniel Veillard011b63c1999-06-02 17:44:04 +0000534 return(NULL);
535 }
536
537 inputStream->filename = strdup(filename);
Daniel Veillardb05deb71999-08-10 19:04:08 +0000538 inputStream->directory = directory;
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000539 inputStream->buf = buf;
Daniel Veillard011b63c1999-06-02 17:44:04 +0000540
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000541 inputStream->base = inputStream->buf->buffer->content;
542 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000543 if ((ctxt->directory == NULL) && (directory != NULL))
544 ctxt->directory = directory;
Daniel Veillard011b63c1999-06-02 17:44:04 +0000545 return(inputStream);
546}
547
548/************************************************************************
549 * *
Daniel Veillardb05deb71999-08-10 19:04:08 +0000550 * Commodity functions to handle parser contexts *
551 * *
552 ************************************************************************/
553
554/**
555 * xmlInitParserCtxt:
556 * @ctxt: an XML parser context
557 *
558 * Initialize a parser context
559 */
560
561void
562xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
563{
564 xmlSAXHandler *sax;
565
566 sax = (xmlSAXHandler *) malloc(sizeof(xmlSAXHandler));
567 if (sax == NULL) {
568 fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
569 }
570
571 /* Allocate the Input stack */
572 ctxt->inputTab = (xmlParserInputPtr *) malloc(5 * sizeof(xmlParserInputPtr));
573 ctxt->inputNr = 0;
574 ctxt->inputMax = 5;
575 ctxt->input = NULL;
576 ctxt->version = NULL;
577 ctxt->encoding = NULL;
578 ctxt->standalone = -1;
579 ctxt->hasExternalSubset = 0;
580 ctxt->hasPErefs = 0;
581 ctxt->html = 0;
582 ctxt->external = 0;
583 ctxt->instate = XML_PARSER_PROLOG;
584 ctxt->token = 0;
585 ctxt->directory = NULL;
586
587 /* Allocate the Node stack */
588 ctxt->nodeTab = (xmlNodePtr *) malloc(10 * sizeof(xmlNodePtr));
589 ctxt->nodeNr = 0;
590 ctxt->nodeMax = 10;
591 ctxt->node = NULL;
592
593 if (sax == NULL) ctxt->sax = &xmlDefaultSAXHandler;
594 else {
595 ctxt->sax = sax;
596 memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
597 }
598 ctxt->userData = ctxt;
599 ctxt->myDoc = NULL;
600 ctxt->wellFormed = 1;
601 ctxt->valid = 1;
602 ctxt->validate = xmlDoValidityCheckingDefaultValue;
603 ctxt->vctxt.userData = ctxt;
604 ctxt->vctxt.error = xmlParserValidityError;
605 ctxt->vctxt.warning = xmlParserValidityWarning;
606 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
607 ctxt->record_info = 0;
608 xmlInitNodeInfoSeq(&ctxt->node_seq);
609}
610
611/**
612 * xmlFreeParserCtxt:
613 * @ctxt: an XML parser context
614 *
615 * Free all the memory used by a parser context. However the parsed
616 * document in ctxt->myDoc is not freed.
617 */
618
619void
620xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
621{
622 xmlParserInputPtr input;
623
624 if (ctxt == NULL) return;
625
626 while ((input = inputPop(ctxt)) != NULL) {
627 xmlFreeInputStream(input);
628 }
629
630 if (ctxt->nodeTab != NULL) free(ctxt->nodeTab);
631 if (ctxt->inputTab != NULL) free(ctxt->inputTab);
632 if (ctxt->version != NULL) free((char *) ctxt->version);
633 if (ctxt->encoding != NULL) free((char *) ctxt->encoding);
634 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
635 free(ctxt->sax);
636 if (ctxt->directory != NULL) free((char *) ctxt->directory);
637 free(ctxt);
638}
639
640/**
641 * xmlNewParserCtxt:
642 *
643 * Allocate and initialize a new parser context.
644 *
645 * Returns the xmlParserCtxtPtr or NULL
646 */
647
648xmlParserCtxtPtr
649xmlNewParserCtxt()
650{
651 xmlParserCtxtPtr ctxt;
652
653 ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
654 if (ctxt == NULL) {
655 fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
656 perror("malloc");
657 return(NULL);
658 }
659 xmlInitParserCtxt(ctxt);
660 return(ctxt);
661}
662
663/**
664 * xmlClearParserCtxt:
665 * @ctxt: an XML parser context
666 *
667 * Clear (release owned resources) and reinitialize a parser context
668 */
669
670void
671xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
672{
673 xmlClearNodeInfoSeq(&ctxt->node_seq);
674 xmlInitParserCtxt(ctxt);
675}
676
677/************************************************************************
678 * *
Daniel Veillard011b63c1999-06-02 17:44:04 +0000679 * Commodity functions to handle entities *
680 * *
681 ************************************************************************/
682
Daniel Veillardb05deb71999-08-10 19:04:08 +0000683void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
684void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
685
686/**
687 * xmlParseCharRef:
688 * @ctxt: an XML parser context
689 *
690 * parse Reference declarations
691 *
692 * [66] CharRef ::= '&#' [0-9]+ ';' |
693 * '&#x' [0-9a-fA-F]+ ';'
694 *
695 * [ WFC: Legal Character ]
696 * Characters referred to using character references must match the
697 * production for Char.
698 *
699 * Returns the value parsed (as an int)
700 */
701int
702xmlParseCharRef(xmlParserCtxtPtr ctxt) {
703 int val = 0;
704
705 if (ctxt->token != 0) {
706 val = ctxt->token;
707 ctxt->token = 0;
708 return(val);
709 }
710 if ((CUR == '&') && (NXT(1) == '#') &&
711 (NXT(2) == 'x')) {
712 SKIP(3);
713 while (CUR != ';') {
714 if ((CUR >= '0') && (CUR <= '9'))
715 val = val * 16 + (CUR - '0');
716 else if ((CUR >= 'a') && (CUR <= 'f'))
717 val = val * 16 + (CUR - 'a') + 10;
718 else if ((CUR >= 'A') && (CUR <= 'F'))
719 val = val * 16 + (CUR - 'A') + 10;
720 else {
721 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
722 ctxt->sax->error(ctxt->userData,
723 "xmlParseCharRef: invalid hexadecimal value\n");
724 ctxt->wellFormed = 0;
725 val = 0;
726 break;
727 }
728 NEXT;
729 }
730 if (CUR == ';')
731 NEXT;
732 } else if ((CUR == '&') && (NXT(1) == '#')) {
733 SKIP(2);
734 while (CUR != ';') {
735 if ((CUR >= '0') && (CUR <= '9'))
736 val = val * 10 + (CUR - '0');
737 else {
738 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
739 ctxt->sax->error(ctxt->userData,
740 "xmlParseCharRef: invalid decimal value\n");
741 ctxt->wellFormed = 0;
742 val = 0;
743 break;
744 }
745 NEXT;
746 }
747 if (CUR == ';')
748 NEXT;
749 } else {
750 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
751 ctxt->sax->error(ctxt->userData,
752 "xmlParseCharRef: invalid value\n");
753 ctxt->wellFormed = 0;
754 }
755
756 /*
757 * [ WFC: Legal Character ]
758 * Characters referred to using character references must match the
759 * production for Char.
760 */
761 if (IS_CHAR(val)) {
762 return(val);
763 } else {
764 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
765 ctxt->sax->error(ctxt->userData, "CharRef: invalid CHAR value %d\n",
766 val);
767 ctxt->wellFormed = 0;
768 }
769 return(0);
770}
771
772/**
773 * xmlParserHandleReference:
774 * @ctxt: the parser context
775 *
776 * [67] Reference ::= EntityRef | CharRef
777 *
778 * [68] EntityRef ::= '&' Name ';'
779 *
780 * [ WFC: Entity Declared ]
781 * the Name given in the entity reference must match that in an entity
782 * declaration, except that well-formed documents need not declare any
783 * of the following entities: amp, lt, gt, apos, quot.
784 *
785 * [ WFC: Parsed Entity ]
786 * An entity reference must not contain the name of an unparsed entity
787 *
788 * [66] CharRef ::= '&#' [0-9]+ ';' |
789 * '&#x' [0-9a-fA-F]+ ';'
790 *
 * A Reference (entity or character reference) may have been detected in
 * the current input stream; the handling is done according to
793 * http://www.w3.org/TR/REC-xml#entproc
794 */
795void
796xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
797 xmlParserInputPtr input;
798 CHAR *name;
799 xmlEntityPtr ent = NULL;
800
801 if (ctxt->token != 0) return;
802 if (CUR != '&') return;
803 GROW;
804 if ((CUR == '&') && (NXT(1) == '#')) {
805 switch(ctxt->instate) {
806 case XML_PARSER_CDATA_SECTION:
807 return;
808 case XML_PARSER_COMMENT:
809 return;
810 case XML_PARSER_EOF:
811 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
812 ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
813 ctxt->wellFormed = 0;
814 return;
815 case XML_PARSER_PROLOG:
816 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
817 ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
818 ctxt->wellFormed = 0;
819 return;
820 case XML_PARSER_EPILOG:
821 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
822 ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
823 ctxt->wellFormed = 0;
824 return;
825 case XML_PARSER_DTD:
826 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
827 ctxt->sax->error(ctxt->userData,
828 "CharRef are forbiden in DTDs!\n");
829 ctxt->wellFormed = 0;
830 return;
831 case XML_PARSER_ENTITY_DECL:
832 /* we just ignore it there */
833 return;
834 case XML_PARSER_ENTITY_VALUE:
835 /*
836 * NOTE: in the case of entity values, we don't do the
837 * substitution here since we need the litteral
838 * entity value to be able to save the internal
839 * subset of the document.
840 * This will be handled by xmlDecodeEntities
841 */
842 return;
843 case XML_PARSER_CONTENT:
844 case XML_PARSER_ATTRIBUTE_VALUE:
845 /* TODO this may not be Ok for UTF-8, multibyte sequence */
846 ctxt->token = xmlParseCharRef(ctxt);
847 return;
848 }
849 return;
850 }
851
852 switch(ctxt->instate) {
853 case XML_PARSER_CDATA_SECTION:
854 return;
855 case XML_PARSER_COMMENT:
856 return;
857 case XML_PARSER_EOF:
858 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
859 ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
860 ctxt->wellFormed = 0;
861 return;
862 case XML_PARSER_PROLOG:
863 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
864 ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
865 ctxt->wellFormed = 0;
866 return;
867 case XML_PARSER_EPILOG:
868 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
869 ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
870 ctxt->wellFormed = 0;
871 return;
872 case XML_PARSER_ENTITY_VALUE:
873 /*
874 * NOTE: in the case of entity values, we don't do the
875 * substitution here since we need the litteral
876 * entity value to be able to save the internal
877 * subset of the document.
878 * This will be handled by xmlDecodeEntities
879 */
880 return;
881 case XML_PARSER_ATTRIBUTE_VALUE:
882 /*
883 * NOTE: in the case of attributes values, we don't do the
884 * substitution here unless we are in a mode where
885 * the parser is explicitely asked to substitute
886 * entities. The SAX callback is called with values
887 * without entity substitution.
888 * This will then be handled by xmlDecodeEntities
889 */
890 if (ctxt->replaceEntities == 0) return;
891 break;
892 case XML_PARSER_ENTITY_DECL:
893 /*
894 * we just ignore it there
895 * the substitution will be done once the entity is referenced
896 */
897 return;
898 case XML_PARSER_DTD:
899 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
900 ctxt->sax->error(ctxt->userData,
901 "Entity references are forbiden in DTDs!\n");
902 ctxt->wellFormed = 0;
903 return;
904 case XML_PARSER_CONTENT:
905 if (ctxt->replaceEntities == 0) return;
906 break;
907 }
908
909 NEXT;
910 name = xmlScanName(ctxt);
911 if (name == NULL) {
912 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
913 ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
914 ctxt->wellFormed = 0;
915 ctxt->token = '&';
916 return;
917 }
918 if (NXT(xmlStrlen(name)) != ';') {
919 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
920 ctxt->sax->error(ctxt->userData,
921 "Entity reference: ';' expected\n");
922 ctxt->wellFormed = 0;
923 ctxt->token = '&';
924 free(name);
925 return;
926 }
927 SKIP(xmlStrlen(name) + 1);
928 if (ctxt->sax != NULL) {
929 if (ctxt->sax->getEntity != NULL)
930 ent = ctxt->sax->getEntity(ctxt->userData, name);
931 }
932
933 /*
934 * [ WFC: Entity Declared ]
935 * the Name given in the entity reference must match that in an entity
936 * declaration, except that well-formed documents need not declare any
937 * of the following entities: amp, lt, gt, apos, quot.
938 */
939 if (ent == NULL)
940 ent = xmlGetPredefinedEntity(name);
941 if (ent == NULL) {
942 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
943 ctxt->sax->error(ctxt->userData,
944 "Entity reference: entity %s not declared\n",
945 name);
946 ctxt->wellFormed = 0;
947 free(name);
948 return;
949 }
950
951 /*
952 * [ WFC: Parsed Entity ]
953 * An entity reference must not contain the name of an unparsed entity
954 */
955 if (ent->type == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
956 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
957 ctxt->sax->error(ctxt->userData,
958 "Entity reference to unparsed entity %s\n", name);
959 ctxt->wellFormed = 0;
960 }
961
962 if (ent->type == XML_INTERNAL_PREDEFINED_ENTITY) {
963 ctxt->token = ent->content[0];
964 free(name);
965 return;
966 }
967 input = xmlNewEntityInputStream(ctxt, ent);
968 xmlPushInput(ctxt, input);
969 free(name);
970 return;
971}
972
973/**
974 * xmlParserHandlePEReference:
975 * @ctxt: the parser context
976 *
977 * [69] PEReference ::= '%' Name ';'
978 *
979 * [ WFC: No Recursion ]
980 * TODO A parsed entity must not contain a recursive
981 * reference to itself, either directly or indirectly.
982 *
983 * [ WFC: Entity Declared ]
984 * In a document without any DTD, a document with only an internal DTD
985 * subset which contains no parameter entity references, or a document
986 * with "standalone='yes'", ... ... The declaration of a parameter
987 * entity must precede any reference to it...
988 *
989 * [ VC: Entity Declared ]
990 * In a document with an external subset or external parameter entities
991 * with "standalone='no'", ... ... The declaration of a parameter entity
992 * must precede any reference to it...
993 *
994 * [ WFC: In DTD ]
995 * Parameter-entity references may only appear in the DTD.
996 * NOTE: misleading but this is handled.
997 *
998 * A PEReference may have been detected in the current input stream
999 * the handling is done accordingly to
1000 * http://www.w3.org/TR/REC-xml#entproc
1001 * i.e.
1002 * - Included in literal in entity values
1003 * - Included as Paraemeter Entity reference within DTDs
1004 */
1005void
1006xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1007 CHAR *name;
1008 xmlEntityPtr entity = NULL;
1009 xmlParserInputPtr input;
1010
1011 if (ctxt->token != 0) return;
1012 if (CUR != '%') return;
1013 switch(ctxt->instate) {
1014 case XML_PARSER_CDATA_SECTION:
1015 return;
1016 case XML_PARSER_COMMENT:
1017 return;
1018 case XML_PARSER_EOF:
1019 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1020 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
1021 ctxt->wellFormed = 0;
1022 return;
1023 case XML_PARSER_PROLOG:
1024 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1025 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
1026 ctxt->wellFormed = 0;
1027 return;
1028 case XML_PARSER_ENTITY_DECL:
1029 case XML_PARSER_CONTENT:
1030 case XML_PARSER_ATTRIBUTE_VALUE:
1031 /* we just ignore it there */
1032 return;
1033 case XML_PARSER_EPILOG:
1034 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1035 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1036 ctxt->wellFormed = 0;
1037 return;
1038 case XML_PARSER_ENTITY_VALUE:
1039 /*
1040 * NOTE: in the case of entity values, we don't do the
1041 * substitution here since we need the litteral
1042 * entity value to be able to save the internal
1043 * subset of the document.
1044 * This will be handled by xmlDecodeEntities
1045 */
1046 return;
1047 case XML_PARSER_DTD:
1048 /*
1049 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1050 * In the internal DTD subset, parameter-entity references
1051 * can occur only where markup declarations can occur, not
1052 * within markup declarations.
1053 * In that case this is handled in xmlParseMarkupDecl
1054 */
1055 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1056 return;
1057 }
1058
1059 NEXT;
1060 name = xmlParseName(ctxt);
1061 if (name == NULL) {
1062 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1063 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
1064 ctxt->wellFormed = 0;
1065 } else {
1066 if (CUR == ';') {
1067 NEXT;
1068 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1069 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1070 if (entity == NULL) {
1071
1072 /*
1073 * [ WFC: Entity Declared ]
1074 * In a document without any DTD, a document with only an
1075 * internal DTD subset which contains no parameter entity
1076 * references, or a document with "standalone='yes'", ...
1077 * ... The declaration of a parameter entity must precede
1078 * any reference to it...
1079 */
1080 if ((ctxt->standalone == 1) ||
1081 ((ctxt->hasExternalSubset == 0) &&
1082 (ctxt->hasPErefs == 0))) {
1083 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1084 ctxt->sax->error(ctxt->userData,
1085 "PEReference: %%%s; not found\n", name);
1086 ctxt->wellFormed = 0;
1087 } else {
1088 /*
1089 * [ VC: Entity Declared ]
1090 * In a document with an external subset or external
1091 * parameter entities with "standalone='no'", ...
1092 * ... The declaration of a parameter entity must precede
1093 * any reference to it...
1094 */
1095 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1096 ctxt->sax->warning(ctxt->userData,
1097 "PEReference: %%%s; not found\n", name);
1098 ctxt->valid = 0;
1099 }
1100 } else {
1101 if ((entity->type == XML_INTERNAL_PARAMETER_ENTITY) ||
1102 (entity->type == XML_EXTERNAL_PARAMETER_ENTITY)) {
1103 /*
1104 * TODO !!!! handle the extra spaces added before and after
1105 * c.f. http://www.w3.org/TR/REC-xml#as-PE
1106 * TODO !!!! Avoid quote processing in parameters value
1107 * c.f. http://www.w3.org/TR/REC-xml#inliteral
1108 */
1109 input = xmlNewEntityInputStream(ctxt, entity);
1110 xmlPushInput(ctxt, input);
1111 } else {
1112 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1113 ctxt->sax->error(ctxt->userData,
1114 "xmlHandlePEReference: %s is not a parameter entity\n",
1115 name);
1116 ctxt->wellFormed = 0;
1117 }
1118 }
1119 } else {
1120 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1121 ctxt->sax->error(ctxt->userData,
1122 "xmlHandlePEReference: expecting ';'\n");
1123 ctxt->wellFormed = 0;
1124 }
1125 free(name);
1126 }
1127}
1128
/*
 * Macro used to grow the current buffer.
 *
 * @buffer must be the name of a CHAR * variable that has a companion
 * int variable named <buffer>_size in scope. The size is doubled and the
 * buffer reallocated; on allocation failure the process prints a message
 * and exits. The buffer may move, so any pointer into it must be
 * recomputed by the caller afterwards.
 *
 * The body is wrapped in do { } while (0) so that "growBuffer(x);"
 * behaves as a single statement, including in an unbraced if/else.
 */
#define growBuffer(buffer) do {						\
    buffer##_size *= 2;							\
    buffer = (CHAR *) realloc(buffer, buffer##_size * sizeof(CHAR));	\
    if (buffer == NULL) {						\
	perror("realloc failed");					\
	exit(1);							\
    }									\
} while (0)
1140
Daniel Veillard011b63c1999-06-02 17:44:04 +00001141/**
1142 * xmlDecodeEntities:
1143 * @ctxt: the parser context
1144 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1145 * @len: the len to decode (in bytes !), -1 for no size limit
1146 * @end: an end marker CHAR, 0 if none
1147 * @end2: an end marker CHAR, 0 if none
1148 * @end3: an end marker CHAR, 0 if none
1149 *
1150 * [67] Reference ::= EntityRef | CharRef
1151 *
1152 * [69] PEReference ::= '%' Name ';'
1153 *
1154 * Returns A newly allocated string with the substitution done. The caller
1155 * must deallocate it !
1156 */
1157CHAR *
1158xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
1159 CHAR end, CHAR end2, CHAR end3) {
1160 CHAR *buffer = NULL;
1161 int buffer_size = 0;
1162 CHAR *out = NULL;
1163
Daniel Veillardb05deb71999-08-10 19:04:08 +00001164 CHAR *current = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001165 xmlEntityPtr ent;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001166 int nbchars = 0;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001167 unsigned int max = (unsigned int) len;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001168 CHAR cur;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001169
1170 /*
1171 * allocate a translation buffer.
1172 */
1173 buffer_size = 1000;
1174 buffer = (CHAR *) malloc(buffer_size * sizeof(CHAR));
1175 if (buffer == NULL) {
1176 perror("xmlDecodeEntities: malloc failed");
1177 return(NULL);
1178 }
1179 out = buffer;
1180
1181 /*
1182 * Ok loop until we reach one of the ending char or a size limit.
1183 */
Daniel Veillardb05deb71999-08-10 19:04:08 +00001184 cur = CUR;
1185 while ((nbchars < max) && (cur != end) &&
1186 (cur != end2) && (cur != end3)) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00001187
Daniel Veillardb05deb71999-08-10 19:04:08 +00001188 if (cur == 0) break;
1189 if ((cur == '&') && (NXT(1) == '#')) {
1190 int val = xmlParseCharRef(ctxt);
1191 *out++ = val;
1192 nbchars += 3;
1193 } else if ((cur == '&') && (what & XML_SUBSTITUTE_REF)) {
1194 ent = xmlParseEntityRef(ctxt);
1195 if ((ent != NULL) &&
1196 (ctxt->replaceEntities != 0)) {
1197 current = ent->content;
1198 while (*current != 0) {
1199 *out++ = *current++;
1200 if (out - buffer > buffer_size - 100) {
1201 int index = out - buffer;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001202
Daniel Veillardb05deb71999-08-10 19:04:08 +00001203 growBuffer(buffer);
1204 out = &buffer[index];
Daniel Veillard011b63c1999-06-02 17:44:04 +00001205 }
1206 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00001207 nbchars += 3 + xmlStrlen(ent->name);
1208 } else if (ent != NULL) {
1209 int i = xmlStrlen(ent->name);
1210 const CHAR *cur = ent->name;
1211
1212 nbchars += i + 2;
1213 *out++ = '&';
1214 if (out - buffer > buffer_size - i - 100) {
1215 int index = out - buffer;
1216
1217 growBuffer(buffer);
1218 out = &buffer[index];
1219 }
1220 for (;i > 0;i--)
1221 *out++ = *cur++;
1222 *out++ = ';';
Daniel Veillard011b63c1999-06-02 17:44:04 +00001223 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00001224 } else if (cur == '%' && (what & XML_SUBSTITUTE_PEREF)) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00001225 /*
1226 * a PEReference induce to switch the entity flow,
1227 * we break here to flush the current set of chars
1228 * parsed if any. We will be called back later.
1229 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001230 if (nbchars != 0) break;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001231
1232 xmlParsePEReference(ctxt);
1233
1234 /*
1235 * Pop-up of finished entities.
1236 */
1237 while ((CUR == 0) && (ctxt->inputNr > 1))
1238 xmlPopInput(ctxt);
1239
Daniel Veillardb05deb71999-08-10 19:04:08 +00001240 break;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001241 } else {
1242 /* TODO: invalid for UTF-8 , use COPY(out); */
Daniel Veillardb05deb71999-08-10 19:04:08 +00001243 *out++ = cur;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001244 nbchars++;
Raph Levien05240da1999-06-15 21:27:11 +00001245 if (out - buffer > buffer_size - 100) {
1246 int index = out - buffer;
1247
1248 growBuffer(buffer);
1249 out = &buffer[index];
1250 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00001251 NEXT;
1252 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00001253 cur = CUR;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001254 }
1255 *out++ = 0;
1256 return(buffer);
1257}
1258
Daniel Veillard260a68f1998-08-13 03:39:55 +00001259
1260/************************************************************************
1261 * *
Daniel Veillard27d88741999-05-29 11:51:49 +00001262 * Commodity functions to handle encodings *
1263 * *
1264 ************************************************************************/
1265
1266/**
1267 * xmlSwitchEncoding:
1268 * @ctxt: the parser context
1269 * @len: the len of @cur
1270 *
1271 * change the input functions when discovering the character encoding
1272 * of a given entity.
1273 *
1274 */
1275void
1276xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1277{
1278 switch (enc) {
1279 case XML_CHAR_ENCODING_ERROR:
1280 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1281 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1282 ctxt->wellFormed = 0;
1283 break;
1284 case XML_CHAR_ENCODING_NONE:
1285 /* let's assume it's UTF-8 without the XML decl */
1286 return;
1287 case XML_CHAR_ENCODING_UTF8:
1288 /* default encoding, no conversion should be needed */
1289 return;
1290 case XML_CHAR_ENCODING_UTF16LE:
1291 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1292 ctxt->sax->error(ctxt->userData,
1293 "char encoding UTF16 little endian not supported\n");
1294 break;
1295 case XML_CHAR_ENCODING_UTF16BE:
1296 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1297 ctxt->sax->error(ctxt->userData,
1298 "char encoding UTF16 big endian not supported\n");
1299 break;
1300 case XML_CHAR_ENCODING_UCS4LE:
1301 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1302 ctxt->sax->error(ctxt->userData,
1303 "char encoding USC4 little endian not supported\n");
1304 break;
1305 case XML_CHAR_ENCODING_UCS4BE:
1306 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1307 ctxt->sax->error(ctxt->userData,
1308 "char encoding USC4 big endian not supported\n");
1309 break;
1310 case XML_CHAR_ENCODING_EBCDIC:
1311 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1312 ctxt->sax->error(ctxt->userData,
1313 "char encoding EBCDIC not supported\n");
1314 break;
1315 case XML_CHAR_ENCODING_UCS4_2143:
1316 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1317 ctxt->sax->error(ctxt->userData,
1318 "char encoding UCS4 2143 not supported\n");
1319 break;
1320 case XML_CHAR_ENCODING_UCS4_3412:
1321 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1322 ctxt->sax->error(ctxt->userData,
1323 "char encoding UCS4 3412 not supported\n");
1324 break;
1325 case XML_CHAR_ENCODING_UCS2:
1326 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1327 ctxt->sax->error(ctxt->userData,
1328 "char encoding UCS2 not supported\n");
1329 break;
1330 case XML_CHAR_ENCODING_8859_1:
1331 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1332 ctxt->sax->error(ctxt->userData,
1333 "char encoding ISO_8859_1 ISO Latin 1 not supported\n");
1334 break;
1335 case XML_CHAR_ENCODING_8859_2:
1336 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1337 ctxt->sax->error(ctxt->userData,
1338 "char encoding ISO_8859_2 ISO Latin 2 not supported\n");
1339 break;
1340 case XML_CHAR_ENCODING_8859_3:
1341 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1342 ctxt->sax->error(ctxt->userData,
1343 "char encoding ISO_8859_3 not supported\n");
1344 break;
1345 case XML_CHAR_ENCODING_8859_4:
1346 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1347 ctxt->sax->error(ctxt->userData,
1348 "char encoding ISO_8859_4 not supported\n");
1349 break;
1350 case XML_CHAR_ENCODING_8859_5:
1351 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1352 ctxt->sax->error(ctxt->userData,
1353 "char encoding ISO_8859_5 not supported\n");
1354 break;
1355 case XML_CHAR_ENCODING_8859_6:
1356 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1357 ctxt->sax->error(ctxt->userData,
1358 "char encoding ISO_8859_6 not supported\n");
1359 break;
1360 case XML_CHAR_ENCODING_8859_7:
1361 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1362 ctxt->sax->error(ctxt->userData,
1363 "char encoding ISO_8859_7 not supported\n");
1364 break;
1365 case XML_CHAR_ENCODING_8859_8:
1366 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1367 ctxt->sax->error(ctxt->userData,
1368 "char encoding ISO_8859_8 not supported\n");
1369 break;
1370 case XML_CHAR_ENCODING_8859_9:
1371 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1372 ctxt->sax->error(ctxt->userData,
1373 "char encoding ISO_8859_9 not supported\n");
1374 break;
1375 case XML_CHAR_ENCODING_2022_JP:
1376 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1377 ctxt->sax->error(ctxt->userData,
1378 "char encoding ISO-2022-JPnot supported\n");
1379 break;
1380 case XML_CHAR_ENCODING_SHIFT_JIS:
1381 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1382 ctxt->sax->error(ctxt->userData,
1383 "char encoding Shift_JISnot supported\n");
1384 break;
1385 case XML_CHAR_ENCODING_EUC_JP:
1386 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1387 ctxt->sax->error(ctxt->userData,
1388 "char encoding EUC-JPnot supported\n");
1389 break;
1390 }
1391}
1392
1393/************************************************************************
1394 * *
Daniel Veillard260a68f1998-08-13 03:39:55 +00001395 * Commodity functions to handle CHARs *
1396 * *
1397 ************************************************************************/
1398
Daniel Veillard11e00581998-10-24 18:27:49 +00001399/**
1400 * xmlStrndup:
1401 * @cur: the input CHAR *
1402 * @len: the len of @cur
1403 *
1404 * a strndup for array of CHAR's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001405 *
1406 * Returns a new CHAR * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001407 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001408CHAR *
1409xmlStrndup(const CHAR *cur, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001410 CHAR *ret = malloc((len + 1) * sizeof(CHAR));
1411
1412 if (ret == NULL) {
Daniel Veillardbe70ff71999-07-05 16:50:46 +00001413 fprintf(stderr, "malloc of %ld byte failed\n",
1414 (len + 1) * (long)sizeof(CHAR));
Daniel Veillard260a68f1998-08-13 03:39:55 +00001415 return(NULL);
1416 }
1417 memcpy(ret, cur, len * sizeof(CHAR));
1418 ret[len] = 0;
1419 return(ret);
1420}
1421
Daniel Veillard11e00581998-10-24 18:27:49 +00001422/**
1423 * xmlStrdup:
1424 * @cur: the input CHAR *
1425 *
1426 * a strdup for array of CHAR's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001427 *
1428 * Returns a new CHAR * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001429 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001430CHAR *
1431xmlStrdup(const CHAR *cur) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001432 const CHAR *p = cur;
1433
1434 while (IS_CHAR(*p)) p++;
1435 return(xmlStrndup(cur, p - cur));
1436}
1437
Daniel Veillard11e00581998-10-24 18:27:49 +00001438/**
1439 * xmlCharStrndup:
1440 * @cur: the input char *
1441 * @len: the len of @cur
1442 *
1443 * a strndup for char's to CHAR's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001444 *
1445 * Returns a new CHAR * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001446 */
1447
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001448CHAR *
1449xmlCharStrndup(const char *cur, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001450 int i;
1451 CHAR *ret = malloc((len + 1) * sizeof(CHAR));
1452
1453 if (ret == NULL) {
Daniel Veillardbe70ff71999-07-05 16:50:46 +00001454 fprintf(stderr, "malloc of %ld byte failed\n",
1455 (len + 1) * (long)sizeof(CHAR));
Daniel Veillard260a68f1998-08-13 03:39:55 +00001456 return(NULL);
1457 }
1458 for (i = 0;i < len;i++)
1459 ret[i] = (CHAR) cur[i];
1460 ret[len] = 0;
1461 return(ret);
1462}
1463
Daniel Veillard11e00581998-10-24 18:27:49 +00001464/**
1465 * xmlCharStrdup:
1466 * @cur: the input char *
1467 * @len: the len of @cur
1468 *
1469 * a strdup for char's to CHAR's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001470 *
1471 * Returns a new CHAR * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001472 */
1473
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001474CHAR *
1475xmlCharStrdup(const char *cur) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001476 const char *p = cur;
1477
1478 while (*p != '\0') p++;
1479 return(xmlCharStrndup(cur, p - cur));
1480}
1481
Daniel Veillard11e00581998-10-24 18:27:49 +00001482/**
1483 * xmlStrcmp:
1484 * @str1: the first CHAR *
1485 * @str2: the second CHAR *
1486 *
1487 * a strcmp for CHAR's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001488 *
1489 * Returns the integer result of the comparison
Daniel Veillard260a68f1998-08-13 03:39:55 +00001490 */
1491
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001492int
1493xmlStrcmp(const CHAR *str1, const CHAR *str2) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001494 register int tmp;
1495
1496 do {
1497 tmp = *str1++ - *str2++;
1498 if (tmp != 0) return(tmp);
1499 } while ((*str1 != 0) && (*str2 != 0));
1500 return (*str1 - *str2);
1501}
1502
Daniel Veillard11e00581998-10-24 18:27:49 +00001503/**
1504 * xmlStrncmp:
1505 * @str1: the first CHAR *
1506 * @str2: the second CHAR *
1507 * @len: the max comparison length
1508 *
1509 * a strncmp for CHAR's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001510 *
1511 * Returns the integer result of the comparison
Daniel Veillard260a68f1998-08-13 03:39:55 +00001512 */
1513
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001514int
1515xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001516 register int tmp;
1517
1518 if (len <= 0) return(0);
1519 do {
1520 tmp = *str1++ - *str2++;
1521 if (tmp != 0) return(tmp);
1522 len--;
1523 if (len <= 0) return(0);
1524 } while ((*str1 != 0) && (*str2 != 0));
1525 return (*str1 - *str2);
1526}
1527
Daniel Veillard11e00581998-10-24 18:27:49 +00001528/**
1529 * xmlStrchr:
1530 * @str: the CHAR * array
1531 * @val: the CHAR to search
1532 *
1533 * a strchr for CHAR's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001534 *
1535 * Returns the CHAR * for the first occurence or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001536 */
1537
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001538const CHAR *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001539xmlStrchr(const CHAR *str, CHAR val) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001540 while (*str != 0) {
1541 if (*str == val) return((CHAR *) str);
1542 str++;
1543 }
1544 return(NULL);
1545}
1546
Daniel Veillard11e00581998-10-24 18:27:49 +00001547/**
Daniel Veillard1566d3a1999-07-15 14:24:29 +00001548 * xmlStrstr:
1549 * @str: the CHAR * array (haystack)
1550 * @val: the CHAR to search (needle)
1551 *
1552 * a strstr for CHAR's
1553 *
1554 * Returns the CHAR * for the first occurence or NULL.
1555 */
1556
1557const CHAR *
1558xmlStrstr(const CHAR *str, CHAR *val) {
1559 int n;
1560
1561 if (str == NULL) return(NULL);
1562 if (val == NULL) return(NULL);
1563 n = xmlStrlen(val);
1564
1565 if (n == 0) return(str);
1566 while (*str != 0) {
1567 if (*str == *val) {
1568 if (!xmlStrncmp(str, val, n)) return((const CHAR *) str);
1569 }
1570 str++;
1571 }
1572 return(NULL);
1573}
1574
1575/**
1576 * xmlStrsub:
1577 * @str: the CHAR * array (haystack)
1578 * @start: the index of the first char (zero based)
1579 * @len: the length of the substring
1580 *
1581 * Extract a substring of a given string
1582 *
1583 * Returns the CHAR * for the first occurence or NULL.
1584 */
1585
1586CHAR *
1587xmlStrsub(const CHAR *str, int start, int len) {
1588 int i;
1589
1590 if (str == NULL) return(NULL);
1591 if (start < 0) return(NULL);
1592 if (len < 0) return(NULL);
1593
1594 for (i = 0;i < start;i++) {
1595 if (*str == 0) return(NULL);
1596 str++;
1597 }
1598 if (*str == 0) return(NULL);
1599 return(xmlStrndup(str, len));
1600}
1601
1602/**
Daniel Veillard11e00581998-10-24 18:27:49 +00001603 * xmlStrlen:
1604 * @str: the CHAR * array
1605 *
1606 * lenght of a CHAR's string
Daniel Veillard1e346af1999-02-22 10:33:01 +00001607 *
1608 * Returns the number of CHAR contained in the ARRAY.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001609 */
1610
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001611int
1612xmlStrlen(const CHAR *str) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001613 int len = 0;
1614
1615 if (str == NULL) return(0);
1616 while (*str != 0) {
1617 str++;
1618 len++;
1619 }
1620 return(len);
1621}
1622
Daniel Veillard11e00581998-10-24 18:27:49 +00001623/**
1624 * xmlStrncat:
Daniel Veillard1e346af1999-02-22 10:33:01 +00001625 * @cur: the original CHAR * array
Daniel Veillard11e00581998-10-24 18:27:49 +00001626 * @add: the CHAR * array added
1627 * @len: the length of @add
1628 *
1629 * a strncat for array of CHAR's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001630 *
1631 * Returns a new CHAR * containing the concatenated string.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001632 */
1633
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001634CHAR *
1635xmlStrncat(CHAR *cur, const CHAR *add, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001636 int size;
1637 CHAR *ret;
1638
1639 if ((add == NULL) || (len == 0))
1640 return(cur);
1641 if (cur == NULL)
1642 return(xmlStrndup(add, len));
1643
1644 size = xmlStrlen(cur);
1645 ret = realloc(cur, (size + len + 1) * sizeof(CHAR));
1646 if (ret == NULL) {
Daniel Veillardbe70ff71999-07-05 16:50:46 +00001647 fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1648 (size + len + 1) * (long)sizeof(CHAR));
Daniel Veillard260a68f1998-08-13 03:39:55 +00001649 return(cur);
1650 }
1651 memcpy(&ret[size], add, len * sizeof(CHAR));
1652 ret[size + len] = 0;
1653 return(ret);
1654}
1655
Daniel Veillard11e00581998-10-24 18:27:49 +00001656/**
1657 * xmlStrcat:
Daniel Veillard1e346af1999-02-22 10:33:01 +00001658 * @cur: the original CHAR * array
Daniel Veillard11e00581998-10-24 18:27:49 +00001659 * @add: the CHAR * array added
1660 *
1661 * a strcat for array of CHAR's
Daniel Veillard1e346af1999-02-22 10:33:01 +00001662 *
1663 * Returns a new CHAR * containing the concatenated string.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001664 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001665CHAR *
1666xmlStrcat(CHAR *cur, const CHAR *add) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001667 const CHAR *p = add;
1668
1669 if (add == NULL) return(cur);
1670 if (cur == NULL)
1671 return(xmlStrdup(add));
1672
1673 while (IS_CHAR(*p)) p++;
1674 return(xmlStrncat(cur, add, p - add));
1675}
1676
1677/************************************************************************
1678 * *
1679 * Commodity functions, cleanup needed ? *
1680 * *
1681 ************************************************************************/
1682
Daniel Veillard11e00581998-10-24 18:27:49 +00001683/**
1684 * areBlanks:
1685 * @ctxt: an XML parser context
1686 * @str: a CHAR *
1687 * @len: the size of @str
1688 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00001689 * Is this a sequence of blank chars that one can ignore ?
Daniel Veillard11e00581998-10-24 18:27:49 +00001690 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00001691 * TODO: Whether white space are significant has to be checked accordingly
1692 * to DTD informations if available
Daniel Veillard1e346af1999-02-22 10:33:01 +00001693 *
1694 * Returns 1 if ignorable 0 otherwise.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001695 */
1696
1697static int areBlanks(xmlParserCtxtPtr ctxt, const CHAR *str, int len) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00001698 int i, ret;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001699 xmlNodePtr lastChild;
1700
1701 for (i = 0;i < len;i++)
1702 if (!(IS_BLANK(str[i]))) return(0);
1703
1704 if (CUR != '<') return(0);
Daniel Veillard517752b1999-04-05 12:20:10 +00001705 if (ctxt->node == NULL) return(0);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001706 if (ctxt->myDoc != NULL) {
1707 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1708 if (ret == 0) return(1);
1709 if (ret == 1) return(0);
1710 }
1711 /*
1712 * heuristic
1713 */
Daniel Veillard260a68f1998-08-13 03:39:55 +00001714 lastChild = xmlGetLastChild(ctxt->node);
1715 if (lastChild == NULL) {
1716 if (ctxt->node->content != NULL) return(0);
1717 } else if (xmlNodeIsText(lastChild))
1718 return(0);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001719 else if ((ctxt->node->childs != NULL) &&
1720 (xmlNodeIsText(ctxt->node->childs)))
1721 return(0);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001722 return(1);
1723}
1724
Daniel Veillard11e00581998-10-24 18:27:49 +00001725/**
1726 * xmlHandleEntity:
1727 * @ctxt: an XML parser context
1728 * @entity: an XML entity pointer.
1729 *
1730 * Default handling of defined entities, when should we define a new input
Daniel Veillard260a68f1998-08-13 03:39:55 +00001731 * stream ? When do we just handle that as a set of chars ?
Daniel Veillardb05deb71999-08-10 19:04:08 +00001732 *
1733 * OBSOLETE: to be removed at some point.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001734 */
1735
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001736void
1737xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001738 int len;
Daniel Veillardccb09631998-10-27 06:21:04 +00001739 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001740
1741 if (entity->content == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001742 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00001743 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
Daniel Veillard260a68f1998-08-13 03:39:55 +00001744 entity->name);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001745 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001746 return;
1747 }
1748 len = xmlStrlen(entity->content);
1749 if (len <= 2) goto handle_as_char;
1750
1751 /*
1752 * Redefine its content as an input stream.
1753 */
Daniel Veillardccb09631998-10-27 06:21:04 +00001754 input = xmlNewEntityInputStream(ctxt, entity);
1755 xmlPushInput(ctxt, input);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001756 return;
1757
1758handle_as_char:
1759 /*
1760 * Just handle the content as a set of chars.
1761 */
Daniel Veillard517752b1999-04-05 12:20:10 +00001762 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00001763 ctxt->sax->characters(ctxt->userData, entity->content, len);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001764
1765}
1766
/*
 * Forward declarations for recursive behaviour.
 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00001770void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1771void xmlParseReference(xmlParserCtxtPtr ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001772
1773/************************************************************************
1774 * *
1775 * Extra stuff for namespace support *
1776 * Relates to http://www.w3.org/TR/WD-xml-names *
1777 * *
1778 ************************************************************************/
1779
Daniel Veillard11e00581998-10-24 18:27:49 +00001780/**
1781 * xmlNamespaceParseNCName:
1782 * @ctxt: an XML parser context
1783 *
1784 * parse an XML namespace name.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001785 *
1786 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
1787 *
1788 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
1789 * CombiningChar | Extender
Daniel Veillard1e346af1999-02-22 10:33:01 +00001790 *
1791 * Returns the namespace name or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001792 */
1793
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001794CHAR *
1795xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001796 CHAR buf[XML_MAX_NAMELEN];
1797 int len = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001798
1799 if (!IS_LETTER(CUR) && (CUR != '_')) return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001800
1801 while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1802 (CUR == '.') || (CUR == '-') ||
1803 (CUR == '_') ||
1804 (IS_COMBINING(CUR)) ||
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001805 (IS_EXTENDER(CUR))) {
1806 buf[len++] = CUR;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001807 NEXT;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001808 if (len >= XML_MAX_NAMELEN) {
1809 fprintf(stderr,
1810 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
1811 while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1812 (CUR == '.') || (CUR == '-') ||
1813 (CUR == '_') ||
1814 (IS_COMBINING(CUR)) ||
1815 (IS_EXTENDER(CUR)))
1816 NEXT;
1817 break;
1818 }
1819 }
1820 return(xmlStrndup(buf, len));
Daniel Veillard260a68f1998-08-13 03:39:55 +00001821}
1822
Daniel Veillard11e00581998-10-24 18:27:49 +00001823/**
1824 * xmlNamespaceParseQName:
1825 * @ctxt: an XML parser context
1826 * @prefix: a CHAR **
1827 *
1828 * parse an XML qualified name
Daniel Veillard260a68f1998-08-13 03:39:55 +00001829 *
1830 * [NS 5] QName ::= (Prefix ':')? LocalPart
1831 *
1832 * [NS 6] Prefix ::= NCName
1833 *
1834 * [NS 7] LocalPart ::= NCName
Daniel Veillard1e346af1999-02-22 10:33:01 +00001835 *
1836 * Returns the function returns the local part, and prefix is updated
Daniel Veillard11e00581998-10-24 18:27:49 +00001837 * to get the Prefix if any.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001838 */
1839
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001840CHAR *
1841xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, CHAR **prefix) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001842 CHAR *ret = NULL;
1843
1844 *prefix = NULL;
1845 ret = xmlNamespaceParseNCName(ctxt);
1846 if (CUR == ':') {
1847 *prefix = ret;
1848 NEXT;
1849 ret = xmlNamespaceParseNCName(ctxt);
1850 }
1851
1852 return(ret);
1853}
1854
Daniel Veillard11e00581998-10-24 18:27:49 +00001855/**
Daniel Veillard517752b1999-04-05 12:20:10 +00001856 * xmlSplitQName:
1857 * @name: an XML parser context
1858 * @prefix: a CHAR **
1859 *
1860 * parse an XML qualified name string
1861 *
1862 * [NS 5] QName ::= (Prefix ':')? LocalPart
1863 *
1864 * [NS 6] Prefix ::= NCName
1865 *
1866 * [NS 7] LocalPart ::= NCName
1867 *
1868 * Returns the function returns the local part, and prefix is updated
1869 * to get the Prefix if any.
1870 */
1871
1872CHAR *
1873xmlSplitQName(const CHAR *name, CHAR **prefix) {
1874 CHAR *ret = NULL;
1875 const CHAR *q;
1876 const CHAR *cur = name;
1877
1878 *prefix = NULL;
1879 if (!IS_LETTER(*cur) && (*cur != '_')) return(NULL);
1880 q = cur++;
1881
1882 while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
1883 (*cur == '.') || (*cur == '-') ||
1884 (*cur == '_') ||
1885 (IS_COMBINING(*cur)) ||
1886 (IS_EXTENDER(*cur)))
1887 cur++;
1888
1889 ret = xmlStrndup(q, cur - q);
1890
1891 if (*cur == ':') {
1892 cur++;
1893 if (!IS_LETTER(*cur) && (*cur != '_')) return(ret);
1894 *prefix = ret;
1895
1896 q = cur++;
1897
1898 while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
1899 (*cur == '.') || (*cur == '-') ||
1900 (*cur == '_') ||
1901 (IS_COMBINING(*cur)) ||
1902 (IS_EXTENDER(*cur)))
1903 cur++;
1904
1905 ret = xmlStrndup(q, cur - q);
1906 }
1907
1908 return(ret);
1909}
1910/**
Daniel Veillard11e00581998-10-24 18:27:49 +00001911 * xmlNamespaceParseNSDef:
1912 * @ctxt: an XML parser context
1913 *
1914 * parse a namespace prefix declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00001915 *
1916 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
1917 *
1918 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
Daniel Veillard1e346af1999-02-22 10:33:01 +00001919 *
1920 * Returns the namespace name
Daniel Veillard260a68f1998-08-13 03:39:55 +00001921 */
1922
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001923CHAR *
1924xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001925 CHAR *name = NULL;
1926
1927 if ((CUR == 'x') && (NXT(1) == 'm') &&
1928 (NXT(2) == 'l') && (NXT(3) == 'n') &&
1929 (NXT(4) == 's')) {
1930 SKIP(5);
1931 if (CUR == ':') {
1932 NEXT;
1933 name = xmlNamespaceParseNCName(ctxt);
1934 }
1935 }
1936 return(name);
1937}
1938
Daniel Veillard11e00581998-10-24 18:27:49 +00001939/**
1940 * xmlParseQuotedString:
1941 * @ctxt: an XML parser context
1942 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00001943 * [OLD] Parse and return a string between quotes or doublequotes
Daniel Veillardb05deb71999-08-10 19:04:08 +00001944 * To be removed at next drop of binary compatibility
Daniel Veillard1e346af1999-02-22 10:33:01 +00001945 *
1946 * Returns the string parser or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00001947 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001948CHAR *
1949xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001950 CHAR *ret = NULL;
1951 const CHAR *q;
1952
1953 if (CUR == '"') {
1954 NEXT;
1955 q = CUR_PTR;
1956 while (IS_CHAR(CUR) && (CUR != '"')) NEXT;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001957 if (CUR != '"') {
1958 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00001959 ctxt->sax->error(ctxt->userData, "String not closed \"%.50s\"\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001960 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001961 } else {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001962 ret = xmlStrndup(q, CUR_PTR - q);
1963 NEXT;
1964 }
1965 } else if (CUR == '\''){
1966 NEXT;
1967 q = CUR_PTR;
1968 while (IS_CHAR(CUR) && (CUR != '\'')) NEXT;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001969 if (CUR != '\'') {
1970 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00001971 ctxt->sax->error(ctxt->userData, "String not closed \"%.50s\"\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001972 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001973 } else {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001974 ret = xmlStrndup(q, CUR_PTR - q);
1975 NEXT;
1976 }
1977 }
1978 return(ret);
1979}
1980
Daniel Veillard11e00581998-10-24 18:27:49 +00001981/**
1982 * xmlParseNamespace:
1983 * @ctxt: an XML parser context
1984 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00001985 * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
1986 *
1987 * This is what the older xml-name Working Draft specified, a bunch of
1988 * other stuff may still rely on it, so support is still here as
1989 * if ot was declared on the root of the Tree:-(
Daniel Veillardb05deb71999-08-10 19:04:08 +00001990 *
1991 * To be removed at next drop of binary compatibility
Daniel Veillard260a68f1998-08-13 03:39:55 +00001992 */
1993
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001994void
1995xmlParseNamespace(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001996 CHAR *href = NULL;
1997 CHAR *prefix = NULL;
1998 int garbage = 0;
1999
2000 /*
2001 * We just skipped "namespace" or "xml:namespace"
2002 */
2003 SKIP_BLANKS;
2004
2005 while (IS_CHAR(CUR) && (CUR != '>')) {
2006 /*
2007 * We can have "ns" or "prefix" attributes
2008 * Old encoding as 'href' or 'AS' attributes is still supported
2009 */
2010 if ((CUR == 'n') && (NXT(1) == 's')) {
2011 garbage = 0;
2012 SKIP(2);
2013 SKIP_BLANKS;
2014
2015 if (CUR != '=') continue;
2016 NEXT;
2017 SKIP_BLANKS;
2018
2019 href = xmlParseQuotedString(ctxt);
2020 SKIP_BLANKS;
2021 } else if ((CUR == 'h') && (NXT(1) == 'r') &&
2022 (NXT(2) == 'e') && (NXT(3) == 'f')) {
2023 garbage = 0;
2024 SKIP(4);
2025 SKIP_BLANKS;
2026
2027 if (CUR != '=') continue;
2028 NEXT;
2029 SKIP_BLANKS;
2030
2031 href = xmlParseQuotedString(ctxt);
2032 SKIP_BLANKS;
2033 } else if ((CUR == 'p') && (NXT(1) == 'r') &&
2034 (NXT(2) == 'e') && (NXT(3) == 'f') &&
2035 (NXT(4) == 'i') && (NXT(5) == 'x')) {
2036 garbage = 0;
2037 SKIP(6);
2038 SKIP_BLANKS;
2039
2040 if (CUR != '=') continue;
2041 NEXT;
2042 SKIP_BLANKS;
2043
2044 prefix = xmlParseQuotedString(ctxt);
2045 SKIP_BLANKS;
2046 } else if ((CUR == 'A') && (NXT(1) == 'S')) {
2047 garbage = 0;
2048 SKIP(2);
2049 SKIP_BLANKS;
2050
2051 if (CUR != '=') continue;
2052 NEXT;
2053 SKIP_BLANKS;
2054
2055 prefix = xmlParseQuotedString(ctxt);
2056 SKIP_BLANKS;
2057 } else if ((CUR == '?') && (NXT(1) == '>')) {
2058 garbage = 0;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002059 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002060 } else {
2061 /*
2062 * Found garbage when parsing the namespace
2063 */
2064 if (!garbage)
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002065 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002066 ctxt->sax->error(ctxt->userData, "xmlParseNamespace found garbage\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002067 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002068 NEXT;
2069 }
2070 }
2071
2072 MOVETO_ENDTAG(CUR_PTR);
2073 NEXT;
2074
2075 /*
2076 * Register the DTD.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002077 if (href != NULL)
Daniel Veillard517752b1999-04-05 12:20:10 +00002078 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002079 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
Daniel Veillard517752b1999-04-05 12:20:10 +00002080 */
Daniel Veillard260a68f1998-08-13 03:39:55 +00002081
2082 if (prefix != NULL) free(prefix);
2083 if (href != NULL) free(href);
2084}
2085
2086/************************************************************************
2087 * *
2088 * The parser itself *
2089 * Relates to http://www.w3.org/TR/REC-xml *
2090 * *
2091 ************************************************************************/
2092
Daniel Veillard11e00581998-10-24 18:27:49 +00002093/**
Daniel Veillardb05deb71999-08-10 19:04:08 +00002094 * xmlScanName:
2095 * @ctxt: an XML parser context
2096 *
2097 * Trickery: parse an XML name but without consuming the input flow
2098 * Needed for rollback cases.
2099 *
2100 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2101 * CombiningChar | Extender
2102 *
2103 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2104 *
2105 * [6] Names ::= Name (S Name)*
2106 *
2107 * Returns the Name parsed or NULL
2108 */
2109
2110CHAR *
2111xmlScanName(xmlParserCtxtPtr ctxt) {
2112 CHAR buf[XML_MAX_NAMELEN];
2113 int len = 0;
2114
2115 GROW;
2116 if (!IS_LETTER(CUR) && (CUR != '_') &&
2117 (CUR != ':')) {
2118 return(NULL);
2119 }
2120
2121 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
2122 (NXT(len) == '.') || (NXT(len) == '-') ||
2123 (NXT(len) == '_') || (NXT(len) == ':') ||
2124 (IS_COMBINING(NXT(len))) ||
2125 (IS_EXTENDER(NXT(len)))) {
2126 buf[len] = NXT(len);
2127 len++;
2128 if (len >= XML_MAX_NAMELEN) {
2129 fprintf(stderr,
2130 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
2131 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
2132 (NXT(len) == '.') || (NXT(len) == '-') ||
2133 (NXT(len) == '_') || (NXT(len) == ':') ||
2134 (IS_COMBINING(NXT(len))) ||
2135 (IS_EXTENDER(NXT(len))))
2136 len++;
2137 break;
2138 }
2139 }
2140 return(xmlStrndup(buf, len));
2141}
2142
2143/**
Daniel Veillard11e00581998-10-24 18:27:49 +00002144 * xmlParseName:
2145 * @ctxt: an XML parser context
2146 *
2147 * parse an XML name.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002148 *
2149 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2150 * CombiningChar | Extender
2151 *
2152 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2153 *
2154 * [6] Names ::= Name (S Name)*
Daniel Veillard1e346af1999-02-22 10:33:01 +00002155 *
2156 * Returns the Name parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00002157 */
2158
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002159CHAR *
2160xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002161 CHAR buf[XML_MAX_NAMELEN];
2162 int len = 0;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002163 CHAR cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002164
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002165 GROW;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002166 cur = CUR;
2167 if (!IS_LETTER(cur) && (cur != '_') &&
2168 (cur != ':')) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002169 return(NULL);
2170 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002171
Daniel Veillardb05deb71999-08-10 19:04:08 +00002172 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2173 (cur == '.') || (cur == '-') ||
2174 (cur == '_') || (cur == ':') ||
2175 (IS_COMBINING(cur)) ||
2176 (IS_EXTENDER(cur))) {
2177 buf[len++] = cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002178 NEXT;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002179 cur = CUR;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002180 if (len >= XML_MAX_NAMELEN) {
2181 fprintf(stderr,
2182 "xmlParseName: reached XML_MAX_NAMELEN limit\n");
Daniel Veillardb05deb71999-08-10 19:04:08 +00002183 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2184 (cur == '.') || (cur == '-') ||
2185 (cur == '_') || (cur == ':') ||
2186 (IS_COMBINING(cur)) ||
2187 (IS_EXTENDER(cur))) {
2188 NEXT;
2189 cur = CUR;
2190 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002191 break;
2192 }
2193 }
2194 return(xmlStrndup(buf, len));
Daniel Veillard260a68f1998-08-13 03:39:55 +00002195}
2196
Daniel Veillard11e00581998-10-24 18:27:49 +00002197/**
2198 * xmlParseNmtoken:
2199 * @ctxt: an XML parser context
2200 *
2201 * parse an XML Nmtoken.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002202 *
2203 * [7] Nmtoken ::= (NameChar)+
2204 *
2205 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
Daniel Veillard1e346af1999-02-22 10:33:01 +00002206 *
2207 * Returns the Nmtoken parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00002208 */
2209
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002210CHAR *
2211xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002212 CHAR buf[XML_MAX_NAMELEN];
2213 int len = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002214
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002215 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002216 while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
2217 (CUR == '.') || (CUR == '-') ||
2218 (CUR == '_') || (CUR == ':') ||
2219 (IS_COMBINING(CUR)) ||
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002220 (IS_EXTENDER(CUR))) {
2221 buf[len++] = CUR;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002222 NEXT;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002223 if (len >= XML_MAX_NAMELEN) {
2224 fprintf(stderr,
2225 "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
2226 while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
2227 (CUR == '.') || (CUR == '-') ||
2228 (CUR == '_') || (CUR == ':') ||
2229 (IS_COMBINING(CUR)) ||
2230 (IS_EXTENDER(CUR)))
2231 NEXT;
2232 break;
2233 }
2234 }
2235 return(xmlStrndup(buf, len));
Daniel Veillard260a68f1998-08-13 03:39:55 +00002236}
2237
Daniel Veillard11e00581998-10-24 18:27:49 +00002238/**
2239 * xmlParseEntityValue:
2240 * @ctxt: an XML parser context
Daniel Veillard011b63c1999-06-02 17:44:04 +00002241 * @orig: if non-NULL store a copy of the original entity value
Daniel Veillard11e00581998-10-24 18:27:49 +00002242 *
2243 * parse a value for ENTITY decl.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002244 *
2245 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2246 * "'" ([^%&'] | PEReference | Reference)* "'"
Daniel Veillard1e346af1999-02-22 10:33:01 +00002247 *
Daniel Veillard011b63c1999-06-02 17:44:04 +00002248 * Returns the EntityValue parsed with reference substitued or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00002249 */
2250
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002251CHAR *
Daniel Veillard011b63c1999-06-02 17:44:04 +00002252xmlParseEntityValue(xmlParserCtxtPtr ctxt, CHAR **orig) {
2253 CHAR *ret = NULL;
2254 const CHAR *org = NULL;
2255 const CHAR *tst = NULL;
2256 const CHAR *temp = NULL;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002257 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002258
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002259 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002260 if (CUR == '"') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002261 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2262 input = ctxt->input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002263 NEXT;
Daniel Veillard011b63c1999-06-02 17:44:04 +00002264 org = CUR_PTR;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002265 /*
2266 * NOTE: 4.4.5 Included in Literal
2267 * When a parameter entity reference appears in a literal entity
2268 * value, ... a single or double quote character in the replacement
2269 * text is always treated as a normal data character and will not
2270 * terminate the literal.
2271 * In practice it means we stop the loop only when back at parsing
2272 * the initial entity and the quote is found
2273 */
2274 while ((CUR != '"') || (ctxt->input != input)) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00002275 tst = CUR_PTR;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002276 /*
2277 * NOTE: 4.4.7 Bypassed
2278 * When a general entity reference appears in the EntityValue in
2279 * an entity declaration, it is bypassed and left as is.
2280 * so XML_SUBSTITUTE_REF is not set.
2281 */
2282 if (ctxt->input != input)
2283 temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2284 0, 0, 0);
2285 else
2286 temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2287 '"', 0, 0);
2288
2289 /*
2290 * Pop-up of finished entities.
2291 */
2292 while ((CUR == 0) && (ctxt->inputNr > 1))
2293 xmlPopInput(ctxt);
2294
2295 if ((temp == NULL) && (tst == CUR_PTR)) {
2296 ret = xmlStrndup("", 0);
2297 break;
2298 }
2299 if ((temp[0] == 0) && (tst == CUR_PTR)) {
2300 free((char *)temp);
2301 ret = xmlStrndup("", 0);
2302 break;
2303 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00002304 ret = xmlStrcat(ret, temp);
2305 if (temp != NULL) free((char *)temp);
Daniel Veillardb05deb71999-08-10 19:04:08 +00002306 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002307 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00002308 if (CUR != '"') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002309 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard011b63c1999-06-02 17:44:04 +00002310 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002311 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002312 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002313 if (orig != NULL) /* !!!!!!!!! */
Daniel Veillard011b63c1999-06-02 17:44:04 +00002314 *orig = xmlStrndup(org, CUR_PTR - org);
Daniel Veillardb05deb71999-08-10 19:04:08 +00002315 if (ret == NULL)
2316 ret = xmlStrndup("", 0);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002317 NEXT;
Daniel Veillard011b63c1999-06-02 17:44:04 +00002318 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002319 } else if (CUR == '\'') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002320 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2321 input = ctxt->input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002322 NEXT;
Daniel Veillard011b63c1999-06-02 17:44:04 +00002323 org = CUR_PTR;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002324 /*
2325 * NOTE: 4.4.5 Included in Literal
2326 * When a parameter entity reference appears in a literal entity
2327 * value, ... a single or double quote character in the replacement
2328 * text is always treated as a normal data character and will not
2329 * terminate the literal.
2330 * In practice it means we stop the loop only when back at parsing
2331 * the initial entity and the quote is found
2332 */
2333 while ((CUR != '\'') || (ctxt->input != input)) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00002334 tst = CUR_PTR;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002335 /*
2336 * NOTE: 4.4.7 Bypassed
2337 * When a general entity reference appears in the EntityValue in
2338 * an entity declaration, it is bypassed and left as is.
2339 * so XML_SUBSTITUTE_REF is not set.
2340 */
2341 if (ctxt->input != input)
2342 temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2343 0, 0, 0);
2344 else
2345 temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2346 '\'', 0, 0);
2347
2348 /*
2349 * Pop-up of finished entities.
2350 */
2351 while ((CUR == 0) && (ctxt->inputNr > 1))
2352 xmlPopInput(ctxt);
2353
2354 if ((temp == NULL) && (tst == CUR_PTR)) {
2355 ret = xmlStrndup("", 0);
2356 break;
2357 }
2358 if ((temp[0] == 0) && (tst == CUR_PTR)) {
2359 free((char *)temp);
2360 ret = xmlStrndup("", 0);
2361 break;
2362 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00002363 ret = xmlStrcat(ret, temp);
2364 if (temp != NULL) free((char *)temp);
Daniel Veillardb05deb71999-08-10 19:04:08 +00002365 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002366 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00002367 if (CUR != '\'') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002368 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard011b63c1999-06-02 17:44:04 +00002369 ctxt->sax->error(ctxt->userData, "EntityValue: ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002370 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002371 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002372 if (orig != NULL) /* !!!!!!!!! */
Daniel Veillard011b63c1999-06-02 17:44:04 +00002373 *orig = xmlStrndup(org, CUR_PTR - org);
Daniel Veillardb05deb71999-08-10 19:04:08 +00002374 if (ret == NULL)
2375 ret = xmlStrndup("", 0);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002376 NEXT;
Daniel Veillard011b63c1999-06-02 17:44:04 +00002377 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002378 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002379 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard011b63c1999-06-02 17:44:04 +00002380 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002381 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002382 }
2383
2384 return(ret);
2385}
2386
Daniel Veillard11e00581998-10-24 18:27:49 +00002387/**
2388 * xmlParseAttValue:
2389 * @ctxt: an XML parser context
2390 *
2391 * parse a value for an attribute
Daniel Veillard011b63c1999-06-02 17:44:04 +00002392 * Note: the parser won't do substitution of entities here, this
2393 * will be handled later in xmlStringGetNodeList, unless it was
2394 * asked for ctxt->replaceEntities != 0
Daniel Veillard260a68f1998-08-13 03:39:55 +00002395 *
2396 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2397 * "'" ([^<&'] | Reference)* "'"
Daniel Veillard1e346af1999-02-22 10:33:01 +00002398 *
2399 * Returns the AttValue parsed or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002400 */
2401
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002402CHAR *
2403xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00002404 CHAR *ret = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002405
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002406 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002407 if (CUR == '"') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002408 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002409 NEXT;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002410 ret = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_REF, '"', '<', 0);
Daniel Veillard011b63c1999-06-02 17:44:04 +00002411 if (CUR == '<') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002412 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard011b63c1999-06-02 17:44:04 +00002413 ctxt->sax->error(ctxt->userData,
2414 "Unescaped '<' not allowed in attributes values\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002415 ctxt->wellFormed = 0;
Daniel Veillard011b63c1999-06-02 17:44:04 +00002416 }
2417 if (CUR != '"') {
2418 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2419 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2420 ctxt->wellFormed = 0;
2421 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00002422 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002423 } else if (CUR == '\'') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002424 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002425 NEXT;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002426 ret = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_REF, '\'', '<', 0);
Daniel Veillard011b63c1999-06-02 17:44:04 +00002427 if (CUR == '<') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002428 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard011b63c1999-06-02 17:44:04 +00002429 ctxt->sax->error(ctxt->userData,
2430 "Unescaped '<' not allowed in attributes values\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002431 ctxt->wellFormed = 0;
Daniel Veillard011b63c1999-06-02 17:44:04 +00002432 }
2433 if (CUR != '\'') {
2434 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2435 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2436 ctxt->wellFormed = 0;
2437 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00002438 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002439 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002440 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002441 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002442 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002443 }
2444
2445 return(ret);
2446}
2447
Daniel Veillard11e00581998-10-24 18:27:49 +00002448/**
2449 * xmlParseSystemLiteral:
2450 * @ctxt: an XML parser context
2451 *
2452 * parse an XML Literal
Daniel Veillard260a68f1998-08-13 03:39:55 +00002453 *
2454 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
Daniel Veillard1e346af1999-02-22 10:33:01 +00002455 *
2456 * Returns the SystemLiteral parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00002457 */
2458
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002459CHAR *
2460xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002461 const CHAR *q;
2462 CHAR *ret = NULL;
2463
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002464 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002465 if (CUR == '"') {
2466 NEXT;
2467 q = CUR_PTR;
2468 while ((IS_CHAR(CUR)) && (CUR != '"'))
2469 NEXT;
2470 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002471 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002472 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002473 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002474 } else {
2475 ret = xmlStrndup(q, CUR_PTR - q);
2476 NEXT;
2477 }
2478 } else if (CUR == '\'') {
2479 NEXT;
2480 q = CUR_PTR;
2481 while ((IS_CHAR(CUR)) && (CUR != '\''))
2482 NEXT;
2483 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002484 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002485 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002486 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002487 } else {
2488 ret = xmlStrndup(q, CUR_PTR - q);
2489 NEXT;
2490 }
2491 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002492 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002493 ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002494 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002495 }
2496
2497 return(ret);
2498}
2499
Daniel Veillard11e00581998-10-24 18:27:49 +00002500/**
2501 * xmlParsePubidLiteral:
2502 * @ctxt: an XML parser context
Daniel Veillard260a68f1998-08-13 03:39:55 +00002503 *
Daniel Veillard11e00581998-10-24 18:27:49 +00002504 * parse an XML public literal
Daniel Veillard1e346af1999-02-22 10:33:01 +00002505 *
2506 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2507 *
2508 * Returns the PubidLiteral parsed or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002509 */
2510
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002511CHAR *
2512xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002513 const CHAR *q;
2514 CHAR *ret = NULL;
2515 /*
2516 * Name ::= (Letter | '_') (NameChar)*
2517 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002518 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002519 if (CUR == '"') {
2520 NEXT;
2521 q = CUR_PTR;
2522 while (IS_PUBIDCHAR(CUR)) NEXT;
2523 if (CUR != '"') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002524 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002525 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002526 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002527 } else {
2528 ret = xmlStrndup(q, CUR_PTR - q);
2529 NEXT;
2530 }
2531 } else if (CUR == '\'') {
2532 NEXT;
2533 q = CUR_PTR;
2534 while ((IS_LETTER(CUR)) && (CUR != '\''))
2535 NEXT;
2536 if (!IS_LETTER(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002537 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002538 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002539 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002540 } else {
2541 ret = xmlStrndup(q, CUR_PTR - q);
2542 NEXT;
2543 }
2544 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002545 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002546 ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002547 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002548 }
2549
2550 return(ret);
2551}
2552
Daniel Veillard11e00581998-10-24 18:27:49 +00002553/**
2554 * xmlParseCharData:
2555 * @ctxt: an XML parser context
2556 * @cdata: int indicating whether we are within a CDATA section
2557 *
2558 * parse a CharData section.
2559 * if we are within a CDATA section ']]>' marks an end of section.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002560 *
2561 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2562 */
2563
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002564void
2565xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002566 CHAR buf[1000];
2567 int nbchar = 0;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002568 CHAR cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002569
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002570 SHRINK;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002571 /*
2572 * !!!!!!!!!!!!
2573 * NOTE: NXT(0) is used here to avoid breaking on &lt; or &amp;
2574 * entities substitutions.
2575 */
2576 cur = CUR;
2577 while ((IS_CHAR(cur)) && (cur != '<') &&
2578 (cur != '&')) {
2579 if ((cur == ']') && (NXT(1) == ']') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002580 (NXT(2) == '>')) {
2581 if (cdata) break;
2582 else {
2583 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002584 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002585 "Sequence ']]>' not allowed in content\n");
2586 ctxt->wellFormed = 0;
2587 }
2588 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002589 buf[nbchar++] = CUR;
2590 if (nbchar == 1000) {
2591 /*
2592 * Ok the segment is to be consumed as chars.
2593 */
2594 if (ctxt->sax != NULL) {
2595 if (areBlanks(ctxt, buf, nbchar)) {
2596 if (ctxt->sax->ignorableWhitespace != NULL)
2597 ctxt->sax->ignorableWhitespace(ctxt->userData,
2598 buf, nbchar);
2599 } else {
2600 if (ctxt->sax->characters != NULL)
2601 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2602 }
2603 }
2604 nbchar = 0;
2605 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002606 NEXT;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002607 cur = CUR;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002608 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002609 if (nbchar != 0) {
2610 /*
2611 * Ok the segment is to be consumed as chars.
2612 */
2613 if (ctxt->sax != NULL) {
2614 if (areBlanks(ctxt, buf, nbchar)) {
2615 if (ctxt->sax->ignorableWhitespace != NULL)
2616 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2617 } else {
2618 if (ctxt->sax->characters != NULL)
2619 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2620 }
2621 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002622 }
2623}
2624
Daniel Veillard11e00581998-10-24 18:27:49 +00002625/**
2626 * xmlParseExternalID:
2627 * @ctxt: an XML parser context
2628 * @publicID: a CHAR** receiving PubidLiteral
Daniel Veillard1e346af1999-02-22 10:33:01 +00002629 * @strict: indicate whether we should restrict parsing to only
2630 * production [75], see NOTE below
Daniel Veillard11e00581998-10-24 18:27:49 +00002631 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00002632 * Parse an External ID or a Public ID
2633 *
2634 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2635 * 'PUBLIC' S PubidLiteral S SystemLiteral
Daniel Veillard260a68f1998-08-13 03:39:55 +00002636 *
2637 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2638 * | 'PUBLIC' S PubidLiteral S SystemLiteral
Daniel Veillard1e346af1999-02-22 10:33:01 +00002639 *
2640 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2641 *
2642 * Returns the function returns SystemLiteral and in the second
2643 * case publicID receives PubidLiteral, is strict is off
2644 * it is possible to return NULL and have publicID set.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002645 */
2646
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002647CHAR *
Daniel Veillard1e346af1999-02-22 10:33:01 +00002648xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR **publicID, int strict) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002649 CHAR *URI = NULL;
2650
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002651 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002652 if ((CUR == 'S') && (NXT(1) == 'Y') &&
2653 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2654 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2655 SKIP(6);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002656 if (!IS_BLANK(CUR)) {
2657 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002658 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002659 "Space required after 'SYSTEM'\n");
2660 ctxt->wellFormed = 0;
2661 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002662 SKIP_BLANKS;
2663 URI = xmlParseSystemLiteral(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002664 if (URI == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002665 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002666 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00002667 "xmlParseExternalID: SYSTEM, no URI\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002668 ctxt->wellFormed = 0;
2669 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002670 } else if ((CUR == 'P') && (NXT(1) == 'U') &&
2671 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2672 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2673 SKIP(6);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002674 if (!IS_BLANK(CUR)) {
2675 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002676 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002677 "Space required after 'PUBLIC'\n");
2678 ctxt->wellFormed = 0;
2679 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002680 SKIP_BLANKS;
2681 *publicID = xmlParsePubidLiteral(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002682 if (*publicID == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002683 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002684 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00002685 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002686 ctxt->wellFormed = 0;
2687 }
Daniel Veillard1e346af1999-02-22 10:33:01 +00002688 if (strict) {
2689 /*
2690 * We don't handle [83] so "S SystemLiteral" is required.
2691 */
2692 if (!IS_BLANK(CUR)) {
2693 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002694 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00002695 "Space required after the Public Identifier\n");
2696 ctxt->wellFormed = 0;
2697 }
2698 } else {
2699 /*
2700 * We handle [83] so we return immediately, if
2701 * "S SystemLiteral" is not detected. From a purely parsing
2702 * point of view that's a nice mess.
2703 */
2704 const CHAR *ptr = CUR_PTR;
2705 if (!IS_BLANK(*ptr)) return(NULL);
2706
2707 while (IS_BLANK(*ptr)) ptr++;
2708 if ((*ptr != '\'') || (*ptr != '"')) return(NULL);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002709 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002710 SKIP_BLANKS;
2711 URI = xmlParseSystemLiteral(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002712 if (URI == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002713 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002714 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00002715 "xmlParseExternalID: PUBLIC, no URI\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002716 ctxt->wellFormed = 0;
2717 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002718 }
2719 return(URI);
2720}
2721
Daniel Veillard11e00581998-10-24 18:27:49 +00002722/**
2723 * xmlParseComment:
Daniel Veillard1e346af1999-02-22 10:33:01 +00002724 * @ctxt: an XML parser context
2725 * @create: should we create a node, or just skip the content
Daniel Veillard11e00581998-10-24 18:27:49 +00002726 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00002727 * Skip an XML (SGML) comment <!-- .... -->
2728 * This may or may not create a node (depending on the context)
2729 * The spec says that "For compatibility, the string "--" (double-hyphen)
2730 * must not occur within comments. "
2731 *
2732 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2733 */
Daniel Veillard517752b1999-04-05 12:20:10 +00002734void
Daniel Veillard1e346af1999-02-22 10:33:01 +00002735xmlParseComment(xmlParserCtxtPtr ctxt, int create) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002736 const CHAR *q, *start;
2737 const CHAR *r;
2738 CHAR *val;
2739
2740 /*
2741 * Check that there is a comment right here.
2742 */
2743 if ((CUR != '<') || (NXT(1) != '!') ||
Daniel Veillard517752b1999-04-05 12:20:10 +00002744 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002745
Daniel Veillardb05deb71999-08-10 19:04:08 +00002746 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002747 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002748 SKIP(4);
2749 start = q = CUR_PTR;
2750 NEXT;
2751 r = CUR_PTR;
2752 NEXT;
2753 while (IS_CHAR(CUR) &&
2754 ((CUR == ':') || (CUR != '>') ||
2755 (*r != '-') || (*q != '-'))) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002756 if ((*r == '-') && (*q == '-')) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002757 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002758 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00002759 "Comment must not contain '--' (double-hyphen)`\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002760 ctxt->wellFormed = 0;
2761 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002762 NEXT;r++;q++;
2763 }
2764 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002765 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002766 ctxt->sax->error(ctxt->userData, "Comment not terminated \n<!--%.50s\n", start);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002767 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002768 } else {
2769 NEXT;
2770 if (create) {
2771 val = xmlStrndup(start, q - start);
Daniel Veillard517752b1999-04-05 12:20:10 +00002772 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002773 ctxt->sax->comment(ctxt->userData, val);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002774 free(val);
2775 }
2776 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002777}
2778
Daniel Veillard11e00581998-10-24 18:27:49 +00002779/**
2780 * xmlParsePITarget:
2781 * @ctxt: an XML parser context
2782 *
2783 * parse the name of a PI
Daniel Veillard260a68f1998-08-13 03:39:55 +00002784 *
2785 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
Daniel Veillard1e346af1999-02-22 10:33:01 +00002786 *
2787 * Returns the PITarget name or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00002788 */
2789
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002790CHAR *
2791xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002792 CHAR *name;
2793
2794 name = xmlParseName(ctxt);
2795 if ((name != NULL) && (name[3] == 0) &&
2796 ((name[0] == 'x') || (name[0] == 'X')) &&
2797 ((name[1] == 'm') || (name[1] == 'M')) &&
2798 ((name[2] == 'l') || (name[2] == 'L'))) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002799 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002800 ctxt->sax->error(ctxt->userData, "xmlParsePItarget: invalid name prefix 'xml'\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00002801 return(NULL);
2802 }
2803 return(name);
2804}
2805
Daniel Veillard11e00581998-10-24 18:27:49 +00002806/**
2807 * xmlParsePI:
2808 * @ctxt: an XML parser context
2809 *
2810 * parse an XML Processing Instruction.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002811 *
2812 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
Daniel Veillard1e346af1999-02-22 10:33:01 +00002813 *
2814 * The processing is transfered to SAX once parsed.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002815 */
2816
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002817void
2818xmlParsePI(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002819 CHAR *target;
2820
2821 if ((CUR == '<') && (NXT(1) == '?')) {
2822 /*
2823 * this is a Processing Instruction.
2824 */
2825 SKIP(2);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002826 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002827
2828 /*
2829 * Parse the target name and check for special support like
2830 * namespace.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002831 */
2832 target = xmlParsePITarget(ctxt);
2833 if (target != NULL) {
Daniel Veillard517752b1999-04-05 12:20:10 +00002834 const CHAR *q = CUR_PTR;
2835
2836 while (IS_CHAR(CUR) &&
2837 ((CUR != '?') || (NXT(1) != '>')))
2838 NEXT;
2839 if (!IS_CHAR(CUR)) {
2840 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002841 ctxt->sax->error(ctxt->userData,
Daniel Veillard517752b1999-04-05 12:20:10 +00002842 "xmlParsePI: PI %s never end ...\n", target);
2843 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002844 } else {
Daniel Veillard517752b1999-04-05 12:20:10 +00002845 CHAR *data;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002846
Daniel Veillard517752b1999-04-05 12:20:10 +00002847 data = xmlStrndup(q, CUR_PTR - q);
2848 SKIP(2);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002849
Daniel Veillard517752b1999-04-05 12:20:10 +00002850 /*
2851 * SAX: PI detected.
2852 */
2853 if ((ctxt->sax) &&
2854 (ctxt->sax->processingInstruction != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00002855 ctxt->sax->processingInstruction(ctxt->userData,
2856 target, data);
Daniel Veillard517752b1999-04-05 12:20:10 +00002857 free(data);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002858 }
2859 free(target);
2860 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002861 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00002862 ctxt->sax->error(ctxt->userData,
2863 "xmlParsePI : no target name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002864 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002865 }
2866 }
2867}
2868
Daniel Veillard11e00581998-10-24 18:27:49 +00002869/**
2870 * xmlParseNotationDecl:
2871 * @ctxt: an XML parser context
2872 *
2873 * parse a notation declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00002874 *
2875 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
2876 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00002877 * Hence there is actually 3 choices:
2878 * 'PUBLIC' S PubidLiteral
2879 * 'PUBLIC' S PubidLiteral S SystemLiteral
2880 * and 'SYSTEM' S SystemLiteral
Daniel Veillard11e00581998-10-24 18:27:49 +00002881 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00002882 * See the NOTE on xmlParseExternalID().
Daniel Veillard260a68f1998-08-13 03:39:55 +00002883 */
2884
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002885void
2886xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002887 CHAR *name;
Daniel Veillard1e346af1999-02-22 10:33:01 +00002888 CHAR *Pubid;
2889 CHAR *Systemid;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002890
2891 if ((CUR == '<') && (NXT(1) == '!') &&
2892 (NXT(2) == 'N') && (NXT(3) == 'O') &&
2893 (NXT(4) == 'T') && (NXT(5) == 'A') &&
2894 (NXT(6) == 'T') && (NXT(7) == 'I') &&
Daniel Veillard1e346af1999-02-22 10:33:01 +00002895 (NXT(8) == 'O') && (NXT(9) == 'N')) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002896 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002897 SKIP(10);
Daniel Veillard1e346af1999-02-22 10:33:01 +00002898 if (!IS_BLANK(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002899 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002900 ctxt->sax->error(ctxt->userData, "Space required after '<!NOTATION'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002901 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002902 return;
2903 }
2904 SKIP_BLANKS;
Daniel Veillard1e346af1999-02-22 10:33:01 +00002905
2906 name = xmlParseName(ctxt);
2907 if (name == NULL) {
2908 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002909 ctxt->sax->error(ctxt->userData, "NOTATION: Name expected here\n");
Daniel Veillard1e346af1999-02-22 10:33:01 +00002910 ctxt->wellFormed = 0;
2911 return;
2912 }
2913 if (!IS_BLANK(CUR)) {
2914 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002915 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00002916 "Space required after the NOTATION name'\n");
2917 ctxt->wellFormed = 0;
2918 return;
2919 }
2920 SKIP_BLANKS;
2921
Daniel Veillard260a68f1998-08-13 03:39:55 +00002922 /*
Daniel Veillard1e346af1999-02-22 10:33:01 +00002923 * Parse the IDs.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002924 */
Daniel Veillard1e346af1999-02-22 10:33:01 +00002925 Systemid = xmlParseExternalID(ctxt, &Pubid, 1);
2926 SKIP_BLANKS;
2927
2928 if (CUR == '>') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002929 NEXT;
Daniel Veillard517752b1999-04-05 12:20:10 +00002930 if ((ctxt->sax != NULL) && (ctxt->sax->notationDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002931 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
Daniel Veillard1e346af1999-02-22 10:33:01 +00002932 } else {
2933 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002934 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00002935 "'>' required to close NOTATION declaration\n");
2936 ctxt->wellFormed = 0;
2937 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002938 free(name);
Daniel Veillard1e346af1999-02-22 10:33:01 +00002939 if (Systemid != NULL) free(Systemid);
2940 if (Pubid != NULL) free(Pubid);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002941 }
2942}
2943
Daniel Veillard11e00581998-10-24 18:27:49 +00002944/**
2945 * xmlParseEntityDecl:
2946 * @ctxt: an XML parser context
2947 *
2948 * parse <!ENTITY declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00002949 *
2950 * [70] EntityDecl ::= GEDecl | PEDecl
2951 *
2952 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
2953 *
2954 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
2955 *
2956 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
2957 *
2958 * [74] PEDef ::= EntityValue | ExternalID
2959 *
2960 * [76] NDataDecl ::= S 'NDATA' S Name
Daniel Veillardb05deb71999-08-10 19:04:08 +00002961 *
2962 * [ VC: Notation Declared ]
2963 * TODO The Name must match the declared name of a notation.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002964 */
2965
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002966void
2967xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002968 CHAR *name = NULL;
2969 CHAR *value = NULL;
2970 CHAR *URI = NULL, *literal = NULL;
2971 CHAR *ndata = NULL;
2972 int isParameter = 0;
Daniel Veillard011b63c1999-06-02 17:44:04 +00002973 CHAR *orig = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002974
Daniel Veillardb05deb71999-08-10 19:04:08 +00002975 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002976 if ((CUR == '<') && (NXT(1) == '!') &&
2977 (NXT(2) == 'E') && (NXT(3) == 'N') &&
2978 (NXT(4) == 'T') && (NXT(5) == 'I') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002979 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002980 ctxt->instate = XML_PARSER_ENTITY_DECL;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002981 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002982 SKIP(8);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002983 if (!IS_BLANK(CUR)) {
2984 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002985 ctxt->sax->error(ctxt->userData, "Space required after '<!ENTITY'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002986 ctxt->wellFormed = 0;
2987 }
2988 SKIP_BLANKS;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002989
2990 if (CUR == '%') {
2991 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002992 if (!IS_BLANK(CUR)) {
2993 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002994 ctxt->sax->error(ctxt->userData, "Space required after '%'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002995 ctxt->wellFormed = 0;
2996 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00002997 SKIP_BLANKS;
2998 isParameter = 1;
2999 }
3000
3001 name = xmlParseName(ctxt);
3002 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003003 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003004 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003005 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003006 return;
3007 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003008 if (!IS_BLANK(CUR)) {
3009 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003010 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003011 "Space required after the entity name\n");
3012 ctxt->wellFormed = 0;
3013 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003014 SKIP_BLANKS;
3015
3016 /*
Daniel Veillard1e346af1999-02-22 10:33:01 +00003017 * handle the various case of definitions...
Daniel Veillard260a68f1998-08-13 03:39:55 +00003018 */
3019 if (isParameter) {
3020 if ((CUR == '"') || (CUR == '\''))
Daniel Veillard011b63c1999-06-02 17:44:04 +00003021 value = xmlParseEntityValue(ctxt, &orig);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003022 if (value) {
Daniel Veillard517752b1999-04-05 12:20:10 +00003023 if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003024 ctxt->sax->entityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003025 XML_INTERNAL_PARAMETER_ENTITY,
3026 NULL, NULL, value);
3027 }
3028 else {
Daniel Veillard1e346af1999-02-22 10:33:01 +00003029 URI = xmlParseExternalID(ctxt, &literal, 1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003030 if (URI) {
Daniel Veillard517752b1999-04-05 12:20:10 +00003031 if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003032 ctxt->sax->entityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003033 XML_EXTERNAL_PARAMETER_ENTITY,
3034 literal, URI, NULL);
3035 }
3036 }
3037 } else {
3038 if ((CUR == '"') || (CUR == '\'')) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00003039 value = xmlParseEntityValue(ctxt, &orig);
Daniel Veillard517752b1999-04-05 12:20:10 +00003040 if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003041 ctxt->sax->entityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003042 XML_INTERNAL_GENERAL_ENTITY,
3043 NULL, NULL, value);
3044 } else {
Daniel Veillard1e346af1999-02-22 10:33:01 +00003045 URI = xmlParseExternalID(ctxt, &literal, 1);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003046 if ((CUR != '>') && (!IS_BLANK(CUR))) {
3047 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003048 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003049 "Space required before 'NDATA'\n");
3050 ctxt->wellFormed = 0;
3051 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003052 SKIP_BLANKS;
3053 if ((CUR == 'N') && (NXT(1) == 'D') &&
3054 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3055 (NXT(4) == 'A')) {
3056 SKIP(5);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003057 if (!IS_BLANK(CUR)) {
3058 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003059 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003060 "Space required after 'NDATA'\n");
3061 ctxt->wellFormed = 0;
3062 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003063 SKIP_BLANKS;
3064 ndata = xmlParseName(ctxt);
Daniel Veillard517752b1999-04-05 12:20:10 +00003065 if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003066 ctxt->sax->entityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003067 XML_EXTERNAL_GENERAL_UNPARSED_ENTITY,
3068 literal, URI, ndata);
3069 } else {
Daniel Veillard517752b1999-04-05 12:20:10 +00003070 if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003071 ctxt->sax->entityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003072 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3073 literal, URI, NULL);
3074 }
3075 }
3076 }
3077 SKIP_BLANKS;
3078 if (CUR != '>') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003079 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003080 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003081 "xmlParseEntityDecl: entity %s not terminated\n", name);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003082 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003083 } else
3084 NEXT;
Daniel Veillard011b63c1999-06-02 17:44:04 +00003085 if (orig != NULL) {
3086 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00003087 * Ugly mechanism to save the raw entity value.
Daniel Veillard011b63c1999-06-02 17:44:04 +00003088 */
3089 xmlEntityPtr cur = NULL;
3090
Daniel Veillardb05deb71999-08-10 19:04:08 +00003091 if (isParameter) {
3092 if ((ctxt->sax != NULL) &&
3093 (ctxt->sax->getParameterEntity != NULL))
3094 cur = ctxt->sax->getParameterEntity(ctxt, name);
3095 } else {
3096 if ((ctxt->sax != NULL) &&
3097 (ctxt->sax->getEntity != NULL))
3098 cur = ctxt->sax->getEntity(ctxt, name);
3099 }
3100 if (cur != NULL) {
3101 if (cur->orig != NULL)
3102 free(orig);
3103 else
3104 cur->orig = orig;
3105 } else
Daniel Veillard011b63c1999-06-02 17:44:04 +00003106 free(orig);
3107 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003108 if (name != NULL) free(name);
3109 if (value != NULL) free(value);
3110 if (URI != NULL) free(URI);
3111 if (literal != NULL) free(literal);
3112 if (ndata != NULL) free(ndata);
3113 }
3114}
3115
Daniel Veillard11e00581998-10-24 18:27:49 +00003116/**
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003117 * xmlParseDefaultDecl:
3118 * @ctxt: an XML parser context
3119 * @value: Receive a possible fixed default value for the attribute
3120 *
3121 * Parse an attribute default declaration
3122 *
3123 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3124 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003125 * [ VC: Required Attribute ]
3126 * TODO if the default declaration is the keyword #REQUIRED, then the
3127 * attribute must be specified for all elements of the type in the
3128 * attribute-list declaration.
3129 *
3130 * [ VC: Attribute Default Legal ]
3131 * The declared default value must meet the lexical constraints of
3132 * the declared attribute type c.f. xmlValidateAttributeDecl()
3133 *
3134 * [ VC: Fixed Attribute Default ]
3135 * TODO if an attribute has a default value declared with the #FIXED
3136 * keyword, instances of that attribute must match the default value.
3137 *
3138 * [ WFC: No < in Attribute Values ]
3139 * handled in xmlParseAttValue()
3140 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003141 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3142 * or XML_ATTRIBUTE_FIXED.
3143 */
3144
3145int
3146xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, CHAR **value) {
3147 int val;
3148 CHAR *ret;
3149
3150 *value = NULL;
3151 if ((CUR == '#') && (NXT(1) == 'R') &&
3152 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3153 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3154 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3155 (NXT(8) == 'D')) {
3156 SKIP(9);
3157 return(XML_ATTRIBUTE_REQUIRED);
3158 }
3159 if ((CUR == '#') && (NXT(1) == 'I') &&
3160 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3161 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3162 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3163 SKIP(8);
3164 return(XML_ATTRIBUTE_IMPLIED);
3165 }
3166 val = XML_ATTRIBUTE_NONE;
3167 if ((CUR == '#') && (NXT(1) == 'F') &&
3168 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3169 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3170 SKIP(6);
3171 val = XML_ATTRIBUTE_FIXED;
3172 if (!IS_BLANK(CUR)) {
3173 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003174 ctxt->sax->error(ctxt->userData, "Space required after '#FIXED'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003175 ctxt->wellFormed = 0;
3176 }
3177 SKIP_BLANKS;
3178 }
3179 ret = xmlParseAttValue(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003180 ctxt->instate = XML_PARSER_DTD;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003181 if (ret == NULL) {
3182 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003183 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003184 "Attribute default value declaration error\n");
3185 ctxt->wellFormed = 0;
3186 } else
3187 *value = ret;
3188 return(val);
3189}
3190
3191/**
Daniel Veillard1e346af1999-02-22 10:33:01 +00003192 * xmlParseNotationType:
3193 * @ctxt: an XML parser context
3194 *
3195 * parse an Notation attribute type.
3196 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003197 * Note: the leading 'NOTATION' S part has already being parsed...
3198 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003199 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3200 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003201 * [ VC: Notation Attributes ]
3202 * TODO Values of this type must match one of the notation names included
3203 * in the declaration; all notation names in the declaration must be declared.
Daniel Veillard1e346af1999-02-22 10:33:01 +00003204 *
3205 * Returns: the notation attribute tree built while parsing
3206 */
3207
3208xmlEnumerationPtr
3209xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3210 CHAR *name;
3211 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3212
3213 if (CUR != '(') {
3214 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003215 ctxt->sax->error(ctxt->userData, "'(' required to start 'NOTATION'\n");
Daniel Veillard1e346af1999-02-22 10:33:01 +00003216 ctxt->wellFormed = 0;
3217 return(NULL);
3218 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003219 SHRINK;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003220 do {
3221 NEXT;
3222 SKIP_BLANKS;
3223 name = xmlParseName(ctxt);
3224 if (name == NULL) {
3225 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003226 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00003227 "Name expected in NOTATION declaration\n");
3228 ctxt->wellFormed = 0;
3229 return(ret);
3230 }
3231 cur = xmlCreateEnumeration(name);
3232 free(name);
3233 if (cur == NULL) return(ret);
3234 if (last == NULL) ret = last = cur;
3235 else {
3236 last->next = cur;
3237 last = cur;
3238 }
3239 SKIP_BLANKS;
3240 } while (CUR == '|');
3241 if (CUR != ')') {
3242 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003243 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00003244 "')' required to finish NOTATION declaration\n");
3245 ctxt->wellFormed = 0;
3246 return(ret);
3247 }
3248 NEXT;
3249 return(ret);
3250}
3251
3252/**
3253 * xmlParseEnumerationType:
3254 * @ctxt: an XML parser context
3255 *
3256 * parse an Enumeration attribute type.
3257 *
3258 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3259 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003260 * [ VC: Enumeration ]
3261 * TODO Values of this type must match one of the Nmtoken tokens in
3262 * the declaration
3263 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003264 * Returns: the enumeration attribute tree built while parsing
3265 */
3266
3267xmlEnumerationPtr
3268xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3269 CHAR *name;
3270 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3271
3272 if (CUR != '(') {
3273 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003274 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00003275 "'(' required to start ATTLIST enumeration\n");
3276 ctxt->wellFormed = 0;
3277 return(NULL);
3278 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003279 SHRINK;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003280 do {
3281 NEXT;
3282 SKIP_BLANKS;
3283 name = xmlParseNmtoken(ctxt);
3284 if (name == NULL) {
3285 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003286 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00003287 "NmToken expected in ATTLIST enumeration\n");
3288 ctxt->wellFormed = 0;
3289 return(ret);
3290 }
3291 cur = xmlCreateEnumeration(name);
3292 free(name);
3293 if (cur == NULL) return(ret);
3294 if (last == NULL) ret = last = cur;
3295 else {
3296 last->next = cur;
3297 last = cur;
3298 }
3299 SKIP_BLANKS;
3300 } while (CUR == '|');
3301 if (CUR != ')') {
3302 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003303 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00003304 "')' required to finish ATTLIST enumeration\n");
3305 ctxt->wellFormed = 0;
3306 return(ret);
3307 }
3308 NEXT;
3309 return(ret);
3310}
3311
3312/**
Daniel Veillard11e00581998-10-24 18:27:49 +00003313 * xmlParseEnumeratedType:
3314 * @ctxt: an XML parser context
Daniel Veillard1e346af1999-02-22 10:33:01 +00003315 * @tree: the enumeration tree built while parsing
Daniel Veillard11e00581998-10-24 18:27:49 +00003316 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003317 * parse an Enumerated attribute type.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003318 *
3319 * [57] EnumeratedType ::= NotationType | Enumeration
3320 *
3321 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3322 *
Daniel Veillard11e00581998-10-24 18:27:49 +00003323 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003324 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
Daniel Veillard260a68f1998-08-13 03:39:55 +00003325 */
3326
Daniel Veillard1e346af1999-02-22 10:33:01 +00003327int
3328xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3329 if ((CUR == 'N') && (NXT(1) == 'O') &&
3330 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3331 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3332 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3333 SKIP(8);
3334 if (!IS_BLANK(CUR)) {
3335 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003336 ctxt->sax->error(ctxt->userData, "Space required after 'NOTATION'\n");
Daniel Veillard1e346af1999-02-22 10:33:01 +00003337 ctxt->wellFormed = 0;
3338 return(0);
3339 }
3340 SKIP_BLANKS;
3341 *tree = xmlParseNotationType(ctxt);
3342 if (*tree == NULL) return(0);
3343 return(XML_ATTRIBUTE_NOTATION);
3344 }
3345 *tree = xmlParseEnumerationType(ctxt);
3346 if (*tree == NULL) return(0);
3347 return(XML_ATTRIBUTE_ENUMERATION);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003348}
3349
Daniel Veillard11e00581998-10-24 18:27:49 +00003350/**
3351 * xmlParseAttributeType:
3352 * @ctxt: an XML parser context
Daniel Veillard1e346af1999-02-22 10:33:01 +00003353 * @tree: the enumeration tree built while parsing
Daniel Veillard11e00581998-10-24 18:27:49 +00003354 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003355 * parse the Attribute list def for an element
Daniel Veillard260a68f1998-08-13 03:39:55 +00003356 *
3357 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3358 *
3359 * [55] StringType ::= 'CDATA'
3360 *
3361 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3362 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
Daniel Veillard11e00581998-10-24 18:27:49 +00003363 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003364 * Validity constraints for attribute values syntax are checked in
3365 * xmlValidateAttributeValue()
3366 *
3367 * [ VC: ID ]
3368 * Values of type ID must match the Name production. TODO A name must not
3369 * appear more than once in an XML document as a value of this type;
3370 * i.e., ID values must uniquely identify the elements which bear them.
3371 *
3372 * [ VC: One ID per Element Type ]
3373 * TODO No element type may have more than one ID attribute specified.
3374 *
3375 * [ VC: ID Attribute Default ]
3376 * TODO An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3377 *
3378 * [ VC: IDREF ]
3379 * Values of type IDREF must match the Name production, and values
3380 * of type IDREFS must match Names; TODO each Name must match the value of
3381 * an ID attribute on some element in the XML document; i.e. IDREF
3382 * values must match the value of some ID attribute.
3383 *
3384 * [ VC: Entity Name ]
3385 * Values of type ENTITY must match the Name production, values
3386 * of type ENTITIES must match Names; TODO each Name must match the name of
3387 * an unparsed entity declared in the DTD.
3388 *
3389 * [ VC: Name Token ]
3390 * Values of type NMTOKEN must match the Nmtoken production; values
3391 * of type NMTOKENS must match Nmtokens.
3392 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003393 * Returns the attribute type
Daniel Veillard260a68f1998-08-13 03:39:55 +00003394 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003395int
Daniel Veillard1e346af1999-02-22 10:33:01 +00003396xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003397 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003398 if ((CUR == 'C') && (NXT(1) == 'D') &&
3399 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3400 (NXT(4) == 'A')) {
3401 SKIP(5);
Daniel Veillard1e346af1999-02-22 10:33:01 +00003402 return(XML_ATTRIBUTE_CDATA);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003403 } else if ((CUR == 'I') && (NXT(1) == 'D') &&
3404 (NXT(2) == 'R') && (NXT(3) == 'E') &&
Daniel Veillardb05deb71999-08-10 19:04:08 +00003405 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3406 SKIP(6);
3407 return(XML_ATTRIBUTE_IDREFS);
3408 } else if ((CUR == 'I') && (NXT(1) == 'D') &&
3409 (NXT(2) == 'R') && (NXT(3) == 'E') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00003410 (NXT(4) == 'F')) {
3411 SKIP(5);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003412 return(XML_ATTRIBUTE_IDREF);
Daniel Veillard1e346af1999-02-22 10:33:01 +00003413 } else if ((CUR == 'I') && (NXT(1) == 'D')) {
3414 SKIP(2);
3415 return(XML_ATTRIBUTE_ID);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003416 } else if ((CUR == 'E') && (NXT(1) == 'N') &&
3417 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3418 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3419 SKIP(6);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003420 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003421 } else if ((CUR == 'E') && (NXT(1) == 'N') &&
3422 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3423 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3424 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3425 SKIP(8);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003426 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003427 } else if ((CUR == 'N') && (NXT(1) == 'M') &&
3428 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3429 (NXT(4) == 'K') && (NXT(5) == 'E') &&
Daniel Veillard1e346af1999-02-22 10:33:01 +00003430 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3431 SKIP(8);
3432 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003433 } else if ((CUR == 'N') && (NXT(1) == 'M') &&
3434 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3435 (NXT(4) == 'K') && (NXT(5) == 'E') &&
Daniel Veillard1e346af1999-02-22 10:33:01 +00003436 (NXT(6) == 'N')) {
3437 SKIP(7);
3438 return(XML_ATTRIBUTE_NMTOKEN);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003439 }
Daniel Veillard1e346af1999-02-22 10:33:01 +00003440 return(xmlParseEnumeratedType(ctxt, tree));
Daniel Veillard260a68f1998-08-13 03:39:55 +00003441}
3442
Daniel Veillard11e00581998-10-24 18:27:49 +00003443/**
3444 * xmlParseAttributeListDecl:
3445 * @ctxt: an XML parser context
3446 *
3447 * : parse the Attribute list def for an element
Daniel Veillard260a68f1998-08-13 03:39:55 +00003448 *
3449 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3450 *
3451 * [53] AttDef ::= S Name S AttType S DefaultDecl
Daniel Veillard11e00581998-10-24 18:27:49 +00003452 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00003453 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003454void
3455xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003456 CHAR *elemName;
3457 CHAR *attrName;
Daniel Veillardb05deb71999-08-10 19:04:08 +00003458 xmlEnumerationPtr tree;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003459
Daniel Veillard260a68f1998-08-13 03:39:55 +00003460 if ((CUR == '<') && (NXT(1) == '!') &&
3461 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3462 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3463 (NXT(6) == 'I') && (NXT(7) == 'S') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003464 (NXT(8) == 'T')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003465 SKIP(9);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003466 if (!IS_BLANK(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003467 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003468 ctxt->sax->error(ctxt->userData, "Space required after '<!ATTLIST'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003469 ctxt->wellFormed = 0;
3470 }
3471 SKIP_BLANKS;
3472 elemName = xmlParseName(ctxt);
3473 if (elemName == NULL) {
3474 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003475 ctxt->sax->error(ctxt->userData, "ATTLIST: no name for Element\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003476 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003477 return;
3478 }
3479 SKIP_BLANKS;
3480 while (CUR != '>') {
3481 const CHAR *check = CUR_PTR;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003482 int type;
3483 int def;
3484 CHAR *defaultValue = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003485
Daniel Veillardb05deb71999-08-10 19:04:08 +00003486 tree = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003487 attrName = xmlParseName(ctxt);
3488 if (attrName == NULL) {
3489 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003490 ctxt->sax->error(ctxt->userData, "ATTLIST: no name for Attribute\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003491 ctxt->wellFormed = 0;
3492 break;
3493 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00003494 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003495 if (!IS_BLANK(CUR)) {
3496 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003497 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003498 "Space required after the attribute name\n");
3499 ctxt->wellFormed = 0;
3500 break;
3501 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003502 SKIP_BLANKS;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003503
Daniel Veillard1e346af1999-02-22 10:33:01 +00003504 type = xmlParseAttributeType(ctxt, &tree);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003505 if (type <= 0) break;
3506
Daniel Veillardb05deb71999-08-10 19:04:08 +00003507 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003508 if (!IS_BLANK(CUR)) {
3509 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003510 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003511 "Space required after the attribute type\n");
3512 ctxt->wellFormed = 0;
3513 break;
3514 }
3515 SKIP_BLANKS;
3516
3517 def = xmlParseDefaultDecl(ctxt, &defaultValue);
3518 if (def <= 0) break;
3519
Daniel Veillardb05deb71999-08-10 19:04:08 +00003520 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003521 if (CUR != '>') {
3522 if (!IS_BLANK(CUR)) {
3523 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003524 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003525 "Space required after the attribute default value\n");
3526 ctxt->wellFormed = 0;
3527 break;
3528 }
3529 SKIP_BLANKS;
3530 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003531 if (check == CUR_PTR) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003532 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003533 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003534 "xmlParseAttributeListDecl: detected internal error\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00003535 break;
3536 }
Daniel Veillard517752b1999-04-05 12:20:10 +00003537 if ((ctxt->sax != NULL) && (ctxt->sax->attributeDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003538 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
Daniel Veillard1e346af1999-02-22 10:33:01 +00003539 type, def, defaultValue, tree);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003540 if (attrName != NULL)
3541 free(attrName);
3542 if (defaultValue != NULL)
3543 free(defaultValue);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003544 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003545 }
3546 if (CUR == '>')
3547 NEXT;
3548
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003549 free(elemName);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003550 }
3551}
3552
Daniel Veillard11e00581998-10-24 18:27:49 +00003553/**
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003554 * xmlParseElementMixedContentDecl:
Daniel Veillard11e00581998-10-24 18:27:49 +00003555 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00003556 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003557 * parse the declaration for a Mixed Element content
3558 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillard260a68f1998-08-13 03:39:55 +00003559 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003560 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3561 * '(' S? '#PCDATA' S? ')'
3562 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003563 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3564 *
3565 * [ VC: No Duplicate Types ]
3566 * TODO The same name must not appear more than once in a single
3567 * mixed-content declaration.
3568 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003569 * returns: the list of the xmlElementContentPtr describing the element choices
3570 */
3571xmlElementContentPtr
3572xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard1899e851999-02-01 12:18:54 +00003573 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003574 CHAR *elem = NULL;
3575
Daniel Veillardb05deb71999-08-10 19:04:08 +00003576 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003577 if ((CUR == '#') && (NXT(1) == 'P') &&
3578 (NXT(2) == 'C') && (NXT(3) == 'D') &&
3579 (NXT(4) == 'A') && (NXT(5) == 'T') &&
3580 (NXT(6) == 'A')) {
3581 SKIP(7);
3582 SKIP_BLANKS;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003583 SHRINK;
Daniel Veillard3b9def11999-01-31 22:15:06 +00003584 if (CUR == ')') {
3585 NEXT;
3586 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3587 return(ret);
3588 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003589 if ((CUR == '(') || (CUR == '|')) {
3590 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3591 if (ret == NULL) return(NULL);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003592 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003593 while (CUR == '|') {
Daniel Veillard1899e851999-02-01 12:18:54 +00003594 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003595 if (elem == NULL) {
3596 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3597 if (ret == NULL) return(NULL);
3598 ret->c1 = cur;
Daniel Veillard1899e851999-02-01 12:18:54 +00003599 cur = ret;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003600 } else {
Daniel Veillard1899e851999-02-01 12:18:54 +00003601 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3602 if (n == NULL) return(NULL);
3603 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
3604 cur->c2 = n;
3605 cur = n;
Daniel Veillard1e346af1999-02-22 10:33:01 +00003606 free(elem);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003607 }
3608 SKIP_BLANKS;
3609 elem = xmlParseName(ctxt);
3610 if (elem == NULL) {
3611 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003612 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003613 "xmlParseElementMixedContentDecl : Name expected\n");
3614 ctxt->wellFormed = 0;
3615 xmlFreeElementContent(cur);
3616 return(NULL);
3617 }
3618 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00003619 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003620 }
Daniel Veillard3b9def11999-01-31 22:15:06 +00003621 if ((CUR == ')') && (NXT(1) == '*')) {
Daniel Veillard1e346af1999-02-22 10:33:01 +00003622 if (elem != NULL) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003623 cur->c2 = xmlNewElementContent(elem,
3624 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard1e346af1999-02-22 10:33:01 +00003625 free(elem);
3626 }
Daniel Veillard1899e851999-02-01 12:18:54 +00003627 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3628 SKIP(2);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003629 } else {
Daniel Veillard1e346af1999-02-22 10:33:01 +00003630 if (elem != NULL) free(elem);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003631 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003632 ctxt->sax->error(ctxt->userData,
Daniel Veillard3b9def11999-01-31 22:15:06 +00003633 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003634 ctxt->wellFormed = 0;
3635 xmlFreeElementContent(ret);
3636 return(NULL);
3637 }
3638
3639 } else {
3640 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003641 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003642 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
3643 ctxt->wellFormed = 0;
3644 }
3645 return(ret);
3646}
3647
3648/**
3649 * xmlParseElementChildrenContentDecl:
3650 * @ctxt: an XML parser context
3651 *
3652 * parse the declaration for a Mixed Element content
3653 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3654 *
3655 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00003656 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
3657 *
3658 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
3659 *
3660 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
3661 *
3662 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
3663 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003664 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
3665 * TODO Parameter-entity replacement text must be properly nested
3666 * with parenthetized groups. That is to say, if either of the
3667 * opening or closing parentheses in a choice, seq, or Mixed
3668 * construct is contained in the replacement text for a parameter
3669 * entity, both must be contained in the same replacement text. For
3670 * interoperability, if a parameter-entity reference appears in a
3671 * choice, seq, or Mixed construct, its replacement text should not
3672 * be empty, and neither the first nor last non-blank character of
3673 * the replacement text should be a connector (| or ,).
3674 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003675 * returns: the tree of xmlElementContentPtr describing the element
3676 * hierarchy.
3677 */
3678xmlElementContentPtr
3679xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
3680 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
3681 CHAR *elem;
3682 CHAR type = 0;
3683
3684 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00003685 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003686 if (CUR == '(') {
3687 /* Recurse on first child */
3688 NEXT;
3689 SKIP_BLANKS;
3690 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
3691 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00003692 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003693 } else {
3694 elem = xmlParseName(ctxt);
3695 if (elem == NULL) {
3696 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003697 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003698 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
3699 ctxt->wellFormed = 0;
3700 return(NULL);
3701 }
3702 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003703 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003704 if (CUR == '?') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00003705 cur->ocur = XML_ELEMENT_CONTENT_OPT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003706 NEXT;
3707 } else if (CUR == '*') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00003708 cur->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003709 NEXT;
3710 } else if (CUR == '+') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00003711 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003712 NEXT;
3713 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00003714 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003715 }
Daniel Veillard1e346af1999-02-22 10:33:01 +00003716 free(elem);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003717 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003718 }
3719 SKIP_BLANKS;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003720 SHRINK;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003721 while (CUR != ')') {
3722 /*
3723 * Each loop we parse one separator and one element.
3724 */
3725 if (CUR == ',') {
3726 if (type == 0) type = CUR;
3727
3728 /*
3729 * Detect "Name | Name , Name" error
3730 */
3731 else if (type != CUR) {
3732 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003733 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003734 "xmlParseElementChildrenContentDecl : '%c' expected\n",
3735 type);
3736 ctxt->wellFormed = 0;
3737 xmlFreeElementContent(ret);
3738 return(NULL);
3739 }
Daniel Veillard1899e851999-02-01 12:18:54 +00003740 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003741
3742 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
3743 if (op == NULL) {
3744 xmlFreeElementContent(ret);
3745 return(NULL);
3746 }
3747 if (last == NULL) {
3748 op->c1 = ret;
3749 ret = cur = op;
3750 } else {
3751 cur->c2 = op;
3752 op->c1 = last;
3753 cur =op;
Daniel Veillard1899e851999-02-01 12:18:54 +00003754 last = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003755 }
3756 } else if (CUR == '|') {
3757 if (type == 0) type = CUR;
3758
3759 /*
3760 * Detect "Name , Name | Name" error
3761 */
3762 else if (type != CUR) {
3763 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003764 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003765 "xmlParseElementChildrenContentDecl : '%c' expected\n",
3766 type);
3767 ctxt->wellFormed = 0;
3768 xmlFreeElementContent(ret);
3769 return(NULL);
3770 }
Daniel Veillard1899e851999-02-01 12:18:54 +00003771 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003772
3773 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3774 if (op == NULL) {
3775 xmlFreeElementContent(ret);
3776 return(NULL);
3777 }
3778 if (last == NULL) {
3779 op->c1 = ret;
3780 ret = cur = op;
3781 } else {
3782 cur->c2 = op;
3783 op->c1 = last;
3784 cur =op;
Daniel Veillard1899e851999-02-01 12:18:54 +00003785 last = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003786 }
3787 } else {
3788 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003789 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003790 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
3791 ctxt->wellFormed = 0;
3792 xmlFreeElementContent(ret);
3793 return(NULL);
3794 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00003795 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003796 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00003797 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003798 if (CUR == '(') {
3799 /* Recurse on second child */
3800 NEXT;
3801 SKIP_BLANKS;
Daniel Veillard1899e851999-02-01 12:18:54 +00003802 last = xmlParseElementChildrenContentDecl(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003803 SKIP_BLANKS;
3804 } else {
3805 elem = xmlParseName(ctxt);
3806 if (elem == NULL) {
3807 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003808 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003809 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
3810 ctxt->wellFormed = 0;
3811 return(NULL);
3812 }
Daniel Veillard1899e851999-02-01 12:18:54 +00003813 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard1e346af1999-02-22 10:33:01 +00003814 free(elem);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003815 if (CUR == '?') {
3816 last->ocur = XML_ELEMENT_CONTENT_OPT;
3817 NEXT;
3818 } else if (CUR == '*') {
3819 last->ocur = XML_ELEMENT_CONTENT_MULT;
3820 NEXT;
3821 } else if (CUR == '+') {
3822 last->ocur = XML_ELEMENT_CONTENT_PLUS;
3823 NEXT;
3824 } else {
3825 last->ocur = XML_ELEMENT_CONTENT_ONCE;
3826 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003827 }
3828 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00003829 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003830 }
Daniel Veillard1899e851999-02-01 12:18:54 +00003831 if ((cur != NULL) && (last != NULL)) {
3832 cur->c2 = last;
3833 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003834 NEXT;
3835 if (CUR == '?') {
3836 ret->ocur = XML_ELEMENT_CONTENT_OPT;
3837 NEXT;
3838 } else if (CUR == '*') {
3839 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3840 NEXT;
3841 } else if (CUR == '+') {
3842 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
3843 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003844 }
3845 return(ret);
3846}
3847
3848/**
3849 * xmlParseElementContentDecl:
3850 * @ctxt: an XML parser context
3851 * @name: the name of the element being defined.
3852 * @result: the Element Content pointer will be stored here if any
Daniel Veillard260a68f1998-08-13 03:39:55 +00003853 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003854 * parse the declaration for an Element content either Mixed or Children,
3855 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
3856 *
3857 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
Daniel Veillard11e00581998-10-24 18:27:49 +00003858 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003859 * returns: the type of element content XML_ELEMENT_TYPE_xxx
Daniel Veillard260a68f1998-08-13 03:39:55 +00003860 */
3861
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003862int
3863xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, CHAR *name,
3864 xmlElementContentPtr *result) {
3865
3866 xmlElementContentPtr tree = NULL;
3867 int res;
3868
3869 *result = NULL;
3870
3871 if (CUR != '(') {
3872 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003873 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003874 "xmlParseElementContentDecl : '(' expected\n");
3875 ctxt->wellFormed = 0;
3876 return(-1);
3877 }
3878 NEXT;
Daniel Veillardb05deb71999-08-10 19:04:08 +00003879 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003880 SKIP_BLANKS;
3881 if ((CUR == '#') && (NXT(1) == 'P') &&
3882 (NXT(2) == 'C') && (NXT(3) == 'D') &&
3883 (NXT(4) == 'A') && (NXT(5) == 'T') &&
3884 (NXT(6) == 'A')) {
3885 tree = xmlParseElementMixedContentDecl(ctxt);
3886 res = XML_ELEMENT_TYPE_MIXED;
3887 } else {
3888 tree = xmlParseElementChildrenContentDecl(ctxt);
3889 res = XML_ELEMENT_TYPE_ELEMENT;
3890 }
3891 SKIP_BLANKS;
3892 /****************************
3893 if (CUR != ')') {
3894 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003895 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003896 "xmlParseElementContentDecl : ')' expected\n");
3897 ctxt->wellFormed = 0;
3898 return(-1);
3899 }
3900 ****************************/
Daniel Veillard3b9def11999-01-31 22:15:06 +00003901 *result = tree;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003902 return(res);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003903}
3904
Daniel Veillard11e00581998-10-24 18:27:49 +00003905/**
3906 * xmlParseElementDecl:
3907 * @ctxt: an XML parser context
3908 *
3909 * parse an Element declaration.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003910 *
3911 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
3912 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003913 * [ VC: Unique Element Type Declaration ]
3914 * TODO No element type may be declared more than once
Daniel Veillard1e346af1999-02-22 10:33:01 +00003915 *
3916 * Returns the type of the element, or -1 in case of error
Daniel Veillard260a68f1998-08-13 03:39:55 +00003917 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003918int
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003919xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003920 CHAR *name;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003921 int ret = -1;
3922 xmlElementContentPtr content = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003923
Daniel Veillardb05deb71999-08-10 19:04:08 +00003924 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003925 if ((CUR == '<') && (NXT(1) == '!') &&
3926 (NXT(2) == 'E') && (NXT(3) == 'L') &&
3927 (NXT(4) == 'E') && (NXT(5) == 'M') &&
3928 (NXT(6) == 'E') && (NXT(7) == 'N') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003929 (NXT(8) == 'T')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003930 SKIP(9);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003931 if (!IS_BLANK(CUR)) {
3932 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003933 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003934 "Space required after 'ELEMENT'\n");
3935 ctxt->wellFormed = 0;
3936 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003937 SKIP_BLANKS;
3938 name = xmlParseName(ctxt);
3939 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003941 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003942 "xmlParseElementDecl: no name for Element\n");
3943 ctxt->wellFormed = 0;
3944 return(-1);
3945 }
3946 if (!IS_BLANK(CUR)) {
3947 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003948 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003949 "Space required after the element name\n");
3950 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003951 }
3952 SKIP_BLANKS;
3953 if ((CUR == 'E') && (NXT(1) == 'M') &&
3954 (NXT(2) == 'P') && (NXT(3) == 'T') &&
3955 (NXT(4) == 'Y')) {
3956 SKIP(5);
3957 /*
3958 * Element must always be empty.
3959 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003960 ret = XML_ELEMENT_TYPE_EMPTY;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003961 } else if ((CUR == 'A') && (NXT(1) == 'N') &&
3962 (NXT(2) == 'Y')) {
3963 SKIP(3);
3964 /*
3965 * Element is a generic container.
3966 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003967 ret = XML_ELEMENT_TYPE_ANY;
3968 } else if (CUR == '(') {
3969 ret = xmlParseElementContentDecl(ctxt, name, &content);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003970 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00003971 /*
3972 * [ WFC: PEs in Internal Subset ] error handling.
3973 */
3974 if ((CUR == '%') && (ctxt->external == 0) &&
3975 (ctxt->inputNr == 1)) {
3976 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3977 ctxt->sax->error(ctxt->userData,
3978 "PEReference: forbidden within markup decl in internal subset\n");
3979 } else {
3980 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3981 ctxt->sax->error(ctxt->userData,
3982 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
3983 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003984 ctxt->wellFormed = 0;
3985 if (name != NULL) free(name);
3986 return(-1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003987 }
3988 SKIP_BLANKS;
3989 if (CUR != '>') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003990 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003991 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00003992 "xmlParseElementDecl: expected '>' at the end\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003993 ctxt->wellFormed = 0;
3994 } else {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003995 NEXT;
Daniel Veillard517752b1999-04-05 12:20:10 +00003996 if ((ctxt->sax != NULL) && (ctxt->sax->elementDecl != NULL))
Daniel Veillard011b63c1999-06-02 17:44:04 +00003997 ctxt->sax->elementDecl(ctxt->userData, name, ret,
3998 content);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003999 }
Daniel Veillard14fff061999-06-22 21:49:07 +00004000 if (content != NULL) {
4001 xmlFreeElementContent(content);
4002 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004003 if (name != NULL) {
4004 free(name);
4005 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004006 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004007 return(ret);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004008}
4009
Daniel Veillard11e00581998-10-24 18:27:49 +00004010/**
4011 * xmlParseMarkupDecl:
4012 * @ctxt: an XML parser context
4013 *
4014 * parse Markup declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00004015 *
4016 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4017 * NotationDecl | PI | Comment
4018 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00004019 * [ VC: Proper Declaration/PE Nesting ]
4020 * TODO Parameter-entity replacement text must be properly nested with
4021 * markup declarations. That is to say, if either the first character
4022 * or the last character of a markup declaration (markupdecl above) is
4023 * contained in the replacement text for a parameter-entity reference,
4024 * both must be contained in the same replacement text.
4025 *
4026 * [ WFC: PEs in Internal Subset ]
4027 * In the internal DTD subset, parameter-entity references can occur
4028 * only where markup declarations can occur, not within markup declarations.
4029 * (This does not apply to references that occur in external parameter
4030 * entities or to the external subset.)
Daniel Veillard260a68f1998-08-13 03:39:55 +00004031 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004032void
4033xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004034 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004035 xmlParseElementDecl(ctxt);
4036 xmlParseAttributeListDecl(ctxt);
4037 xmlParseEntityDecl(ctxt);
4038 xmlParseNotationDecl(ctxt);
4039 xmlParsePI(ctxt);
4040 xmlParseComment(ctxt, 0);
Daniel Veillardb05deb71999-08-10 19:04:08 +00004041 /*
4042 * This is only for internal subset. On external entities,
4043 * the replacement is done before parsing stage
4044 */
4045 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4046 xmlParsePEReference(ctxt);
4047 ctxt->instate = XML_PARSER_DTD;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004048}
4049
Daniel Veillard11e00581998-10-24 18:27:49 +00004050/**
Daniel Veillard011b63c1999-06-02 17:44:04 +00004051 * xmlParseTextDecl:
4052 * @ctxt: an XML parser context
4053 *
4054 * parse an XML declaration header for external entities
4055 *
4056 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4057 *
4058 * Returns the only valuable info for an external parsed entity, the encoding
4059 */
4060
4061CHAR *
4062xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4063 CHAR *version;
4064 CHAR *encoding = NULL;
4065
4066 /*
4067 * We know that '<?xml' is here.
4068 */
4069 SKIP(5);
4070
4071 if (!IS_BLANK(CUR)) {
4072 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4073 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
4074 ctxt->wellFormed = 0;
4075 }
4076 SKIP_BLANKS;
4077
4078 /*
4079 * We may have the VersionInfo here.
4080 */
4081 version = xmlParseVersionInfo(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00004082
Daniel Veillard011b63c1999-06-02 17:44:04 +00004083 /* TODO: we should actually inherit from the referencing doc if absent
4084 if (version == NULL)
4085 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4086 ctxt->version = xmlStrdup(version);
4087 */
Daniel Veillardb05deb71999-08-10 19:04:08 +00004088
Daniel Veillard011b63c1999-06-02 17:44:04 +00004089 if (version != NULL)
4090 free(version);
4091
4092 /*
4093 * We must have the encoding declaration
4094 */
4095 if (!IS_BLANK(CUR)) {
4096 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4097 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
4098 ctxt->wellFormed = 0;
4099 }
4100 encoding = xmlParseEncodingDecl(ctxt);
4101
4102 SKIP_BLANKS;
4103 if ((CUR == '?') && (NXT(1) == '>')) {
4104 SKIP(2);
4105 } else if (CUR == '>') {
4106 /* Deprecated old WD ... */
4107 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4108 ctxt->sax->error(ctxt->userData, "XML declaration must end-up with '?>'\n");
4109 ctxt->wellFormed = 0;
4110 NEXT;
4111 } else {
4112 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4113 ctxt->sax->error(ctxt->userData, "parsing XML declaration: '?>' expected\n");
4114 ctxt->wellFormed = 0;
4115 MOVETO_ENDTAG(CUR_PTR);
4116 NEXT;
4117 }
4118 return(encoding);
4119}
4120
4121/*
4122 * xmlParseConditionalSections
4123 * @ctxt: an XML parser context
4124 *
4125 * TODO : Conditionnal section are not yet supported !
4126 *
4127 * [61] conditionalSect ::= includeSect | ignoreSect
4128 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4129 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4130 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4131 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4132 */
4133
4134void
4135xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4136 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4137 ctxt->sax->warning(ctxt->userData,
4138 "XML conditional section not supported\n");
4139 /*
4140 * Skip up to the end of the conditionnal section.
4141 */
4142 while ((CUR != 0) && ((CUR != ']') || (NXT(1) != ']') || (NXT(2) != '>')))
4143 NEXT;
4144 if (CUR == 0) {
4145 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4146 ctxt->sax->error(ctxt->userData,
4147 "XML conditional section not closed\n");
4148 ctxt->wellFormed = 0;
4149 }
4150}
4151
4152/**
4153 * xmlParseExternalSubset
4154 * @ctxt: an XML parser context
4155 *
4156 * parse Markup declarations from an external subset
4157 *
4158 * [30] extSubset ::= textDecl? extSubsetDecl
4159 *
4160 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4161 *
4162 * TODO There is a check [ VC: Proper Declaration/PE Nesting ]
4163 */
4164void
4165xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const CHAR *ExternalID,
4166 const CHAR *SystemID) {
4167 if ((CUR == '<') && (NXT(1) == '?') &&
4168 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4169 (NXT(4) == 'l')) {
4170 xmlParseTextDecl(ctxt);
4171 }
4172 if (ctxt->myDoc == NULL) {
4173 ctxt->myDoc = xmlNewDoc("1.0");
4174 }
4175 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4176 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4177
Daniel Veillardb05deb71999-08-10 19:04:08 +00004178 ctxt->instate = XML_PARSER_DTD;
4179 ctxt->external = 1;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004180 while (((CUR == '<') && (NXT(1) == '?')) ||
4181 ((CUR == '<') && (NXT(1) == '!')) ||
4182 IS_BLANK(CUR)) {
4183 if ((CUR == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4184 xmlParseConditionalSections(ctxt);
4185 } else if (IS_BLANK(CUR)) {
4186 NEXT;
4187 } else if (CUR == '%') {
4188 xmlParsePEReference(ctxt);
4189 } else
4190 xmlParseMarkupDecl(ctxt);
4191
4192 /*
4193 * Pop-up of finished entities.
4194 */
4195 while ((CUR == 0) && (ctxt->inputNr > 1))
4196 xmlPopInput(ctxt);
4197
4198 }
4199
4200 if (CUR != 0) {
4201 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4202 ctxt->sax->error(ctxt->userData,
4203 "Extra content at the end of the document\n");
4204 ctxt->wellFormed = 0;
4205 }
4206
4207}
4208
4209/**
Daniel Veillard011b63c1999-06-02 17:44:04 +00004210 * xmlParseReference:
4211 * @ctxt: an XML parser context
4212 *
4213 * parse and handle entity references in content, depending on the SAX
4214 * interface, this may end-up in a call to character() if this is a
4215 * CharRef, a predefined entity, if there is no reference() callback.
4216 * or if the parser was asked to switch to that mode.
4217 *
4218 * [67] Reference ::= EntityRef | CharRef
4219 */
4220void
4221xmlParseReference(xmlParserCtxtPtr ctxt) {
4222 xmlEntityPtr ent;
4223 CHAR *val;
4224 if (CUR != '&') return;
4225
4226 if (NXT(1) == '#') {
4227 CHAR out[2];
4228 int val = xmlParseCharRef(ctxt);
4229 /* TODO: invalid for UTF-8 variable encoding !!! */
4230 out[0] = val;
4231 out[1] = 0;
4232 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4233 ctxt->sax->characters(ctxt->userData, out, 1);
4234 } else {
4235 ent = xmlParseEntityRef(ctxt);
4236 if (ent == NULL) return;
4237 if ((ent->name != NULL) &&
4238 (ent->type != XML_INTERNAL_PREDEFINED_ENTITY) &&
4239 (ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4240 (ctxt->replaceEntities == 0)) {
4241
4242 /*
4243 * Create a node.
4244 */
4245 ctxt->sax->reference(ctxt->userData, ent->name);
4246 return;
4247 }
4248 val = ent->content;
4249 if (val == NULL) return;
4250 /*
4251 * inline the entity.
4252 */
4253 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4254 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
4255 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004256}
4257
Daniel Veillard11e00581998-10-24 18:27:49 +00004258/**
4259 * xmlParseEntityRef:
4260 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00004261 *
4262 * parse ENTITY references declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00004263 *
4264 * [68] EntityRef ::= '&' Name ';'
Daniel Veillard1e346af1999-02-22 10:33:01 +00004265 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00004266 * [ WFC: Entity Declared ]
4267 * In a document without any DTD, a document with only an internal DTD
4268 * subset which contains no parameter entity references, or a document
4269 * with "standalone='yes'", the Name given in the entity reference
4270 * must match that in an entity declaration, except that well-formed
4271 * documents need not declare any of the following entities: amp, lt,
4272 * gt, apos, quot. The declaration of a parameter entity must precede
4273 * any reference to it. Similarly, the declaration of a general entity
4274 * must precede any reference to it which appears in a default value in an
4275 * attribute-list declaration. Note that if entities are declared in the
4276 * external subset or in external parameter entities, a non-validating
4277 * processor is not obligated to read and process their declarations;
4278 * for such documents, the rule that an entity must be declared is a
4279 * well-formedness constraint only if standalone='yes'.
4280 *
4281 * [ WFC: Parsed Entity ]
4282 * An entity reference must not contain the name of an unparsed entity
4283 *
Daniel Veillard011b63c1999-06-02 17:44:04 +00004284 * Returns the xmlEntityPtr if found, or NULL otherwise.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004285 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00004286xmlEntityPtr
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004287xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004288 CHAR *name;
Daniel Veillard517752b1999-04-05 12:20:10 +00004289 xmlEntityPtr ent = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004290
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004291 GROW;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004292
Daniel Veillard260a68f1998-08-13 03:39:55 +00004293 if (CUR == '&') {
4294 NEXT;
4295 name = xmlParseName(ctxt);
4296 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004297 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00004298 ctxt->sax->error(ctxt->userData,
4299 "xmlParseEntityRef: no name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004300 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004301 } else {
4302 if (CUR == ';') {
4303 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004304 /*
Daniel Veillard011b63c1999-06-02 17:44:04 +00004305 * Ask first SAX for entity resolution, otherwise try the
4306 * predefined set.
4307 */
4308 if (ctxt->sax != NULL) {
4309 if (ctxt->sax->getEntity != NULL)
4310 ent = ctxt->sax->getEntity(ctxt->userData, name);
4311 if (ent == NULL)
4312 ent = xmlGetPredefinedEntity(name);
4313 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00004314 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00004315 * [ WFC: Entity Declared ]
4316 * In a document without any DTD, a document with only an
4317 * internal DTD subset which contains no parameter entity
4318 * references, or a document with "standalone='yes'", the
4319 * Name given in the entity reference must match that in an
4320 * entity declaration, except that well-formed documents
4321 * need not declare any of the following entities: amp, lt,
4322 * gt, apos, quot.
4323 * The declaration of a parameter entity must precede any
4324 * reference to it.
4325 * Similarly, the declaration of a general entity must
4326 * precede any reference to it which appears in a default
4327 * value in an attribute-list declaration. Note that if
4328 * entities are declared in the external subset or in
4329 * external parameter entities, a non-validating processor
4330 * is not obligated to read and process their declarations;
4331 * for such documents, the rule that an entity must be
4332 * declared is a well-formedness constraint only if
4333 * standalone='yes'.
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004334 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00004335 if (ent == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004336 if ((ctxt->standalone == 1) ||
4337 ((ctxt->hasExternalSubset == 0) &&
4338 (ctxt->hasPErefs == 0))) {
4339 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard011b63c1999-06-02 17:44:04 +00004340 ctxt->sax->error(ctxt->userData,
4341 "Entity '%s' not defined\n", name);
4342 ctxt->wellFormed = 0;
Daniel Veillard011b63c1999-06-02 17:44:04 +00004343 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004344 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4345 ctxt->sax->warning(ctxt->userData,
4346 "Entity '%s' not defined\n", name);
Daniel Veillard011b63c1999-06-02 17:44:04 +00004347 }
4348 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004349
4350 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00004351 * [ WFC: Parsed Entity ]
4352 * An entity reference must not contain the name of an
4353 * unparsed entity
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004354 */
Daniel Veillardb05deb71999-08-10 19:04:08 +00004355 else if (ent->type == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
4356 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4357 ctxt->sax->error(ctxt->userData,
4358 "Entity reference to unparsed entity %s\n", name);
4359 ctxt->wellFormed = 0;
4360 }
4361
4362 /*
4363 * [ WFC: No External Entity References ]
4364 * Attribute values cannot contain direct or indirect
4365 * entity references to external entities.
4366 */
4367 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
4368 (ent->type == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
4369 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4370 ctxt->sax->error(ctxt->userData,
4371 "Attribute references external entity '%s'\n", name);
4372 ctxt->wellFormed = 0;
4373 }
4374 /*
4375 * [ WFC: No < in Attribute Values ]
4376 * The replacement text of any entity referred to directly or
4377 * indirectly in an attribute value (other than "&lt;") must
4378 * not contain a <.
4379 */
4380 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
4381 (ent != NULL) && (xmlStrcmp(ent->name, "lt")) &&
4382 (ent->content != NULL) &&
4383 (xmlStrchr(ent->content, '<'))) {
4384 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4385 ctxt->sax->error(ctxt->userData,
4386 "'<' in entity '%s' is not allowed in attributes values\n", name);
4387 ctxt->wellFormed = 0;
4388 }
4389
4390 /*
4391 * Internal check, no parameter entities here ...
4392 */
4393 else {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004394 switch (ent->type) {
4395 case XML_INTERNAL_PARAMETER_ENTITY:
4396 case XML_EXTERNAL_PARAMETER_ENTITY:
4397 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004398 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004399 "Attempt to reference the parameter entity '%s'\n", name);
4400 ctxt->wellFormed = 0;
4401 break;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004402 }
4403 }
4404
4405 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00004406 * [ WFC: No Recursion ]
4407 * TODO A parsed entity must not contain a recursive
4408 * reference to itself, either directly or indirectly.
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004409 */
Daniel Veillard260a68f1998-08-13 03:39:55 +00004410
Daniel Veillard011b63c1999-06-02 17:44:04 +00004411 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004412 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004413 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004414 "xmlParseEntityRef: expecting ';'\n");
4415 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004416 }
4417 free(name);
4418 }
4419 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00004420 return(ent);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004421}
4422
Daniel Veillard11e00581998-10-24 18:27:49 +00004423/**
4424 * xmlParsePEReference:
4425 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00004426 *
4427 * parse PEReference declarations
Daniel Veillard011b63c1999-06-02 17:44:04 +00004428 * The entity content is handled directly by pushing it's content as
4429 * a new input stream.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004430 *
4431 * [69] PEReference ::= '%' Name ';'
Daniel Veillard1e346af1999-02-22 10:33:01 +00004432 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00004433 * [ WFC: No Recursion ]
4434 * TODO A parsed entity must not contain a recursive
4435 * reference to itself, either directly or indirectly.
4436 *
4437 * [ WFC: Entity Declared ]
4438 * In a document without any DTD, a document with only an internal DTD
4439 * subset which contains no parameter entity references, or a document
4440 * with "standalone='yes'", ... ... The declaration of a parameter
4441 * entity must precede any reference to it...
4442 *
4443 * [ VC: Entity Declared ]
4444 * In a document with an external subset or external parameter entities
4445 * with "standalone='no'", ... ... The declaration of a parameter entity
4446 * must precede any reference to it...
4447 *
4448 * [ WFC: In DTD ]
4449 * Parameter-entity references may only appear in the DTD.
4450 * NOTE: misleading but this is handled.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004451 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00004452void
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004453xmlParsePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004454 CHAR *name;
Daniel Veillard517752b1999-04-05 12:20:10 +00004455 xmlEntityPtr entity = NULL;
Daniel Veillardccb09631998-10-27 06:21:04 +00004456 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004457
4458 if (CUR == '%') {
4459 NEXT;
4460 name = xmlParseName(ctxt);
4461 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004462 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004463 ctxt->sax->error(ctxt->userData, "xmlParsePEReference: no name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004464 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004465 } else {
4466 if (CUR == ';') {
4467 NEXT;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004468 if ((ctxt->sax != NULL) &&
4469 (ctxt->sax->getParameterEntity != NULL))
4470 entity = ctxt->sax->getParameterEntity(ctxt->userData,
4471 name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004472 if (entity == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004473 /*
4474 * [ WFC: Entity Declared ]
4475 * In a document without any DTD, a document with only an
4476 * internal DTD subset which contains no parameter entity
4477 * references, or a document with "standalone='yes'", ...
4478 * ... The declaration of a parameter entity must precede
4479 * any reference to it...
4480 */
4481 if ((ctxt->standalone == 1) ||
4482 ((ctxt->hasExternalSubset == 0) &&
4483 (ctxt->hasPErefs == 0))) {
4484 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4485 ctxt->sax->error(ctxt->userData,
4486 "PEReference: %%%s; not found\n", name);
4487 ctxt->wellFormed = 0;
4488 } else {
4489 /*
4490 * [ VC: Entity Declared ]
4491 * In a document with an external subset or external
4492 * parameter entities with "standalone='no'", ...
4493 * ... The declaration of a parameter entity must precede
4494 * any reference to it...
4495 */
4496 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4497 ctxt->sax->warning(ctxt->userData,
4498 "PEReference: %%%s; not found\n", name);
4499 ctxt->valid = 0;
4500 }
Daniel Veillardccb09631998-10-27 06:21:04 +00004501 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004502 /*
4503 * Internal checking in case the entity quest barfed
4504 */
4505 if ((entity->type != XML_INTERNAL_PARAMETER_ENTITY) &&
4506 (entity->type != XML_EXTERNAL_PARAMETER_ENTITY)) {
4507 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4508 ctxt->sax->warning(ctxt->userData,
4509 "Internal: %%%s; is not a parameter entity\n", name);
4510 } else {
4511 input = xmlNewEntityInputStream(ctxt, entity);
4512 xmlPushInput(ctxt, input);
4513 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004514 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00004515 ctxt->hasPErefs = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004516 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004518 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004519 "xmlParsePEReference: expecting ';'\n");
4520 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004521 }
4522 free(name);
4523 }
4524 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004525}
4526
Daniel Veillard11e00581998-10-24 18:27:49 +00004527/**
4528 * xmlParseDocTypeDecl :
4529 * @ctxt: an XML parser context
4530 *
4531 * parse a DOCTYPE declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00004532 *
4533 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
4534 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
Daniel Veillardb05deb71999-08-10 19:04:08 +00004535 *
4536 * [ VC: Root Element Type ]
4537 * The Name in the document type declaration must match the element
4538 * type of the root element.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004539 */
4540
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004541void
4542xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004543 CHAR *name;
4544 CHAR *ExternalID = NULL;
4545 CHAR *URI = NULL;
4546
4547 /*
4548 * We know that '<!DOCTYPE' has been detected.
4549 */
4550 SKIP(9);
4551
4552 SKIP_BLANKS;
4553
4554 /*
4555 * Parse the DOCTYPE name.
4556 */
4557 name = xmlParseName(ctxt);
4558 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004559 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004560 ctxt->sax->error(ctxt->userData, "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004561 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004562 }
4563
4564 SKIP_BLANKS;
4565
4566 /*
4567 * Check for SystemID and ExternalID
4568 */
Daniel Veillard1e346af1999-02-22 10:33:01 +00004569 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
Daniel Veillardb05deb71999-08-10 19:04:08 +00004570
4571 if ((URI != NULL) || (ExternalID != NULL)) {
4572 ctxt->hasExternalSubset = 1;
4573 }
4574
Daniel Veillard260a68f1998-08-13 03:39:55 +00004575 SKIP_BLANKS;
4576
Daniel Veillard011b63c1999-06-02 17:44:04 +00004577 /*
4578 * NOTE: the SAX callback may try to fetch the external subset
4579 * entity and fill it up !
4580 */
Daniel Veillard517752b1999-04-05 12:20:10 +00004581 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004582 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004583
4584 /*
4585 * Is there any DTD definition ?
4586 */
4587 if (CUR == '[') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004588 ctxt->instate = XML_PARSER_DTD;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004589 NEXT;
4590 /*
4591 * Parse the succession of Markup declarations and
4592 * PEReferences.
4593 * Subsequence (markupdecl | PEReference | S)*
4594 */
4595 while (CUR != ']') {
4596 const CHAR *check = CUR_PTR;
4597
4598 SKIP_BLANKS;
4599 xmlParseMarkupDecl(ctxt);
Daniel Veillardccb09631998-10-27 06:21:04 +00004600 xmlParsePEReference(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004601
4602 if (CUR_PTR == check) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004603 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004604 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00004605 "xmlParseDocTypeDecl: error detected in Markup declaration\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004606 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004607 break;
4608 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00004609
4610 /*
4611 * Pop-up of finished entities.
4612 */
4613 while ((CUR == 0) && (ctxt->inputNr > 1))
4614 xmlPopInput(ctxt);
4615
Daniel Veillard260a68f1998-08-13 03:39:55 +00004616 }
4617 if (CUR == ']') NEXT;
4618 }
4619
4620 /*
4621 * We should be at the end of the DOCTYPE declaration.
4622 */
4623 if (CUR != '>') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004624 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004625 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004626 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004627 /* We shouldn't try to resynchronize ... */
4628 }
4629 NEXT;
4630
4631 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00004632 * Cleanup
Daniel Veillard260a68f1998-08-13 03:39:55 +00004633 */
4634 if (URI != NULL) free(URI);
4635 if (ExternalID != NULL) free(ExternalID);
4636 if (name != NULL) free(name);
4637}
4638
Daniel Veillard11e00581998-10-24 18:27:49 +00004639/**
4640 * xmlParseAttribute:
4641 * @ctxt: an XML parser context
Daniel Veillard517752b1999-04-05 12:20:10 +00004642 * @value: a CHAR ** used to store the value of the attribute
Daniel Veillard11e00581998-10-24 18:27:49 +00004643 *
4644 * parse an attribute
Daniel Veillard260a68f1998-08-13 03:39:55 +00004645 *
4646 * [41] Attribute ::= Name Eq AttValue
4647 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00004648 * [ WFC: No External Entity References ]
4649 * Attribute values cannot contain direct or indirect entity references
4650 * to external entities.
4651 *
4652 * [ WFC: No < in Attribute Values ]
4653 * The replacement text of any entity referred to directly or indirectly in
4654 * an attribute value (other than "&lt;") must not contain a <.
4655 *
4656 * [ VC: Attribute Value Type ]
4657 * TODO The attribute must have been declared; the value must be of the type
4658 * declared for it.
4659 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00004660 * [25] Eq ::= S? '=' S?
4661 *
4662 * With namespace:
4663 *
4664 * [NS 11] Attribute ::= QName Eq AttValue
4665 *
4666 * Also the case QName == xmlns:??? is handled independently as a namespace
4667 * definition.
Daniel Veillard1e346af1999-02-22 10:33:01 +00004668 *
Daniel Veillard517752b1999-04-05 12:20:10 +00004669 * Returns the attribute name, and the value in *value.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004670 */
4671
Daniel Veillard517752b1999-04-05 12:20:10 +00004672CHAR *
4673xmlParseAttribute(xmlParserCtxtPtr ctxt, CHAR **value) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004674 CHAR *name, *val;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004675
Daniel Veillard517752b1999-04-05 12:20:10 +00004676 *value = NULL;
4677 name = xmlParseName(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004678 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004679 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004680 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004681 ctxt->wellFormed = 0;
Daniel Veillardccb09631998-10-27 06:21:04 +00004682 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004683 }
4684
4685 /*
4686 * read the value
4687 */
4688 SKIP_BLANKS;
4689 if (CUR == '=') {
4690 NEXT;
4691 SKIP_BLANKS;
Daniel Veillard517752b1999-04-05 12:20:10 +00004692 val = xmlParseAttValue(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00004693 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004694 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004695 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004696 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004697 "Specification mandate value for attribute %s\n", name);
4698 ctxt->wellFormed = 0;
Daniel Veillardccb09631998-10-27 06:21:04 +00004699 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004700 }
4701
Daniel Veillard517752b1999-04-05 12:20:10 +00004702 *value = val;
4703 return(name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004704}
4705
Daniel Veillard11e00581998-10-24 18:27:49 +00004706/**
4707 * xmlParseStartTag:
4708 * @ctxt: an XML parser context
4709 *
4710 * parse a start of tag either for rule element or
4711 * EmptyElement. In both case we don't parse the tag closing chars.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004712 *
4713 * [40] STag ::= '<' Name (S Attribute)* S? '>'
4714 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00004715 * [ WFC: Unique Att Spec ]
4716 * No attribute name may appear more than once in the same start-tag or
4717 * empty-element tag.
4718 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00004719 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
4720 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00004721 * [ WFC: Unique Att Spec ]
4722 * No attribute name may appear more than once in the same start-tag or
4723 * empty-element tag.
4724 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00004725 * With namespace:
4726 *
4727 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
4728 *
4729 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
Daniel Veillard14fff061999-06-22 21:49:07 +00004730 *
4731 * Returns the element name parsed
Daniel Veillard260a68f1998-08-13 03:39:55 +00004732 */
4733
Daniel Veillard14fff061999-06-22 21:49:07 +00004734CHAR *
Daniel Veillard1e346af1999-02-22 10:33:01 +00004735xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard517752b1999-04-05 12:20:10 +00004736 CHAR *name;
4737 CHAR *attname;
4738 CHAR *attvalue;
4739 const CHAR **atts = NULL;
4740 int nbatts = 0;
4741 int maxatts = 0;
4742 int i;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004743
Daniel Veillard14fff061999-06-22 21:49:07 +00004744 if (CUR != '<') return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004745 NEXT;
4746
Daniel Veillard517752b1999-04-05 12:20:10 +00004747 name = xmlParseName(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004748 if (name == NULL) {
4749 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004750 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004751 "xmlParseStartTag: invalid element name\n");
4752 ctxt->wellFormed = 0;
Daniel Veillard14fff061999-06-22 21:49:07 +00004753 return(NULL);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004754 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004755
4756 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00004757 * Now parse the attributes, it ends up with the ending
4758 *
4759 * (S Attribute)* S?
4760 */
4761 SKIP_BLANKS;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004762 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004763 while ((IS_CHAR(CUR)) &&
4764 (CUR != '>') &&
4765 ((CUR != '/') || (NXT(1) != '>'))) {
4766 const CHAR *q = CUR_PTR;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004767 int cons = ctxt->input->consumed;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004768
Daniel Veillard517752b1999-04-05 12:20:10 +00004769 attname = xmlParseAttribute(ctxt, &attvalue);
4770 if ((attname != NULL) && (attvalue != NULL)) {
4771 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00004772 * [ WFC: Unique Att Spec ]
4773 * No attribute name may appear more than once in the same
4774 * start-tag or empty-element tag.
Daniel Veillard517752b1999-04-05 12:20:10 +00004775 */
4776 for (i = 0; i < nbatts;i += 2) {
4777 if (!xmlStrcmp(atts[i], attname)) {
4778 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00004779 ctxt->sax->error(ctxt->userData,
4780 "Attribute %s redefined\n",
4781 attname);
Daniel Veillard517752b1999-04-05 12:20:10 +00004782 ctxt->wellFormed = 0;
4783 free(attname);
4784 free(attvalue);
Daniel Veillardb05deb71999-08-10 19:04:08 +00004785 goto failed;
Daniel Veillard517752b1999-04-05 12:20:10 +00004786 }
4787 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004788
Daniel Veillard517752b1999-04-05 12:20:10 +00004789 /*
4790 * Add the pair to atts
4791 */
4792 if (atts == NULL) {
4793 maxatts = 10;
4794 atts = (const CHAR **) malloc(maxatts * sizeof(CHAR *));
4795 if (atts == NULL) {
Daniel Veillardbe70ff71999-07-05 16:50:46 +00004796 fprintf(stderr, "malloc of %ld byte failed\n",
4797 maxatts * (long)sizeof(CHAR *));
Daniel Veillard14fff061999-06-22 21:49:07 +00004798 return(NULL);
Daniel Veillard517752b1999-04-05 12:20:10 +00004799 }
4800 } else if (nbatts + 2 < maxatts) {
4801 maxatts *= 2;
4802 atts = (const CHAR **) realloc(atts, maxatts * sizeof(CHAR *));
4803 if (atts == NULL) {
Daniel Veillardbe70ff71999-07-05 16:50:46 +00004804 fprintf(stderr, "realloc of %ld byte failed\n",
4805 maxatts * (long)sizeof(CHAR *));
Daniel Veillard14fff061999-06-22 21:49:07 +00004806 return(NULL);
Daniel Veillard517752b1999-04-05 12:20:10 +00004807 }
4808 }
4809 atts[nbatts++] = attname;
4810 atts[nbatts++] = attvalue;
4811 atts[nbatts] = NULL;
4812 atts[nbatts + 1] = NULL;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004813failed:
Daniel Veillard517752b1999-04-05 12:20:10 +00004814 }
4815
4816 SKIP_BLANKS;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004817 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004818 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004819 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00004820 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004821 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004822 break;
4823 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004824 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004825 }
4826
4827 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00004828 * SAX: Start of Element !
4829 */
Daniel Veillard517752b1999-04-05 12:20:10 +00004830 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004831 ctxt->sax->startElement(ctxt->userData, name, atts);
Daniel Veillard517752b1999-04-05 12:20:10 +00004832
Daniel Veillard517752b1999-04-05 12:20:10 +00004833 if (atts != NULL) {
4834 for (i = 0;i < nbatts;i++) free((CHAR *) atts[i]);
4835 free(atts);
4836 }
Daniel Veillard14fff061999-06-22 21:49:07 +00004837 return(name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004838}
4839
Daniel Veillard11e00581998-10-24 18:27:49 +00004840/**
4841 * xmlParseEndTag:
4842 * @ctxt: an XML parser context
Daniel Veillard14fff061999-06-22 21:49:07 +00004843 * @tagname: the tag name as parsed in the opening tag.
Daniel Veillard11e00581998-10-24 18:27:49 +00004844 *
4845 * parse an end of tag
Daniel Veillard260a68f1998-08-13 03:39:55 +00004846 *
4847 * [42] ETag ::= '</' Name S? '>'
4848 *
4849 * With namespace
4850 *
Daniel Veillard517752b1999-04-05 12:20:10 +00004851 * [NS 9] ETag ::= '</' QName S? '>'
Daniel Veillard260a68f1998-08-13 03:39:55 +00004852 */
4853
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004854void
Daniel Veillard14fff061999-06-22 21:49:07 +00004855xmlParseEndTag(xmlParserCtxtPtr ctxt, CHAR *tagname) {
Daniel Veillard517752b1999-04-05 12:20:10 +00004856 CHAR *name;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004857
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004858 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004859 if ((CUR != '<') || (NXT(1) != '/')) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004860 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004861 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004862 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004863 return;
4864 }
4865 SKIP(2);
4866
Daniel Veillard517752b1999-04-05 12:20:10 +00004867 name = xmlParseName(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004868
4869 /*
4870 * We should definitely be at the ending "S? '>'" part
4871 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004872 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004873 SKIP_BLANKS;
4874 if ((!IS_CHAR(CUR)) || (CUR != '>')) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004875 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004876 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004877 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004878 } else
4879 NEXT;
4880
Daniel Veillard517752b1999-04-05 12:20:10 +00004881 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00004882 * [ WFC: Element Type Match ]
4883 * The Name in an element's end-tag must match the element type in the
4884 * start-tag.
4885 *
Daniel Veillard14fff061999-06-22 21:49:07 +00004886 */
4887 if (xmlStrcmp(name, tagname)) {
4888 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4889 ctxt->sax->error(ctxt->userData,
4890 "Opening and ending tag mismatch: %s and %s\n", tagname, name);
4891 ctxt->wellFormed = 0;
4892 }
4893
4894 /*
Daniel Veillard517752b1999-04-05 12:20:10 +00004895 * SAX: End of Tag
4896 */
4897 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004898 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard517752b1999-04-05 12:20:10 +00004899
4900 if (name != NULL)
4901 free(name);
4902
Daniel Veillard260a68f1998-08-13 03:39:55 +00004903 return;
4904}
4905
Daniel Veillard11e00581998-10-24 18:27:49 +00004906/**
4907 * xmlParseCDSect:
4908 * @ctxt: an XML parser context
4909 *
4910 * Parse escaped pure raw content.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004911 *
4912 * [18] CDSect ::= CDStart CData CDEnd
4913 *
4914 * [19] CDStart ::= '<![CDATA['
4915 *
4916 * [20] Data ::= (Char* - (Char* ']]>' Char*))
4917 *
4918 * [21] CDEnd ::= ']]>'
4919 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004920void
4921xmlParseCDSect(xmlParserCtxtPtr ctxt) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004922 const CHAR *base;
4923 CHAR r, s;
4924 CHAR cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004925
Daniel Veillardb05deb71999-08-10 19:04:08 +00004926 if ((NXT(0) == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00004927 (NXT(2) == '[') && (NXT(3) == 'C') &&
4928 (NXT(4) == 'D') && (NXT(5) == 'A') &&
4929 (NXT(6) == 'T') && (NXT(7) == 'A') &&
4930 (NXT(8) == '[')) {
4931 SKIP(9);
4932 } else
4933 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004934
4935 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004936 base = CUR_PTR;
4937 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004938 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004939 ctxt->sax->error(ctxt->userData, "CData section not finished\n%.50s\n", base);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004940 ctxt->wellFormed = 0;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004941 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004942 return;
4943 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00004944 r = CUR;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004945 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004946 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004947 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004948 ctxt->sax->error(ctxt->userData, "CData section not finished\n%.50s\n", base);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004949 ctxt->wellFormed = 0;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004950 ctxt->instate = XML_PARSER_CONTENT;
4951 return;
4952 }
4953 s = CUR;
4954 NEXT;
4955 cur = CUR;
4956 while (IS_CHAR(cur) &&
4957 ((r != ']') || (s != ']') || (cur != '>'))) {
4958 r = s;
4959 s = cur;
4960 NEXT;
4961 cur = CUR;
4962 }
4963 ctxt->instate = XML_PARSER_CONTENT;
4964 if (!IS_CHAR(CUR)) {
4965 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4966 ctxt->sax->error(ctxt->userData, "CData section not finished\n%.50s\n", base);
4967 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004968 return;
4969 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004970 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004971
4972 /*
4973 * Ok the segment [base CUR_PTR] is to be consumed as chars.
4974 */
4975 if (ctxt->sax != NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004976 if (ctxt->sax->cdataBlock != NULL)
4977 ctxt->sax->cdataBlock(ctxt->userData, base, (CUR_PTR - base) - 3);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004978 }
4979}
4980
Daniel Veillard11e00581998-10-24 18:27:49 +00004981/**
4982 * xmlParseContent:
4983 * @ctxt: an XML parser context
4984 *
4985 * Parse a content:
Daniel Veillard260a68f1998-08-13 03:39:55 +00004986 *
4987 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
4988 */
4989
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004990void
4991xmlParseContent(xmlParserCtxtPtr ctxt) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004992 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004993 while ((CUR != '<') || (NXT(1) != '/')) {
4994 const CHAR *test = CUR_PTR;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004995 int cons = ctxt->input->consumed;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004996
4997 /*
4998 * First case : a Processing Instruction.
4999 */
5000 if ((CUR == '<') && (NXT(1) == '?')) {
5001 xmlParsePI(ctxt);
5002 }
Daniel Veillard517752b1999-04-05 12:20:10 +00005003
Daniel Veillard260a68f1998-08-13 03:39:55 +00005004 /*
5005 * Second case : a CDSection
5006 */
5007 else if ((CUR == '<') && (NXT(1) == '!') &&
5008 (NXT(2) == '[') && (NXT(3) == 'C') &&
5009 (NXT(4) == 'D') && (NXT(5) == 'A') &&
5010 (NXT(6) == 'T') && (NXT(7) == 'A') &&
5011 (NXT(8) == '[')) {
5012 xmlParseCDSect(ctxt);
5013 }
Daniel Veillard517752b1999-04-05 12:20:10 +00005014
Daniel Veillard260a68f1998-08-13 03:39:55 +00005015 /*
5016 * Third case : a comment
5017 */
5018 else if ((CUR == '<') && (NXT(1) == '!') &&
5019 (NXT(2) == '-') && (NXT(3) == '-')) {
Daniel Veillard517752b1999-04-05 12:20:10 +00005020 xmlParseComment(ctxt, 1);
Daniel Veillardb05deb71999-08-10 19:04:08 +00005021 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005022 }
Daniel Veillard517752b1999-04-05 12:20:10 +00005023
Daniel Veillard260a68f1998-08-13 03:39:55 +00005024 /*
5025 * Fourth case : a sub-element.
5026 */
5027 else if (CUR == '<') {
Daniel Veillard517752b1999-04-05 12:20:10 +00005028 xmlParseElement(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005029 }
Daniel Veillard517752b1999-04-05 12:20:10 +00005030
Daniel Veillard260a68f1998-08-13 03:39:55 +00005031 /*
Daniel Veillardccb09631998-10-27 06:21:04 +00005032 * Fifth case : a reference. If if has not been resolved,
5033 * parsing returns it's Name, create the node
Daniel Veillard260a68f1998-08-13 03:39:55 +00005034 */
Daniel Veillardb05deb71999-08-10 19:04:08 +00005035
Daniel Veillard260a68f1998-08-13 03:39:55 +00005036 else if (CUR == '&') {
Daniel Veillard011b63c1999-06-02 17:44:04 +00005037 xmlParseReference(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005038 }
Daniel Veillard517752b1999-04-05 12:20:10 +00005039
Daniel Veillard260a68f1998-08-13 03:39:55 +00005040 /*
5041 * Last case, text. Note that References are handled directly.
5042 */
5043 else {
5044 xmlParseCharData(ctxt, 0);
5045 }
5046
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005047 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005048 /*
5049 * Pop-up of finished entities.
5050 */
Daniel Veillardbc50b591999-03-01 12:28:53 +00005051 while ((CUR == 0) && (ctxt->inputNr > 1))
5052 xmlPopInput(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005053
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005054 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005055 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005056 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005057 "detected an error in element content\n");
5058 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005059 break;
5060 }
5061 }
5062}
5063
Daniel Veillard11e00581998-10-24 18:27:49 +00005064/**
5065 * xmlParseElement:
5066 * @ctxt: an XML parser context
5067 *
5068 * parse an XML element, this is highly recursive
Daniel Veillard260a68f1998-08-13 03:39:55 +00005069 *
5070 * [39] element ::= EmptyElemTag | STag content ETag
5071 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005072 * [ WFC: Element Type Match ]
5073 * The Name in an element's end-tag must match the element type in the
5074 * start-tag.
5075 *
5076 * [ VC: Element Valid ]
5077 * TODO An element is valid if there is a declaration matching elementdecl
5078 * where the Name matches the element type and one of the following holds:
5079 * - The declaration matches EMPTY and the element has no content.
5080 * - The declaration matches children and the sequence of child elements
5081 * belongs to the language generated by the regular expression in the
5082 * content model, with optional white space (characters matching the
5083 * nonterminal S) between each pair of child elements.
5084 * - The declaration matches Mixed and the content consists of character
5085 * data and child elements whose types match names in the content model.
5086 * - The declaration matches ANY, and the types of any child elements have
5087 * been declared.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005088 */
5089
Daniel Veillard517752b1999-04-05 12:20:10 +00005090void
Daniel Veillard1e346af1999-02-22 10:33:01 +00005091xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00005092 const CHAR *openTag = CUR_PTR;
Daniel Veillard14fff061999-06-22 21:49:07 +00005093 CHAR *name;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005094 xmlParserNodeInfo node_info;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005095
5096 /* Capture start position */
5097 node_info.begin_pos = CUR_PTR - ctxt->input->base;
5098 node_info.begin_line = ctxt->input->line;
5099
Daniel Veillard14fff061999-06-22 21:49:07 +00005100 name = xmlParseStartTag(ctxt);
5101 if (name == NULL) {
5102 return;
5103 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005104
5105 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00005106 * [ VC: Root Element Type ]
5107 * The Name in the document type declaration must match the element
5108 * type of the root element.
5109 */
5110 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
5111 ctxt->node && (ctxt->node == ctxt->myDoc->root))
5112 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
5113
5114 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00005115 * Check for an Empty Element.
5116 */
5117 if ((CUR == '/') && (NXT(1) == '>')) {
5118 SKIP(2);
Daniel Veillard517752b1999-04-05 12:20:10 +00005119 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
Daniel Veillard14fff061999-06-22 21:49:07 +00005120 ctxt->sax->endElement(ctxt->userData, name);
5121 free(name);
Daniel Veillard517752b1999-04-05 12:20:10 +00005122 return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005123 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005124 if (CUR == '>') {
5125 NEXT;
5126 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005127 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005128 ctxt->sax->error(ctxt->userData, "Couldn't find end of Start Tag\n%.30s\n",
Daniel Veillard242590e1998-11-13 18:04:35 +00005129 openTag);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005130 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005131
5132 /*
5133 * end of parsing of this node.
5134 */
5135 nodePop(ctxt);
Daniel Veillard14fff061999-06-22 21:49:07 +00005136 free(name);
Daniel Veillard517752b1999-04-05 12:20:10 +00005137 return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005138 }
5139
5140 /*
5141 * Parse the content of the element:
5142 */
5143 xmlParseContent(ctxt);
5144 if (!IS_CHAR(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005145 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005146 ctxt->sax->error(ctxt->userData,
Daniel Veillard242590e1998-11-13 18:04:35 +00005147 "Premature end of data in tag %.30s\n", openTag);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005148 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005149
5150 /*
5151 * end of parsing of this node.
5152 */
5153 nodePop(ctxt);
Daniel Veillard14fff061999-06-22 21:49:07 +00005154 free(name);
Daniel Veillard517752b1999-04-05 12:20:10 +00005155 return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005156 }
5157
5158 /*
5159 * parse the end of tag: '</' should be here.
5160 */
Daniel Veillard14fff061999-06-22 21:49:07 +00005161 xmlParseEndTag(ctxt, name);
5162 free(name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005163}
5164
Daniel Veillard11e00581998-10-24 18:27:49 +00005165/**
5166 * xmlParseVersionNum:
5167 * @ctxt: an XML parser context
5168 *
5169 * parse the XML version value.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005170 *
5171 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
Daniel Veillard1e346af1999-02-22 10:33:01 +00005172 *
5173 * Returns the string giving the XML version number, or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00005174 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005175CHAR *
5176xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00005177 const CHAR *q = CUR_PTR;
5178 CHAR *ret;
5179
5180 while (IS_CHAR(CUR) &&
5181 (((CUR >= 'a') && (CUR <= 'z')) ||
5182 ((CUR >= 'A') && (CUR <= 'Z')) ||
5183 ((CUR >= '0') && (CUR <= '9')) ||
5184 (CUR == '_') || (CUR == '.') ||
5185 (CUR == ':') || (CUR == '-'))) NEXT;
5186 ret = xmlStrndup(q, CUR_PTR - q);
5187 return(ret);
5188}
5189
Daniel Veillard11e00581998-10-24 18:27:49 +00005190/**
5191 * xmlParseVersionInfo:
5192 * @ctxt: an XML parser context
5193 *
5194 * parse the XML version.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005195 *
5196 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
5197 *
5198 * [25] Eq ::= S? '=' S?
Daniel Veillard11e00581998-10-24 18:27:49 +00005199 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005200 * Returns the version string, e.g. "1.0"
Daniel Veillard260a68f1998-08-13 03:39:55 +00005201 */
5202
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005203CHAR *
5204xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00005205 CHAR *version = NULL;
5206 const CHAR *q;
5207
5208 if ((CUR == 'v') && (NXT(1) == 'e') &&
5209 (NXT(2) == 'r') && (NXT(3) == 's') &&
5210 (NXT(4) == 'i') && (NXT(5) == 'o') &&
5211 (NXT(6) == 'n')) {
5212 SKIP(7);
5213 SKIP_BLANKS;
5214 if (CUR != '=') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005215 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005216 ctxt->sax->error(ctxt->userData, "xmlParseVersionInfo : expected '='\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005217 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005218 return(NULL);
5219 }
5220 NEXT;
5221 SKIP_BLANKS;
5222 if (CUR == '"') {
5223 NEXT;
5224 q = CUR_PTR;
5225 version = xmlParseVersionNum(ctxt);
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005226 if (CUR != '"') {
5227 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005228 ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005229 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005230 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00005231 NEXT;
5232 } else if (CUR == '\''){
5233 NEXT;
5234 q = CUR_PTR;
5235 version = xmlParseVersionNum(ctxt);
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005236 if (CUR != '\'') {
5237 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005238 ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005239 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005240 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00005241 NEXT;
5242 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005243 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005244 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005245 "xmlParseVersionInfo : expected ' or \"\n");
5246 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005247 }
5248 }
5249 return(version);
5250}
5251
Daniel Veillard11e00581998-10-24 18:27:49 +00005252/**
5253 * xmlParseEncName:
5254 * @ctxt: an XML parser context
5255 *
5256 * parse the XML encoding name
Daniel Veillard260a68f1998-08-13 03:39:55 +00005257 *
5258 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
Daniel Veillard11e00581998-10-24 18:27:49 +00005259 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005260 * Returns the encoding name value or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00005261 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005262CHAR *
5263xmlParseEncName(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00005264 const CHAR *q = CUR_PTR;
5265 CHAR *ret = NULL;
5266
5267 if (((CUR >= 'a') && (CUR <= 'z')) ||
5268 ((CUR >= 'A') && (CUR <= 'Z'))) {
5269 NEXT;
5270 while (IS_CHAR(CUR) &&
5271 (((CUR >= 'a') && (CUR <= 'z')) ||
5272 ((CUR >= 'A') && (CUR <= 'Z')) ||
5273 ((CUR >= '0') && (CUR <= '9')) ||
5274 (CUR == '-'))) NEXT;
5275 ret = xmlStrndup(q, CUR_PTR - q);
5276 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005277 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005278 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005279 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005280 }
5281 return(ret);
5282}
5283
Daniel Veillard11e00581998-10-24 18:27:49 +00005284/**
5285 * xmlParseEncodingDecl:
5286 * @ctxt: an XML parser context
5287 *
5288 * parse the XML encoding declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00005289 *
5290 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
Daniel Veillard11e00581998-10-24 18:27:49 +00005291 *
5292 * TODO: this should setup the conversion filters.
5293 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005294 * Returns the encoding value or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00005295 */
5296
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005297CHAR *
5298xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00005299 CHAR *encoding = NULL;
5300 const CHAR *q;
5301
5302 SKIP_BLANKS;
5303 if ((CUR == 'e') && (NXT(1) == 'n') &&
5304 (NXT(2) == 'c') && (NXT(3) == 'o') &&
5305 (NXT(4) == 'd') && (NXT(5) == 'i') &&
5306 (NXT(6) == 'n') && (NXT(7) == 'g')) {
5307 SKIP(8);
5308 SKIP_BLANKS;
5309 if (CUR != '=') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005310 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005311 ctxt->sax->error(ctxt->userData, "xmlParseEncodingDecl : expected '='\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005312 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005313 return(NULL);
5314 }
5315 NEXT;
5316 SKIP_BLANKS;
5317 if (CUR == '"') {
5318 NEXT;
5319 q = CUR_PTR;
5320 encoding = xmlParseEncName(ctxt);
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005321 if (CUR != '"') {
5322 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005323 ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005324 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005325 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00005326 NEXT;
5327 } else if (CUR == '\''){
5328 NEXT;
5329 q = CUR_PTR;
5330 encoding = xmlParseEncName(ctxt);
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005331 if (CUR != '\'') {
5332 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005333 ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005334 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005335 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00005336 NEXT;
5337 } else if (CUR == '"'){
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005338 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005339 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005340 "xmlParseEncodingDecl : expected ' or \"\n");
5341 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005342 }
5343 }
5344 return(encoding);
5345}
5346
Daniel Veillard11e00581998-10-24 18:27:49 +00005347/**
5348 * xmlParseSDDecl:
5349 * @ctxt: an XML parser context
5350 *
5351 * parse the XML standalone declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00005352 *
5353 * [32] SDDecl ::= S 'standalone' Eq
5354 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Daniel Veillard1e346af1999-02-22 10:33:01 +00005355 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005356 * [ VC: Standalone Document Declaration ]
5357 * TODO The standalone document declaration must have the value "no"
5358 * if any external markup declarations contain declarations of:
5359 * - attributes with default values, if elements to which these
5360 * attributes apply appear in the document without specifications
5361 * of values for these attributes, or
5362 * - entities (other than amp, lt, gt, apos, quot), if references
5363 * to those entities appear in the document, or
5364 * - attributes with values subject to normalization, where the
5365 * attribute appears in the document with a value which will change
5366 * as a result of normalization, or
5367 * - element types with element content, if white space occurs directly
5368 * within any instance of those types.
5369 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005370 * Returns 1 if standalone, 0 otherwise
Daniel Veillard260a68f1998-08-13 03:39:55 +00005371 */
5372
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005373int
5374xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00005375 int standalone = -1;
5376
5377 SKIP_BLANKS;
5378 if ((CUR == 's') && (NXT(1) == 't') &&
5379 (NXT(2) == 'a') && (NXT(3) == 'n') &&
5380 (NXT(4) == 'd') && (NXT(5) == 'a') &&
5381 (NXT(6) == 'l') && (NXT(7) == 'o') &&
5382 (NXT(8) == 'n') && (NXT(9) == 'e')) {
5383 SKIP(10);
Daniel Veillard011b63c1999-06-02 17:44:04 +00005384 SKIP_BLANKS;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005385 if (CUR != '=') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005386 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005387 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005388 "XML standalone declaration : expected '='\n");
5389 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005390 return(standalone);
5391 }
5392 NEXT;
5393 SKIP_BLANKS;
5394 if (CUR == '\''){
5395 NEXT;
5396 if ((CUR == 'n') && (NXT(1) == 'o')) {
5397 standalone = 0;
5398 SKIP(2);
5399 } else if ((CUR == 'y') && (NXT(1) == 'e') &&
5400 (NXT(2) == 's')) {
5401 standalone = 1;
5402 SKIP(3);
5403 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005404 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005405 ctxt->sax->error(ctxt->userData, "standalone accepts only 'yes' or 'no'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005406 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005407 }
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005408 if (CUR != '\'') {
5409 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005410 ctxt->sax->error(ctxt->userData, "String not closed\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005411 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005412 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00005413 NEXT;
5414 } else if (CUR == '"'){
5415 NEXT;
5416 if ((CUR == 'n') && (NXT(1) == 'o')) {
5417 standalone = 0;
5418 SKIP(2);
5419 } else if ((CUR == 'y') && (NXT(1) == 'e') &&
5420 (NXT(2) == 's')) {
5421 standalone = 1;
5422 SKIP(3);
5423 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005424 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005425 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005426 "standalone accepts only 'yes' or 'no'\n");
5427 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005428 }
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005429 if (CUR != '"') {
5430 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005431 ctxt->sax->error(ctxt->userData, "String not closed\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005432 ctxt->wellFormed = 0;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005433 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00005434 NEXT;
5435 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005436 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005437 ctxt->sax->error(ctxt->userData, "Standalone value not found\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005438 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005439 }
5440 }
5441 return(standalone);
5442}
5443
Daniel Veillard11e00581998-10-24 18:27:49 +00005444/**
5445 * xmlParseXMLDecl:
5446 * @ctxt: an XML parser context
5447 *
5448 * parse an XML declaration header
Daniel Veillard260a68f1998-08-13 03:39:55 +00005449 *
5450 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
5451 */
5452
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005453void
5454xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00005455 CHAR *version;
5456
5457 /*
5458 * We know that '<?xml' is here.
5459 */
5460 SKIP(5);
5461
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005462 if (!IS_BLANK(CUR)) {
5463 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005464 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005465 ctxt->wellFormed = 0;
5466 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005467 SKIP_BLANKS;
5468
5469 /*
5470 * We should have the VersionInfo here.
5471 */
5472 version = xmlParseVersionInfo(ctxt);
5473 if (version == NULL)
5474 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard517752b1999-04-05 12:20:10 +00005475 ctxt->version = xmlStrdup(version);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005476 free(version);
5477
5478 /*
5479 * We may have the encoding declaration
5480 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005481 if (!IS_BLANK(CUR)) {
5482 if ((CUR == '?') && (NXT(1) == '>')) {
5483 SKIP(2);
5484 return;
5485 }
5486 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005487 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005488 ctxt->wellFormed = 0;
5489 }
Daniel Veillard517752b1999-04-05 12:20:10 +00005490 ctxt->encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005491
5492 /*
5493 * We may have the standalone status.
5494 */
Daniel Veillard517752b1999-04-05 12:20:10 +00005495 if ((ctxt->encoding != NULL) && (!IS_BLANK(CUR))) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005496 if ((CUR == '?') && (NXT(1) == '>')) {
5497 SKIP(2);
5498 return;
5499 }
5500 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005501 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005502 ctxt->wellFormed = 0;
5503 }
5504 SKIP_BLANKS;
Daniel Veillard517752b1999-04-05 12:20:10 +00005505 ctxt->standalone = xmlParseSDDecl(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005506
5507 SKIP_BLANKS;
5508 if ((CUR == '?') && (NXT(1) == '>')) {
5509 SKIP(2);
5510 } else if (CUR == '>') {
5511 /* Deprecated old WD ... */
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005512 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005513 ctxt->sax->error(ctxt->userData, "XML declaration must end-up with '?>'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005514 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005515 NEXT;
5516 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005518 ctxt->sax->error(ctxt->userData, "parsing XML declaration: '?>' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005519 ctxt->wellFormed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005520 MOVETO_ENDTAG(CUR_PTR);
5521 NEXT;
5522 }
5523}
5524
Daniel Veillard11e00581998-10-24 18:27:49 +00005525/**
5526 * xmlParseMisc:
5527 * @ctxt: an XML parser context
5528 *
5529 * parse an XML Misc* optionnal field.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005530 *
5531 * [27] Misc ::= Comment | PI | S
5532 */
5533
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005534void
5535xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00005536 while (((CUR == '<') && (NXT(1) == '?')) ||
5537 ((CUR == '<') && (NXT(1) == '!') &&
5538 (NXT(2) == '-') && (NXT(3) == '-')) ||
5539 IS_BLANK(CUR)) {
5540 if ((CUR == '<') && (NXT(1) == '?')) {
5541 xmlParsePI(ctxt);
5542 } else if (IS_BLANK(CUR)) {
5543 NEXT;
5544 } else
5545 xmlParseComment(ctxt, 0);
5546 }
5547}
5548
Daniel Veillard11e00581998-10-24 18:27:49 +00005549/**
5550 * xmlParseDocument :
5551 * @ctxt: an XML parser context
5552 *
5553 * parse an XML document (and build a tree if using the standard SAX
5554 * interface).
Daniel Veillard260a68f1998-08-13 03:39:55 +00005555 *
5556 * [1] document ::= prolog element Misc*
5557 *
5558 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
Daniel Veillard11e00581998-10-24 18:27:49 +00005559 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005560 * Returns 0, -1 in case of error. the parser context is augmented
Daniel Veillard11e00581998-10-24 18:27:49 +00005561 * as a result of the parsing.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005562 */
5563
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005564int
5565xmlParseDocument(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00005566 xmlDefaultSAXHandlerInit();
5567
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005568 GROW;
5569
Daniel Veillard260a68f1998-08-13 03:39:55 +00005570 /*
5571 * SAX: beginning of the document processing.
5572 */
Daniel Veillard517752b1999-04-05 12:20:10 +00005573 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
Daniel Veillard27d88741999-05-29 11:51:49 +00005574 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005575
5576 /*
5577 * We should check for encoding here and plug-in some
5578 * conversion code TODO !!!!
5579 */
5580
5581 /*
5582 * Wipe out everything which is before the first '<'
5583 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005584 if (IS_BLANK(CUR)) {
5585 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005586 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005587 "Extra spaces at the beginning of the document are not allowed\n");
5588 ctxt->wellFormed = 0;
5589 SKIP_BLANKS;
5590 }
5591
5592 if (CUR == 0) {
5593 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005594 ctxt->sax->error(ctxt->userData, "Document is empty\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005595 ctxt->wellFormed = 0;
5596 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005597
5598 /*
5599 * Check for the XMLDecl in the Prolog.
5600 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005601 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005602 if ((CUR == '<') && (NXT(1) == '?') &&
5603 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5604 (NXT(4) == 'l')) {
5605 xmlParseXMLDecl(ctxt);
5606 /* SKIP_EOL(cur); */
5607 SKIP_BLANKS;
5608 } else if ((CUR == '<') && (NXT(1) == '?') &&
5609 (NXT(2) == 'X') && (NXT(3) == 'M') &&
5610 (NXT(4) == 'L')) {
5611 /*
5612 * The first drafts were using <?XML and the final W3C REC
5613 * now use <?xml ...
5614 */
5615 xmlParseXMLDecl(ctxt);
5616 /* SKIP_EOL(cur); */
5617 SKIP_BLANKS;
5618 } else {
Daniel Veillard517752b1999-04-05 12:20:10 +00005619 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005620 }
Daniel Veillard517752b1999-04-05 12:20:10 +00005621 if ((ctxt->sax) && (ctxt->sax->startDocument))
Daniel Veillard27d88741999-05-29 11:51:49 +00005622 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005623
5624 /*
5625 * The Misc part of the Prolog
5626 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005627 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005628 xmlParseMisc(ctxt);
5629
5630 /*
5631 * Then possibly doc type declaration(s) and more Misc
5632 * (doctypedecl Misc*)?
5633 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005634 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005635 if ((CUR == '<') && (NXT(1) == '!') &&
5636 (NXT(2) == 'D') && (NXT(3) == 'O') &&
5637 (NXT(4) == 'C') && (NXT(5) == 'T') &&
5638 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
5639 (NXT(8) == 'E')) {
5640 xmlParseDocTypeDecl(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00005641 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005642 xmlParseMisc(ctxt);
5643 }
5644
5645 /*
5646 * Time to start parsing the tree itself
5647 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005648 GROW;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005649 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard517752b1999-04-05 12:20:10 +00005650 xmlParseElement(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00005651 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005652
5653 /*
5654 * The Misc part at the end
5655 */
5656 xmlParseMisc(ctxt);
5657
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005658 if (CUR != 0) {
5659 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005660 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005661 "Extra content at the end of the document\n");
5662 ctxt->wellFormed = 0;
5663 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00005664 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005665
Daniel Veillard260a68f1998-08-13 03:39:55 +00005666 /*
5667 * SAX: end of the document processing.
5668 */
Daniel Veillard517752b1999-04-05 12:20:10 +00005669 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005670 ctxt->sax->endDocument(ctxt->userData);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005671 if (! ctxt->wellFormed) return(-1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005672 return(0);
5673}
5674
Daniel Veillardb05deb71999-08-10 19:04:08 +00005675/************************************************************************
5676 * *
5677 * I/O front end functions to the parser *
5678 * *
5679 ************************************************************************/
5680
Daniel Veillard11e00581998-10-24 18:27:49 +00005681/**
Daniel Veillardbe70ff71999-07-05 16:50:46 +00005682 * xmlCreateDocParserCtxt :
Daniel Veillardd692aa41999-02-28 21:54:31 +00005683 * @cur: a pointer to an array of CHAR
5684 *
5685 * Create a parser context for an XML in-memory document.
5686 *
5687 * Returns the new parser context or NULL
5688 */
5689xmlParserCtxtPtr
5690xmlCreateDocParserCtxt(CHAR *cur) {
5691 xmlParserCtxtPtr ctxt;
5692 xmlParserInputPtr input;
Daniel Veillard27d88741999-05-29 11:51:49 +00005693 xmlCharEncoding enc;
Daniel Veillardd692aa41999-02-28 21:54:31 +00005694
Daniel Veillardb05deb71999-08-10 19:04:08 +00005695 ctxt = xmlNewParserCtxt();
Daniel Veillardd692aa41999-02-28 21:54:31 +00005696 if (ctxt == NULL) {
Daniel Veillardd692aa41999-02-28 21:54:31 +00005697 return(NULL);
5698 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00005699 input = xmlNewInputStream(ctxt);
Daniel Veillardd692aa41999-02-28 21:54:31 +00005700 if (input == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00005701 xmlFreeParserCtxt(ctxt);
Daniel Veillardd692aa41999-02-28 21:54:31 +00005702 return(NULL);
5703 }
5704
Daniel Veillard27d88741999-05-29 11:51:49 +00005705 /*
5706 * plug some encoding conversion routines here. !!!
5707 */
5708 enc = xmlDetectCharEncoding(cur);
5709 xmlSwitchEncoding(ctxt, enc);
5710
Daniel Veillardd692aa41999-02-28 21:54:31 +00005711 input->base = cur;
5712 input->cur = cur;
Daniel Veillardd692aa41999-02-28 21:54:31 +00005713
5714 inputPush(ctxt, input);
5715 return(ctxt);
5716}
5717
5718/**
Daniel Veillard42dc9b31998-11-09 01:17:21 +00005719 * xmlSAXParseDoc :
5720 * @sax: the SAX handler block
Daniel Veillard11e00581998-10-24 18:27:49 +00005721 * @cur: a pointer to an array of CHAR
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005722 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
5723 * documents
Daniel Veillard11e00581998-10-24 18:27:49 +00005724 *
5725 * parse an XML in-memory document and build a tree.
Daniel Veillard42dc9b31998-11-09 01:17:21 +00005726 * It use the given SAX function block to handle the parsing callback.
5727 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillard11e00581998-10-24 18:27:49 +00005728 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005729 * Returns the resulting document tree
Daniel Veillard260a68f1998-08-13 03:39:55 +00005730 */
5731
Daniel Veillard1e346af1999-02-22 10:33:01 +00005732xmlDocPtr
5733xmlSAXParseDoc(xmlSAXHandlerPtr sax, CHAR *cur, int recovery) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00005734 xmlDocPtr ret;
5735 xmlParserCtxtPtr ctxt;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005736
5737 if (cur == NULL) return(NULL);
5738
Daniel Veillardd692aa41999-02-28 21:54:31 +00005739
5740 ctxt = xmlCreateDocParserCtxt(cur);
5741 if (ctxt == NULL) return(NULL);
Daniel Veillard27d88741999-05-29 11:51:49 +00005742 if (sax != NULL) {
5743 ctxt->sax = sax;
5744 ctxt->userData = NULL;
5745 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005746
5747 xmlParseDocument(ctxt);
Daniel Veillard517752b1999-04-05 12:20:10 +00005748 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005749 else {
5750 ret = NULL;
Daniel Veillard517752b1999-04-05 12:20:10 +00005751 xmlFreeDoc(ctxt->myDoc);
5752 ctxt->myDoc = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005753 }
Daniel Veillard97fea181999-06-26 23:07:37 +00005754 if (sax != NULL)
5755 ctxt->sax = NULL;
Daniel Veillardd692aa41999-02-28 21:54:31 +00005756 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005757
5758 return(ret);
5759}
5760
Daniel Veillard11e00581998-10-24 18:27:49 +00005761/**
Daniel Veillard42dc9b31998-11-09 01:17:21 +00005762 * xmlParseDoc :
5763 * @cur: a pointer to an array of CHAR
5764 *
5765 * parse an XML in-memory document and build a tree.
5766 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005767 * Returns the resulting document tree
Daniel Veillard42dc9b31998-11-09 01:17:21 +00005768 */
5769
Daniel Veillard1e346af1999-02-22 10:33:01 +00005770xmlDocPtr
5771xmlParseDoc(CHAR *cur) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005772 return(xmlSAXParseDoc(NULL, cur, 0));
5773}
5774
5775/**
Daniel Veillard011b63c1999-06-02 17:44:04 +00005776 * xmlSAXParseDTD :
5777 * @sax: the SAX handler block
5778 * @ExternalID: a NAME* containing the External ID of the DTD
5779 * @SystemID: a NAME* containing the URL to the DTD
5780 *
5781 * Load and parse an external subset.
5782 *
5783 * Returns the resulting xmlDtdPtr or NULL in case of error.
5784 */
5785
5786xmlDtdPtr
5787xmlSAXParseDTD(xmlSAXHandlerPtr sax, const CHAR *ExternalID,
5788 const CHAR *SystemID) {
5789 xmlDtdPtr ret = NULL;
5790 xmlParserCtxtPtr ctxt;
Daniel Veillard14fff061999-06-22 21:49:07 +00005791 xmlParserInputPtr input = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +00005792 xmlCharEncoding enc;
5793
5794 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
5795
Daniel Veillardb05deb71999-08-10 19:04:08 +00005796 ctxt = xmlNewParserCtxt();
Daniel Veillard011b63c1999-06-02 17:44:04 +00005797 if (ctxt == NULL) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00005798 return(NULL);
5799 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00005800
5801 /*
5802 * Set-up the SAX context
5803 */
5804 if (ctxt == NULL) return(NULL);
5805 if (sax != NULL) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005806 if (ctxt->sax != NULL)
5807 free(ctxt->sax);
Daniel Veillard011b63c1999-06-02 17:44:04 +00005808 ctxt->sax = sax;
5809 ctxt->userData = NULL;
5810 }
5811
5812 /*
5813 * Ask the Entity resolver to load the damn thing
5814 */
5815
5816 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
5817 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
5818 if (input == NULL) {
Daniel Veillard97fea181999-06-26 23:07:37 +00005819 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +00005820 xmlFreeParserCtxt(ctxt);
5821 return(NULL);
5822 }
5823
5824 /*
5825 * plug some encoding conversion routines here. !!!
5826 */
5827 xmlPushInput(ctxt, input);
5828 enc = xmlDetectCharEncoding(ctxt->input->cur);
5829 xmlSwitchEncoding(ctxt, enc);
5830
Daniel Veillardb05deb71999-08-10 19:04:08 +00005831 if (input->filename == NULL)
5832 input->filename = xmlStrdup(SystemID);
Daniel Veillard011b63c1999-06-02 17:44:04 +00005833 input->line = 1;
5834 input->col = 1;
5835 input->base = ctxt->input->cur;
5836 input->cur = ctxt->input->cur;
5837 input->free = NULL;
5838
5839 /*
5840 * let's parse that entity knowing it's an external subset.
5841 */
5842 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
5843
5844 if (ctxt->myDoc != NULL) {
5845 if (ctxt->wellFormed) {
5846 ret = ctxt->myDoc->intSubset;
5847 ctxt->myDoc->intSubset = NULL;
5848 } else {
5849 ret = NULL;
5850 }
5851 xmlFreeDoc(ctxt->myDoc);
5852 ctxt->myDoc = NULL;
5853 }
Daniel Veillard97fea181999-06-26 23:07:37 +00005854 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +00005855 xmlFreeParserCtxt(ctxt);
5856
5857 return(ret);
5858}
5859
5860/**
5861 * xmlParseDTD :
5862 * @ExternalID: a NAME* containing the External ID of the DTD
5863 * @SystemID: a NAME* containing the URL to the DTD
5864 *
5865 * Load and parse an external subset.
5866 *
5867 * Returns the resulting xmlDtdPtr or NULL in case of error.
5868 */
5869
5870xmlDtdPtr
5871xmlParseDTD(const CHAR *ExternalID, const CHAR *SystemID) {
5872 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
5873}
5874
5875/**
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005876 * xmlRecoverDoc :
5877 * @cur: a pointer to an array of CHAR
5878 *
5879 * parse an XML in-memory document and build a tree.
5880 * In the case the document is not Well Formed, a tree is built anyway
5881 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005882 * Returns the resulting document tree
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005883 */
5884
Daniel Veillard1e346af1999-02-22 10:33:01 +00005885xmlDocPtr
5886xmlRecoverDoc(CHAR *cur) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005887 return(xmlSAXParseDoc(NULL, cur, 1));
Daniel Veillard42dc9b31998-11-09 01:17:21 +00005888}
5889
5890/**
Daniel Veillardd692aa41999-02-28 21:54:31 +00005891 * xmlCreateFileParserCtxt :
Daniel Veillard11e00581998-10-24 18:27:49 +00005892 * @filename: the filename
5893 *
Daniel Veillardd692aa41999-02-28 21:54:31 +00005894 * Create a parser context for a file content.
5895 * Automatic support for ZLIB/Compress compressed document is provided
5896 * by default if found at compile-time.
Daniel Veillard11e00581998-10-24 18:27:49 +00005897 *
Daniel Veillardd692aa41999-02-28 21:54:31 +00005898 * Returns the new parser context or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00005899 */
Daniel Veillardd692aa41999-02-28 21:54:31 +00005900xmlParserCtxtPtr
5901xmlCreateFileParserCtxt(const char *filename)
5902{
5903 xmlParserCtxtPtr ctxt;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005904 xmlParserInputPtr inputStream;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005905 xmlParserInputBufferPtr buf;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005906 char *directory = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005907
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005908 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
5909 if (buf == NULL) return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005910
Daniel Veillardb05deb71999-08-10 19:04:08 +00005911 ctxt = xmlNewParserCtxt();
Daniel Veillard260a68f1998-08-13 03:39:55 +00005912 if (ctxt == NULL) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00005913 return(NULL);
5914 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00005915
5916 inputStream = xmlNewInputStream(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005917 if (inputStream == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00005918 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005919 return(NULL);
5920 }
5921
5922 inputStream->filename = strdup(filename);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005923 inputStream->buf = buf;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005924 inputStream->base = inputStream->buf->buffer->content;
5925 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005926
5927 inputPush(ctxt, inputStream);
Daniel Veillardb05deb71999-08-10 19:04:08 +00005928 if ((ctxt->directory == NULL) && (directory == NULL))
5929 directory = xmlParserGetDirectory(filename);
5930 if ((ctxt->directory == NULL) && (directory != NULL))
5931 ctxt->directory = directory;
5932
Daniel Veillardd692aa41999-02-28 21:54:31 +00005933 return(ctxt);
5934}
5935
5936/**
5937 * xmlSAXParseFile :
5938 * @sax: the SAX handler block
5939 * @filename: the filename
5940 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
5941 * documents
5942 *
5943 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
5944 * compressed document is provided by default if found at compile-time.
5945 * It use the given SAX function block to handle the parsing callback.
5946 * If sax is NULL, fallback to the default DOM tree building routines.
5947 *
5948 * Returns the resulting document tree
5949 */
5950
Daniel Veillard011b63c1999-06-02 17:44:04 +00005951xmlDocPtr
5952xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
Daniel Veillardd692aa41999-02-28 21:54:31 +00005953 int recovery) {
5954 xmlDocPtr ret;
5955 xmlParserCtxtPtr ctxt;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005956 char *directory = NULL;
Daniel Veillardd692aa41999-02-28 21:54:31 +00005957
5958 ctxt = xmlCreateFileParserCtxt(filename);
5959 if (ctxt == NULL) return(NULL);
Daniel Veillard27d88741999-05-29 11:51:49 +00005960 if (sax != NULL) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005961 if (ctxt->sax != NULL)
5962 free(ctxt->sax);
Daniel Veillard27d88741999-05-29 11:51:49 +00005963 ctxt->sax = sax;
5964 ctxt->userData = NULL;
5965 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005966
Daniel Veillardb05deb71999-08-10 19:04:08 +00005967 if ((ctxt->directory == NULL) && (directory == NULL))
5968 directory = xmlParserGetDirectory(filename);
5969 if ((ctxt->directory == NULL) && (directory != NULL))
5970 ctxt->directory = xmlStrdup(directory);
5971
Daniel Veillard260a68f1998-08-13 03:39:55 +00005972 xmlParseDocument(ctxt);
5973
Daniel Veillard517752b1999-04-05 12:20:10 +00005974 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005975 else {
5976 ret = NULL;
Daniel Veillard517752b1999-04-05 12:20:10 +00005977 xmlFreeDoc(ctxt->myDoc);
5978 ctxt->myDoc = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005979 }
Daniel Veillard97fea181999-06-26 23:07:37 +00005980 if (sax != NULL)
5981 ctxt->sax = NULL;
Daniel Veillardd692aa41999-02-28 21:54:31 +00005982 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005983
5984 return(ret);
5985}
5986
Daniel Veillard42dc9b31998-11-09 01:17:21 +00005987/**
5988 * xmlParseFile :
5989 * @filename: the filename
5990 *
5991 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
5992 * compressed document is provided by default if found at compile-time.
5993 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005994 * Returns the resulting document tree
Daniel Veillard42dc9b31998-11-09 01:17:21 +00005995 */
5996
Daniel Veillard011b63c1999-06-02 17:44:04 +00005997xmlDocPtr
5998xmlParseFile(const char *filename) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005999 return(xmlSAXParseFile(NULL, filename, 0));
6000}
6001
6002/**
6003 * xmlRecoverFile :
6004 * @filename: the filename
6005 *
6006 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
6007 * compressed document is provided by default if found at compile-time.
6008 * In the case the document is not Well Formed, a tree is built anyway
6009 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00006010 * Returns the resulting document tree
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006011 */
6012
Daniel Veillard011b63c1999-06-02 17:44:04 +00006013xmlDocPtr
6014xmlRecoverFile(const char *filename) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006015 return(xmlSAXParseFile(NULL, filename, 1));
Daniel Veillard42dc9b31998-11-09 01:17:21 +00006016}
Daniel Veillard260a68f1998-08-13 03:39:55 +00006017
Daniel Veillard11e00581998-10-24 18:27:49 +00006018/**
Daniel Veillardd692aa41999-02-28 21:54:31 +00006019 * xmlCreateMemoryParserCtxt :
Daniel Veillard1e346af1999-02-22 10:33:01 +00006020 * @buffer: an pointer to a char array
Daniel Veillard11e00581998-10-24 18:27:49 +00006021 * @size: the siwe of the array
6022 *
Daniel Veillardd692aa41999-02-28 21:54:31 +00006023 * Create a parser context for an XML in-memory document.
Daniel Veillard11e00581998-10-24 18:27:49 +00006024 *
Daniel Veillardd692aa41999-02-28 21:54:31 +00006025 * Returns the new parser context or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00006026 */
Daniel Veillardd692aa41999-02-28 21:54:31 +00006027xmlParserCtxtPtr
6028xmlCreateMemoryParserCtxt(char *buffer, int size) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006029 xmlParserCtxtPtr ctxt;
6030 xmlParserInputPtr input;
Daniel Veillard27d88741999-05-29 11:51:49 +00006031 xmlCharEncoding enc;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006032
6033 buffer[size - 1] = '\0';
6034
Daniel Veillardb05deb71999-08-10 19:04:08 +00006035 ctxt = xmlNewParserCtxt();
Daniel Veillard260a68f1998-08-13 03:39:55 +00006036 if (ctxt == NULL) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006037 return(NULL);
6038 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00006039
6040 input = xmlNewInputStream(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006041 if (input == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00006042 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006043 return(NULL);
6044 }
6045
6046 input->filename = NULL;
6047 input->line = 1;
6048 input->col = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006049 input->buf = NULL;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00006050 input->consumed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006051
6052 /*
Daniel Veillard27d88741999-05-29 11:51:49 +00006053 * plug some encoding conversion routines here. !!!
Daniel Veillard260a68f1998-08-13 03:39:55 +00006054 */
Daniel Veillard27d88741999-05-29 11:51:49 +00006055 enc = xmlDetectCharEncoding(buffer);
6056 xmlSwitchEncoding(ctxt, enc);
6057
Daniel Veillard260a68f1998-08-13 03:39:55 +00006058 input->base = buffer;
6059 input->cur = buffer;
Daniel Veillardd692aa41999-02-28 21:54:31 +00006060 input->free = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006061
6062 inputPush(ctxt, input);
Daniel Veillardd692aa41999-02-28 21:54:31 +00006063 return(ctxt);
6064}
6065
6066/**
6067 * xmlSAXParseMemory :
6068 * @sax: the SAX handler block
6069 * @buffer: an pointer to a char array
6070 * @size: the siwe of the array
6071 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
6072 * documents
6073 *
6074 * parse an XML in-memory block and use the given SAX function block
6075 * to handle the parsing callback. If sax is NULL, fallback to the default
6076 * DOM tree building routines.
6077 *
Daniel Veillardd692aa41999-02-28 21:54:31 +00006078 * Returns the resulting document tree
6079 */
6080xmlDocPtr
6081xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
6082 xmlDocPtr ret;
6083 xmlParserCtxtPtr ctxt;
6084
6085 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
6086 if (ctxt == NULL) return(NULL);
Daniel Veillard27d88741999-05-29 11:51:49 +00006087 if (sax != NULL) {
6088 ctxt->sax = sax;
6089 ctxt->userData = NULL;
6090 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00006091
6092 xmlParseDocument(ctxt);
6093
Daniel Veillard517752b1999-04-05 12:20:10 +00006094 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006095 else {
6096 ret = NULL;
Daniel Veillard517752b1999-04-05 12:20:10 +00006097 xmlFreeDoc(ctxt->myDoc);
6098 ctxt->myDoc = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006099 }
Daniel Veillard97fea181999-06-26 23:07:37 +00006100 if (sax != NULL)
6101 ctxt->sax = NULL;
Daniel Veillardd692aa41999-02-28 21:54:31 +00006102 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006103
6104 return(ret);
6105}
6106
Daniel Veillard42dc9b31998-11-09 01:17:21 +00006107/**
6108 * xmlParseMemory :
Daniel Veillard1e346af1999-02-22 10:33:01 +00006109 * @buffer: an pointer to a char array
Daniel Veillard42dc9b31998-11-09 01:17:21 +00006110 * @size: the size of the array
6111 *
6112 * parse an XML in-memory block and build a tree.
6113 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00006114 * Returns the resulting document tree
Daniel Veillard42dc9b31998-11-09 01:17:21 +00006115 */
6116
6117xmlDocPtr xmlParseMemory(char *buffer, int size) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006118 return(xmlSAXParseMemory(NULL, buffer, size, 0));
6119}
6120
6121/**
6122 * xmlRecoverMemory :
Daniel Veillard1e346af1999-02-22 10:33:01 +00006123 * @buffer: an pointer to a char array
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006124 * @size: the size of the array
6125 *
6126 * parse an XML in-memory block and build a tree.
6127 * In the case the document is not Well Formed, a tree is built anyway
6128 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00006129 * Returns the resulting document tree
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006130 */
6131
6132xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
6133 return(xmlSAXParseMemory(NULL, buffer, size, 1));
Daniel Veillard42dc9b31998-11-09 01:17:21 +00006134}
Daniel Veillard260a68f1998-08-13 03:39:55 +00006135
Daniel Veillard260a68f1998-08-13 03:39:55 +00006136
Daniel Veillard11e00581998-10-24 18:27:49 +00006137/**
6138 * xmlSetupParserForBuffer:
6139 * @ctxt: an XML parser context
6140 * @buffer: a CHAR * buffer
6141 * @filename: a file name
6142 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00006143 * Setup the parser context to parse a new buffer; Clears any prior
6144 * contents from the parser context. The buffer parameter must not be
6145 * NULL, but the filename parameter can be
6146 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006147void
6148xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
Daniel Veillard260a68f1998-08-13 03:39:55 +00006149 const char* filename)
6150{
Daniel Veillardb05deb71999-08-10 19:04:08 +00006151 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006152
Daniel Veillardb05deb71999-08-10 19:04:08 +00006153 input = xmlNewInputStream(ctxt);
6154 if (input == NULL) {
6155 perror("malloc");
6156 free(ctxt);
6157 exit(1);
6158 }
6159
6160 xmlClearParserCtxt(ctxt);
6161 if (filename != NULL)
6162 input->filename = strdup(filename);
6163 input->base = buffer;
6164 input->cur = buffer;
6165 inputPush(ctxt, input);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006166}
6167
6168
/************************************************************************
 *									*
 *				Miscellaneous				*
 *									*
 ************************************************************************/
6174
6175
Daniel Veillard11e00581998-10-24 18:27:49 +00006176/**
6177 * xmlParserFindNodeInfo:
6178 * @ctxt: an XML parser context
6179 * @node: an XML node within the tree
6180 *
6181 * Find the parser node info struct for a given node
6182 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00006183 * Returns an xmlParserNodeInfo block pointer or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00006184 */
6185const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
6186 const xmlNode* node)
6187{
6188 unsigned long pos;
6189
6190 /* Find position where node should be at */
6191 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
6192 if ( ctx->node_seq.buffer[pos].node == node )
6193 return &ctx->node_seq.buffer[pos];
6194 else
6195 return NULL;
6196}
6197
6198
Daniel Veillard11e00581998-10-24 18:27:49 +00006199/**
6200 * xmlInitNodeInfoSeq :
6201 * @seq: a node info sequence pointer
6202 *
6203 * -- Initialize (set to initial state) node info sequence
Daniel Veillard260a68f1998-08-13 03:39:55 +00006204 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006205void
6206xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
Daniel Veillard260a68f1998-08-13 03:39:55 +00006207{
6208 seq->length = 0;
6209 seq->maximum = 0;
6210 seq->buffer = NULL;
6211}
6212
Daniel Veillard11e00581998-10-24 18:27:49 +00006213/**
6214 * xmlClearNodeInfoSeq :
6215 * @seq: a node info sequence pointer
6216 *
6217 * -- Clear (release memory and reinitialize) node
Daniel Veillard260a68f1998-08-13 03:39:55 +00006218 * info sequence
6219 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006220void
6221xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
Daniel Veillard260a68f1998-08-13 03:39:55 +00006222{
6223 if ( seq->buffer != NULL )
6224 free(seq->buffer);
6225 xmlInitNodeInfoSeq(seq);
6226}
6227
6228
Daniel Veillard11e00581998-10-24 18:27:49 +00006229/**
6230 * xmlParserFindNodeInfoIndex:
6231 * @seq: a node info sequence pointer
6232 * @node: an XML node pointer
6233 *
6234 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00006235 * xmlParserFindNodeInfoIndex : Find the index that the info record for
6236 * the given node is or should be at in a sorted sequence
Daniel Veillard1164e751999-02-16 16:29:17 +00006237 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00006238 * Returns a long indicating the position of the record
Daniel Veillard260a68f1998-08-13 03:39:55 +00006239 */
6240unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
6241 const xmlNode* node)
6242{
6243 unsigned long upper, lower, middle;
6244 int found = 0;
6245
6246 /* Do a binary search for the key */
6247 lower = 1;
6248 upper = seq->length;
6249 middle = 0;
6250 while ( lower <= upper && !found) {
6251 middle = lower + (upper - lower) / 2;
6252 if ( node == seq->buffer[middle - 1].node )
6253 found = 1;
6254 else if ( node < seq->buffer[middle - 1].node )
6255 upper = middle - 1;
6256 else
6257 lower = middle + 1;
6258 }
6259
6260 /* Return position */
6261 if ( middle == 0 || seq->buffer[middle - 1].node < node )
6262 return middle;
6263 else
6264 return middle - 1;
6265}
6266
6267
Daniel Veillard11e00581998-10-24 18:27:49 +00006268/**
6269 * xmlParserAddNodeInfo:
6270 * @ctxt: an XML parser context
Daniel Veillard1e346af1999-02-22 10:33:01 +00006271 * @info: a node info sequence pointer
Daniel Veillard11e00581998-10-24 18:27:49 +00006272 *
6273 * Insert node info record into the sorted sequence
Daniel Veillard260a68f1998-08-13 03:39:55 +00006274 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006275void
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006276xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
Daniel Veillard1e346af1999-02-22 10:33:01 +00006277 const xmlParserNodeInfo* info)
Daniel Veillard260a68f1998-08-13 03:39:55 +00006278{
6279 unsigned long pos;
6280 static unsigned int block_size = 5;
6281
6282 /* Find pos and check to see if node is already in the sequence */
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006283 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
6284 if ( pos < ctxt->node_seq.length
6285 && ctxt->node_seq.buffer[pos].node == info->node ) {
6286 ctxt->node_seq.buffer[pos] = *info;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006287 }
6288
6289 /* Otherwise, we need to add new node to buffer */
6290 else {
6291 /* Expand buffer by 5 if needed */
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006292 if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006293 xmlParserNodeInfo* tmp_buffer;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006294 unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
6295 *(ctxt->node_seq.maximum + block_size));
Daniel Veillard260a68f1998-08-13 03:39:55 +00006296
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006297 if ( ctxt->node_seq.buffer == NULL )
Daniel Veillard260a68f1998-08-13 03:39:55 +00006298 tmp_buffer = (xmlParserNodeInfo*)malloc(byte_size);
6299 else
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006300 tmp_buffer = (xmlParserNodeInfo*)realloc(ctxt->node_seq.buffer, byte_size);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006301
6302 if ( tmp_buffer == NULL ) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006304 ctxt->sax->error(ctxt->userData, "Out of memory\n");
Daniel Veillard260a68f1998-08-13 03:39:55 +00006305 return;
6306 }
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006307 ctxt->node_seq.buffer = tmp_buffer;
6308 ctxt->node_seq.maximum += block_size;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006309 }
6310
6311 /* If position is not at end, move elements out of the way */
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006312 if ( pos != ctxt->node_seq.length ) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006313 unsigned long i;
6314
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006315 for ( i = ctxt->node_seq.length; i > pos; i-- )
6316 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
Daniel Veillard260a68f1998-08-13 03:39:55 +00006317 }
6318
6319 /* Copy element and increase length */
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006320 ctxt->node_seq.buffer[pos] = *info;
6321 ctxt->node_seq.length++;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006322 }
6323}
Daniel Veillard011b63c1999-06-02 17:44:04 +00006324
6325
Daniel Veillardb05deb71999-08-10 19:04:08 +00006326/**
6327 * xmlSubstituteEntitiesDefault :
6328 * @val: int 0 or 1
6329 *
6330 * Set and return the previous value for default entity support.
6331 * Initially the parser always keep entity references instead of substituting
6332 * entity values in the output. This function has to be used to change the
6333 * default parser behaviour
6334 * SAX::subtituteEntities() has to be used for changing that on a file by
6335 * file basis.
6336 *
6337 * Returns the last value for 0 for no substitution, 1 for substitution.
6338 */
6339
6340int
6341xmlSubstituteEntitiesDefault(int val) {
6342 int old = xmlSubstituteEntitiesDefaultValue;
6343
6344 xmlSubstituteEntitiesDefaultValue = val;
6345 return(old);
6346}
6347