blob: b43f53068dafb9e60b31a3e754f705b6782f839b [file] [log] [blame]
Daniel Veillard260a68f1998-08-13 03:39:55 +00001/*
2 * parser.c : an XML 1.0 non-verifying parser
3 *
4 * See Copyright for the status of this software.
5 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006 * Daniel.Veillard@w3.org
Daniel Veillard260a68f1998-08-13 03:39:55 +00007 */
8
9#ifdef WIN32
Daniel Veillard3c558c31999-12-22 11:30:41 +000010#include "win32config.h"
Daniel Veillard260a68f1998-08-13 03:39:55 +000011#else
Daniel Veillard7f7d1111999-09-22 09:46:25 +000012#include "config.h"
Daniel Veillard260a68f1998-08-13 03:39:55 +000013#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +000014
Daniel Veillard260a68f1998-08-13 03:39:55 +000015#include <stdio.h>
Daniel Veillard260a68f1998-08-13 03:39:55 +000016#include <string.h> /* for memset() only */
Daniel Veillard7f7d1111999-09-22 09:46:25 +000017#ifdef HAVE_CTYPE_H
18#include <ctype.h>
19#endif
20#ifdef HAVE_STDLIB_H
Seth Alvese7f12e61998-10-01 20:51:15 +000021#include <stdlib.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000022#endif
23#ifdef HAVE_SYS_STAT_H
Daniel Veillard260a68f1998-08-13 03:39:55 +000024#include <sys/stat.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000025#endif
Daniel Veillard260a68f1998-08-13 03:39:55 +000026#ifdef HAVE_FCNTL_H
27#include <fcntl.h>
28#endif
29#ifdef HAVE_UNISTD_H
30#include <unistd.h>
31#endif
32#ifdef HAVE_ZLIB_H
33#include <zlib.h>
34#endif
35
Daniel Veillard6454aec1999-09-02 22:04:43 +000036#include "xmlmemory.h"
Daniel Veillard260a68f1998-08-13 03:39:55 +000037#include "tree.h"
38#include "parser.h"
39#include "entities.h"
Daniel Veillard27d88741999-05-29 11:51:49 +000040#include "encoding.h"
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000041#include "valid.h"
Daniel Veillard1e346af1999-02-22 10:33:01 +000042#include "parserInternals.h"
Daniel Veillarde2d034d1999-07-27 19:52:06 +000043#include "xmlIO.h"
Daniel Veillard7f7d1111999-09-22 09:46:25 +000044#include "xml-error.h"
Daniel Veillard260a68f1998-08-13 03:39:55 +000045
Daniel Veillarddbfd6411999-12-28 16:35:14 +000046#define XML_PARSER_BIG_BUFFER_SIZE 1000
47#define XML_PARSER_BUFFER_SIZE 100
48
Daniel Veillard14fff061999-06-22 21:49:07 +000049const char *xmlParserVersion = LIBXML_VERSION;
Daniel Veillardcf461992000-03-14 18:30:20 +000050int xmlGetWarningsDefaultValue = 1;
Daniel Veillard14fff061999-06-22 21:49:07 +000051
Daniel Veillard3c558c31999-12-22 11:30:41 +000052/*
53 * List of XML prefixed PI allowed by W3C specs
54 */
55
56const char *xmlW3CPIs[] = {
57 "xml-stylesheet",
58 NULL
59};
Daniel Veillarde2d034d1999-07-27 19:52:06 +000060
Daniel Veillardcf461992000-03-14 18:30:20 +000061void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
62void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
63xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
64 const xmlChar **str);
Daniel Veillarde2d034d1999-07-27 19:52:06 +000065/************************************************************************
66 * *
67 * Input handling functions for progressive parsing *
68 * *
69 ************************************************************************/
70
71/* #define DEBUG_INPUT */
Daniel Veillarddbfd6411999-12-28 16:35:14 +000072/* #define DEBUG_STACK */
73/* #define DEBUG_PUSH */
74
Daniel Veillarde2d034d1999-07-27 19:52:06 +000075
Daniel Veillardb05deb71999-08-10 19:04:08 +000076#define INPUT_CHUNK 250
77/* we need to keep enough input to show errors in context */
78#define LINE_LEN 80
Daniel Veillarde2d034d1999-07-27 19:52:06 +000079
80#ifdef DEBUG_INPUT
81#define CHECK_BUFFER(in) check_buffer(in)
Daniel Veillarde2d034d1999-07-27 19:52:06 +000082
83void check_buffer(xmlParserInputPtr in) {
84 if (in->base != in->buf->buffer->content) {
85 fprintf(stderr, "xmlParserInput: base mismatch problem\n");
86 }
87 if (in->cur < in->base) {
88 fprintf(stderr, "xmlParserInput: cur < base problem\n");
89 }
90 if (in->cur > in->base + in->buf->buffer->use) {
91 fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
92 }
93 fprintf(stderr,"buffer %x : content %x, cur %d, use %d, size %d\n",
94 (int) in, (int) in->buf->buffer->content, in->cur - in->base,
95 in->buf->buffer->use, in->buf->buffer->size);
96}
97
Daniel Veillardb05deb71999-08-10 19:04:08 +000098#else
99#define CHECK_BUFFER(in)
100#endif
101
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000102
103/**
104 * xmlParserInputRead:
105 * @in: an XML parser input
106 * @len: an indicative size for the lookahead
107 *
108 * This function refresh the input for the parser. It doesn't try to
109 * preserve pointers to the input buffer, and discard already read data
110 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000111 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000112 * end of this entity
113 */
114int
115xmlParserInputRead(xmlParserInputPtr in, int len) {
116 int ret;
117 int used;
118 int index;
119
120#ifdef DEBUG_INPUT
121 fprintf(stderr, "Read\n");
122#endif
123 if (in->buf == NULL) return(-1);
124 if (in->base == NULL) return(-1);
125 if (in->cur == NULL) return(-1);
126 if (in->buf->buffer == NULL) return(-1);
127
128 CHECK_BUFFER(in);
129
130 used = in->cur - in->buf->buffer->content;
131 ret = xmlBufferShrink(in->buf->buffer, used);
132 if (ret > 0) {
133 in->cur -= ret;
134 in->consumed += ret;
135 }
136 ret = xmlParserInputBufferRead(in->buf, len);
137 if (in->base != in->buf->buffer->content) {
138 /*
139 * the buffer has been realloced
140 */
141 index = in->cur - in->base;
142 in->base = in->buf->buffer->content;
143 in->cur = &in->buf->buffer->content[index];
144 }
145
146 CHECK_BUFFER(in);
147
148 return(ret);
149}
150
151/**
152 * xmlParserInputGrow:
153 * @in: an XML parser input
154 * @len: an indicative size for the lookahead
155 *
156 * This function increase the input for the parser. It tries to
157 * preserve pointers to the input buffer, and keep already read data
158 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000159 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000160 * end of this entity
161 */
162int
163xmlParserInputGrow(xmlParserInputPtr in, int len) {
164 int ret;
165 int index;
166
167#ifdef DEBUG_INPUT
168 fprintf(stderr, "Grow\n");
169#endif
170 if (in->buf == NULL) return(-1);
171 if (in->base == NULL) return(-1);
172 if (in->cur == NULL) return(-1);
173 if (in->buf->buffer == NULL) return(-1);
174
175 CHECK_BUFFER(in);
176
177 index = in->cur - in->base;
178 if (in->buf->buffer->use > index + INPUT_CHUNK) {
179
180 CHECK_BUFFER(in);
181
182 return(0);
183 }
Daniel Veillardda07c342000-01-25 18:31:22 +0000184 if ((in->buf->httpIO != NULL) || (in->buf->ftpIO != NULL) ||
185 (in->buf->file != NULL) ||
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000186#ifdef HAVE_ZLIB_H
187 (in->buf->gzfile != NULL) ||
188#endif
189 (in->buf->fd >= 0))
190 ret = xmlParserInputBufferGrow(in->buf, len);
191 else
192 return(0);
Daniel Veillard10a2c651999-12-12 13:03:50 +0000193
194 /*
195 * NOTE : in->base may be a "dandling" i.e. freed pointer in this
196 * block, but we use it really as an integer to do some
197 * pointer arithmetic. Insure will raise it as a bug but in
198 * that specific case, that's not !
199 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000200 if (in->base != in->buf->buffer->content) {
201 /*
202 * the buffer has been realloced
203 */
204 index = in->cur - in->base;
205 in->base = in->buf->buffer->content;
206 in->cur = &in->buf->buffer->content[index];
207 }
208
209 CHECK_BUFFER(in);
210
211 return(ret);
212}
213
214/**
215 * xmlParserInputShrink:
216 * @in: an XML parser input
217 *
218 * This function removes used input for the parser.
219 */
220void
221xmlParserInputShrink(xmlParserInputPtr in) {
222 int used;
223 int ret;
224 int index;
225
226#ifdef DEBUG_INPUT
227 fprintf(stderr, "Shrink\n");
228#endif
229 if (in->buf == NULL) return;
230 if (in->base == NULL) return;
231 if (in->cur == NULL) return;
232 if (in->buf->buffer == NULL) return;
233
234 CHECK_BUFFER(in);
235
236 used = in->cur - in->buf->buffer->content;
237 if (used > INPUT_CHUNK) {
Daniel Veillardb05deb71999-08-10 19:04:08 +0000238 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000239 if (ret > 0) {
240 in->cur -= ret;
241 in->consumed += ret;
242 }
243 }
244
245 CHECK_BUFFER(in);
246
247 if (in->buf->buffer->use > INPUT_CHUNK) {
248 return;
249 }
250 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
251 if (in->base != in->buf->buffer->content) {
252 /*
253 * the buffer has been realloced
254 */
255 index = in->cur - in->base;
256 in->base = in->buf->buffer->content;
257 in->cur = &in->buf->buffer->content[index];
258 }
259
260 CHECK_BUFFER(in);
261}
262
Daniel Veillard260a68f1998-08-13 03:39:55 +0000263/************************************************************************
264 * *
265 * Parser stacks related functions and macros *
266 * *
267 ************************************************************************/
Daniel Veillard011b63c1999-06-02 17:44:04 +0000268
269int xmlSubstituteEntitiesDefaultValue = 0;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000270int xmlDoValidityCheckingDefaultValue = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +0000271int xmlKeepBlanksDefaultValue = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +0000272xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
273 const xmlChar ** str);
Daniel Veillard011b63c1999-06-02 17:44:04 +0000274
Daniel Veillard260a68f1998-08-13 03:39:55 +0000275/*
276 * Generic function for accessing stacks in the Parser Context
277 */
278
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000279#define PUSH_AND_POP(scope, type, name) \
280scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
Daniel Veillard260a68f1998-08-13 03:39:55 +0000281 if (ctxt->name##Nr >= ctxt->name##Max) { \
282 ctxt->name##Max *= 2; \
Daniel Veillard6454aec1999-09-02 22:04:43 +0000283 ctxt->name##Tab = (void *) xmlRealloc(ctxt->name##Tab, \
Daniel Veillard260a68f1998-08-13 03:39:55 +0000284 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
285 if (ctxt->name##Tab == NULL) { \
286 fprintf(stderr, "realloc failed !\n"); \
Daniel Veillard0142b842000-01-14 14:45:24 +0000287 return(0); \
Daniel Veillard260a68f1998-08-13 03:39:55 +0000288 } \
289 } \
290 ctxt->name##Tab[ctxt->name##Nr] = value; \
291 ctxt->name = value; \
292 return(ctxt->name##Nr++); \
293} \
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000294scope type name##Pop(xmlParserCtxtPtr ctxt) { \
Daniel Veillardd692aa41999-02-28 21:54:31 +0000295 type ret; \
Daniel Veillard260a68f1998-08-13 03:39:55 +0000296 if (ctxt->name##Nr <= 0) return(0); \
297 ctxt->name##Nr--; \
Daniel Veillardccb09631998-10-27 06:21:04 +0000298 if (ctxt->name##Nr > 0) \
299 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
300 else \
301 ctxt->name = NULL; \
Daniel Veillardd692aa41999-02-28 21:54:31 +0000302 ret = ctxt->name##Tab[ctxt->name##Nr]; \
303 ctxt->name##Tab[ctxt->name##Nr] = 0; \
304 return(ret); \
Daniel Veillard260a68f1998-08-13 03:39:55 +0000305} \
306
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000307PUSH_AND_POP(extern, xmlParserInputPtr, input)
308PUSH_AND_POP(extern, xmlNodePtr, node)
309PUSH_AND_POP(extern, xmlChar*, name)
Daniel Veillard260a68f1998-08-13 03:39:55 +0000310
Daniel Veillardcf461992000-03-14 18:30:20 +0000311int spacePush(xmlParserCtxtPtr ctxt, int val) {
312 if (ctxt->spaceNr >= ctxt->spaceMax) {
313 ctxt->spaceMax *= 2;
314 ctxt->spaceTab = (void *) xmlRealloc(ctxt->spaceTab,
315 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
316 if (ctxt->spaceTab == NULL) {
317 fprintf(stderr, "realloc failed !\n");
318 return(0);
319 }
320 }
321 ctxt->spaceTab[ctxt->spaceNr] = val;
322 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
323 return(ctxt->spaceNr++);
324}
325
326int spacePop(xmlParserCtxtPtr ctxt) {
327 int ret;
328 if (ctxt->spaceNr <= 0) return(0);
329 ctxt->spaceNr--;
330 if (ctxt->spaceNr > 0)
331 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
332 else
333 ctxt->space = NULL;
334 ret = ctxt->spaceTab[ctxt->spaceNr];
335 ctxt->spaceTab[ctxt->spaceNr] = -1;
336 return(ret);
337}
338
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one needs to make assumptions on the context to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. a 8 bit value if compiled
 *           in ISO-Latin or UTF-8.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
 *   CUR_CHAR Return the current char as an int as well as its length.
 */
Daniel Veillard260a68f1998-08-13 03:39:55 +0000365
/*
 * RAW evaluates to -1 while ctxt->token is non-zero, otherwise to the unit
 * at ctxt->input->cur. CUR evaluates to ctxt->token when it is non-zero,
 * otherwise to the unit at ctxt->input->cur. NXT(val) and CUR_PTR index /
 * expose ctxt->input->cur directly and do not look at ctxt->token.
 * All of these expect a variable named ctxt to be in scope.
 */
Daniel Veillardcf461992000-03-14 18:30:20 +0000366#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
Daniel Veillardb05deb71999-08-10 19:04:08 +0000367#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000368#define NXT(val) ctxt->input->cur[(val)]
369#define CUR_PTR ctxt->input->cur
Daniel Veillardcf461992000-03-14 18:30:20 +0000370
/*
 * SKIP(val) advances ctxt->nbChars and ctxt->input->cur by val, then hands
 * over to the PE reference / reference handlers when the new current unit
 * is '%' / '&', and finally pops the input when the current unit is 0 and
 * growing the input returns <= 0.
 * NOTE: this expands to several statements and is not wrapped in
 * do { } while (0); used as the body of an unbraced if/else/loop, only the
 * first statement is controlled by it.
 */
371#define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val); \
372 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
373 if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); \
374 if ((*ctxt->input->cur == 0) && \
375 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
376 xmlPopInput(ctxt)
377
/*
 * SHRINK calls xmlParserInputShrink() on the current input, then pops the
 * input when the current unit is 0 and growing the input returns <= 0.
 * Same multi-statement caveat as SKIP.
 */
Daniel Veillardb05deb71999-08-10 19:04:08 +0000378#define SHRINK xmlParserInputShrink(ctxt->input); \
379 if ((*ctxt->input->cur == 0) && \
380 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
381 xmlPopInput(ctxt)
382
/*
 * GROW calls xmlParserInputGrow() on the current input, then pops the
 * input when the current unit is 0 and a further grow returns <= 0.
 * Same multi-statement caveat as SKIP.
 */
383#define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
384 if ((*ctxt->input->cur == 0) && \
385 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
386 xmlPopInput(ctxt)
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000387
/*
 * SKIP_BLANKS and NEXT are plain calls to xmlSkipBlankChars() and
 * xmlNextChar(). Both expansions already end with ';', so "NEXT;" yields
 * an extra empty statement and the return value of xmlSkipBlankChars() is
 * not usable through SKIP_BLANKS inside an expression.
 */
Daniel Veillardcf461992000-03-14 18:30:20 +0000388#define SKIP_BLANKS xmlSkipBlankChars(ctxt);
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000389
Daniel Veillardcf461992000-03-14 18:30:20 +0000390#define NEXT xmlNextChar(ctxt);
391
/*
 * NEXTL(l) updates line/col from the current unit, clears ctxt->token,
 * advances ctxt->input->cur by l, then hands over to the PE reference /
 * reference handlers when the new current unit is '%' / '&'.
 * NOTE: l is expanded unparenthesized, and the expansion is several
 * statements ending with ';' (same caveat as SKIP).
 */
392#define NEXTL(l) \
393 if (*(ctxt->input->cur) == '\n') { \
394 ctxt->input->line++; ctxt->input->col = 1; \
395 } else ctxt->input->col++; \
396 ctxt->token = 0; ctxt->input->cur += l; \
Daniel Veillardb05deb71999-08-10 19:04:08 +0000397 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
Daniel Veillardcf461992000-03-14 18:30:20 +0000398 if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000399
/*
 * CUR_CHAR(l) / CUR_SCHAR(s, l) call xmlCurrentChar() / xmlStringCurrentChar()
 * and store the length of the char read in the variable l (passed by
 * name, its address is taken). Both expansions already end with ';'.
 */
Daniel Veillardcf461992000-03-14 18:30:20 +0000400#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l);
401#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l);
402
/*
 * COPY_BUF(l,b,i,v) appends the char value v to buffer b at index i and
 * advances i: a direct store when l is 1, otherwise through xmlCopyChar().
 * NOTE: arguments are expanded unparenthesized and the expansion is a bare
 * if/else ending with ';'.
 */
403#define COPY_BUF(l,b,i,v) \
404 if (l == 1) b[i++] = (xmlChar) v; \
405 else i += xmlCopyChar(l,&b[i],v);
406
407/**
408 * xmlNextChar:
409 * @ctxt: the XML parser context
410 *
411 * Skip to the next char input char.
412 */
413
414void
415xmlNextChar(xmlParserCtxtPtr ctxt) {
416 /*
417 * TODO: 2.11 End-of-Line Handling
418 * the literal two-character sequence "#xD#xA" or a standalone
419 * literal #xD, an XML processor must pass to the application
420 * the single character #xA.
421 */
422 if (ctxt->token != 0) ctxt->token = 0;
423 else {
424 if ((*ctxt->input->cur == 0) &&
425 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
426 (ctxt->instate != XML_PARSER_COMMENT)) {
427 /*
428 * If we are at the end of the current entity and
429 * the context allows it, we pop consumed entities
430 * automatically.
431 * TODO: the auto closing should be blocked in other cases
432 */
433 xmlPopInput(ctxt);
434 } else {
435 if (*(ctxt->input->cur) == '\n') {
436 ctxt->input->line++; ctxt->input->col = 1;
437 } else ctxt->input->col++;
438 if (ctxt->encoding == NULL) {
439 /*
440 * We are supposed to handle UTF8, check it's valid
441 * From rfc2044: encoding of the Unicode values on UTF-8:
442 *
443 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
444 * 0000 0000-0000 007F 0xxxxxxx
445 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
446 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
447 *
448 * Check for the 0x110000 limit too
449 */
450 const unsigned char *cur = ctxt->input->cur;
451 unsigned char c;
452
453 c = *cur;
454 if (c & 0x80) {
455 if (cur[1] == 0)
456 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
457 if ((cur[1] & 0xc0) != 0x80)
458 goto encoding_error;
459 if ((c & 0xe0) == 0xe0) {
460 unsigned int val;
461
462 if (cur[2] == 0)
463 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
464 if ((cur[2] & 0xc0) != 0x80)
465 goto encoding_error;
466 if ((c & 0xf0) == 0xf0) {
467 if (cur[3] == 0)
468 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
469 if (((c & 0xf8) != 0xf0) ||
470 ((cur[3] & 0xc0) != 0x80))
471 goto encoding_error;
472 /* 4-byte code */
473 ctxt->input->cur += 4;
474 val = (cur[0] & 0x7) << 18;
475 val |= (cur[1] & 0x3f) << 12;
476 val |= (cur[2] & 0x3f) << 6;
477 val |= cur[3] & 0x3f;
478 } else {
479 /* 3-byte code */
480 ctxt->input->cur += 3;
481 val = (cur[0] & 0xf) << 12;
482 val |= (cur[1] & 0x3f) << 6;
483 val |= cur[2] & 0x3f;
484 }
485 if (((val > 0xd7ff) && (val < 0xe000)) ||
486 ((val > 0xfffd) && (val < 0x10000)) ||
487 (val >= 0x110000)) {
488 if ((ctxt->sax != NULL) &&
489 (ctxt->sax->error != NULL))
490 ctxt->sax->error(ctxt->userData,
491 "Char out of allowed range\n");
492 ctxt->errNo = XML_ERR_INVALID_ENCODING;
493 ctxt->wellFormed = 0;
494 ctxt->disableSAX = 1;
495 }
496 } else
497 /* 2-byte code */
498 ctxt->input->cur += 2;
499 } else
500 /* 1-byte code */
501 ctxt->input->cur++;
502 } else {
503 /*
504 * Assume it's a fixed lenght encoding (1) with
505 * a compatibke encoding for the ASCII set, since
506 * XML constructs only use < 128 chars
507 */
508 ctxt->input->cur++;
509 }
510 ctxt->nbChars++;
511 if (*ctxt->input->cur == 0)
512 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
513 }
514 }
515 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
516 if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
517 if ((*ctxt->input->cur == 0) &&
518 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
519 xmlPopInput(ctxt);
520 return;
521encoding_error:
522 /*
523 * If we detect an UTF8 error that probably mean that the
524 * input encoding didn't get properly advertized in the
525 * declaration header. Report the error and switch the encoding
526 * to ISO-Latin-1 (if you don't like this policy, just declare the
527 * encoding !)
528 */
529 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
530 ctxt->sax->error(ctxt->userData,
531 "Input is not proper UTF-8, indicate encoding !\n");
532 ctxt->errNo = XML_ERR_INVALID_ENCODING;
533
534 ctxt->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
535 ctxt->input->cur++;
536 return;
537}
538
539/**
540 * xmlCurrentChar:
541 * @ctxt: the XML parser context
542 * @len: pointer to the length of the char read
543 *
544 * The current char value, if using UTF-8 this may actaully span multiple
545 * bytes in the input buffer. Implement the end of line normalization:
546 * 2.11 End-of-Line Handling
547 * Wherever an external parsed entity or the literal entity value
548 * of an internal parsed entity contains either the literal two-character
549 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
550 * must pass to the application the single character #xA.
551 * This behavior can conveniently be produced by normalizing all
552 * line breaks to #xA on input, before parsing.)
553 *
554 * Returns the current char value and its lenght
555 */
556
557int
558xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
559 if (ctxt->token != 0) {
560 *len = 0;
561 return(ctxt->token);
562 }
563 if (ctxt->encoding == NULL) {
564 /*
565 * We are supposed to handle UTF8, check it's valid
566 * From rfc2044: encoding of the Unicode values on UTF-8:
567 *
568 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
569 * 0000 0000-0000 007F 0xxxxxxx
570 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
571 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
572 *
573 * Check for the 0x110000 limit too
574 */
575 const unsigned char *cur = ctxt->input->cur;
576 unsigned char c;
577 unsigned int val;
578
579 c = *cur;
580 if (c & 0x80) {
581 if (cur[1] == 0)
582 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
583 if ((cur[1] & 0xc0) != 0x80)
584 goto encoding_error;
585 if ((c & 0xe0) == 0xe0) {
586
587 if (cur[2] == 0)
588 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
589 if ((cur[2] & 0xc0) != 0x80)
590 goto encoding_error;
591 if ((c & 0xf0) == 0xf0) {
592 if (cur[3] == 0)
593 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
594 if (((c & 0xf8) != 0xf0) ||
595 ((cur[3] & 0xc0) != 0x80))
596 goto encoding_error;
597 /* 4-byte code */
598 *len = 4;
599 val = (cur[0] & 0x7) << 18;
600 val |= (cur[1] & 0x3f) << 12;
601 val |= (cur[2] & 0x3f) << 6;
602 val |= cur[3] & 0x3f;
603 } else {
604 /* 3-byte code */
605 *len = 3;
606 val = (cur[0] & 0xf) << 12;
607 val |= (cur[1] & 0x3f) << 6;
608 val |= cur[2] & 0x3f;
609 }
610 } else {
611 /* 2-byte code */
612 *len = 2;
613 val = (cur[0] & 0x1f) << 6;
614 val |= cur[1] & 0x3f;
615 }
616 if (!IS_CHAR(val)) {
617 if ((ctxt->sax != NULL) &&
618 (ctxt->sax->error != NULL))
619 ctxt->sax->error(ctxt->userData,
620 "Char out of allowed range\n");
621 ctxt->errNo = XML_ERR_INVALID_ENCODING;
622 ctxt->wellFormed = 0;
623 ctxt->disableSAX = 1;
624 }
625 return(val);
626 } else {
627 /* 1-byte code */
628 *len = 1;
629 if (*ctxt->input->cur == 0xD) {
630 if (ctxt->input->cur[1] == 0xA) {
631 ctxt->nbChars++;
632 ctxt->input->cur++;
633 }
634 return(0xA);
635 }
636 return((int) *ctxt->input->cur);
637 }
638 }
639 /*
640 * Assume it's a fixed lenght encoding (1) with
641 * a compatibke encoding for the ASCII set, since
642 * XML constructs only use < 128 chars
643 */
644 *len = 1;
645 if (*ctxt->input->cur == 0xD) {
646 if (ctxt->input->cur[1] == 0xA) {
647 ctxt->nbChars++;
648 ctxt->input->cur++;
649 }
650 return(0xA);
651 }
652 return((int) *ctxt->input->cur);
653encoding_error:
654 /*
655 * If we detect an UTF8 error that probably mean that the
656 * input encoding didn't get properly advertized in the
657 * declaration header. Report the error and switch the encoding
658 * to ISO-Latin-1 (if you don't like this policy, just declare the
659 * encoding !)
660 */
661 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
662 ctxt->sax->error(ctxt->userData,
663 "Input is not proper UTF-8, indicate encoding !\n");
664 ctxt->errNo = XML_ERR_INVALID_ENCODING;
665
666 ctxt->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
667 *len = 1;
668 return((int) *ctxt->input->cur);
669}
670
671/**
672 * xmlStringCurrentChar:
673 * @ctxt: the XML parser context
674 * @cur: pointer to the beginning of the char
675 * @len: pointer to the length of the char read
676 *
677 * The current char value, if using UTF-8 this may actaully span multiple
678 * bytes in the input buffer.
679 *
680 * Returns the current char value and its lenght
681 */
682
683int
684xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
685 if (ctxt->encoding == NULL) {
686 /*
687 * We are supposed to handle UTF8, check it's valid
688 * From rfc2044: encoding of the Unicode values on UTF-8:
689 *
690 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
691 * 0000 0000-0000 007F 0xxxxxxx
692 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
693 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
694 *
695 * Check for the 0x110000 limit too
696 */
697 unsigned char c;
698 unsigned int val;
699
700 c = *cur;
701 if (c & 0x80) {
702 if ((cur[1] & 0xc0) != 0x80)
703 goto encoding_error;
704 if ((c & 0xe0) == 0xe0) {
705
706 if ((cur[2] & 0xc0) != 0x80)
707 goto encoding_error;
708 if ((c & 0xf0) == 0xf0) {
709 if (((c & 0xf8) != 0xf0) ||
710 ((cur[3] & 0xc0) != 0x80))
711 goto encoding_error;
712 /* 4-byte code */
713 *len = 4;
714 val = (cur[0] & 0x7) << 18;
715 val |= (cur[1] & 0x3f) << 12;
716 val |= (cur[2] & 0x3f) << 6;
717 val |= cur[3] & 0x3f;
718 } else {
719 /* 3-byte code */
720 *len = 3;
721 val = (cur[0] & 0xf) << 12;
722 val |= (cur[1] & 0x3f) << 6;
723 val |= cur[2] & 0x3f;
724 }
725 } else {
726 /* 2-byte code */
727 *len = 2;
728 val = (cur[0] & 0x1f) << 6;
729 val |= cur[2] & 0x3f;
730 }
731 if (!IS_CHAR(val)) {
732 if ((ctxt->sax != NULL) &&
733 (ctxt->sax->error != NULL))
734 ctxt->sax->error(ctxt->userData,
735 "Char out of allowed range\n");
736 ctxt->errNo = XML_ERR_INVALID_ENCODING;
737 ctxt->wellFormed = 0;
738 ctxt->disableSAX = 1;
739 }
740 return(val);
741 } else {
742 /* 1-byte code */
743 *len = 1;
744 return((int) *cur);
745 }
746 }
747 /*
748 * Assume it's a fixed lenght encoding (1) with
749 * a compatibke encoding for the ASCII set, since
750 * XML constructs only use < 128 chars
751 */
752 *len = 1;
753 return((int) *cur);
754encoding_error:
755 /*
756 * If we detect an UTF8 error that probably mean that the
757 * input encoding didn't get properly advertized in the
758 * declaration header. Report the error and switch the encoding
759 * to ISO-Latin-1 (if you don't like this policy, just declare the
760 * encoding !)
761 */
762 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
763 ctxt->sax->error(ctxt->userData,
764 "Input is not proper UTF-8, indicate encoding !\n");
765 ctxt->errNo = XML_ERR_INVALID_ENCODING;
766
767 *len = 1;
768 return((int) *cur);
769}
770
771/**
772 * xmlCopyChar:
773 * @len: pointer to the length of the char read (or zero)
774 * @array: pointer to an arry of xmlChar
775 * @val: the char value
776 *
777 * append the char value in the array
778 *
779 * Returns the number of xmlChar written
780 */
781
782int
783xmlCopyChar(int len, xmlChar *out, int val) {
784 /*
785 * We are supposed to handle UTF8, check it's valid
786 * From rfc2044: encoding of the Unicode values on UTF-8:
787 *
788 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
789 * 0000 0000-0000 007F 0xxxxxxx
790 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
791 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
792 */
793 if (len == 0) {
794 if (val < 0) len = 0;
795 else if (val < 0x80) len = 1;
796 else if (val < 0x800) len = 2;
797 else if (val < 0x10000) len = 3;
798 else if (val < 0x110000) len = 4;
799 if (len == 0) {
800 fprintf(stderr, "Internal error, xmlCopyChar 0x%X out of bound\n",
801 val);
802 return(0);
803 }
804 }
805 if (len > 1) {
806 int bits;
807
808 if (val < 0x80) { *out++= val; bits= -6; }
809 else if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
810 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6; }
811 else { *out++= (val >> 18) | 0xF0; bits= 12; }
812
813 for ( ; bits >= 0; bits-= 6)
814 *out++= ((val >> bits) & 0x3F) | 0x80 ;
815
816 return(len);
817 }
818 *out = (xmlChar) val;
819 return(1);
820}
821
822/**
823 * xmlSkipBlankChars:
824 * @ctxt: the XML parser context
825 *
826 * skip all blanks character found at that point in the input streams.
827 * It pops up finished entities in the process if allowable at that point.
828 *
829 * Returns the number of space chars skipped
830 */
831
832int
833xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
834 int cur, res = 0;
835
836 do {
837 cur = CUR;
838 while (IS_BLANK(cur)) {
839 NEXT;
840 cur = CUR;
841 res++;
842 }
843 while ((cur == 0) && (ctxt->inputNr > 1) &&
844 (ctxt->instate != XML_PARSER_COMMENT)) {
845 xmlPopInput(ctxt);
846 cur = CUR;
847 }
848 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
849 if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
850 } while (IS_BLANK(cur));
851 return(res);
852}
Daniel Veillard260a68f1998-08-13 03:39:55 +0000853
Daniel Veillardb05deb71999-08-10 19:04:08 +0000854/************************************************************************
855 * *
856 * Commodity functions to handle entities processing *
857 * *
858 ************************************************************************/
Daniel Veillard260a68f1998-08-13 03:39:55 +0000859
Daniel Veillard11e00581998-10-24 18:27:49 +0000860/**
861 * xmlPopInput:
862 * @ctxt: an XML parser context
863 *
Daniel Veillard260a68f1998-08-13 03:39:55 +0000864 * xmlPopInput: the current input pointed by ctxt->input came to an end
865 * pop it and return the next char.
866 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000867 * Returns the current xmlChar in the parser context
Daniel Veillard260a68f1998-08-13 03:39:55 +0000868 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000869xmlChar
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000870xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000871 if (ctxt->inputNr == 1) return(0); /* End of main Input */
Daniel Veillardbc50b591999-03-01 12:28:53 +0000872 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillardb05deb71999-08-10 19:04:08 +0000873 if ((*ctxt->input->cur == 0) &&
874 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
875 return(xmlPopInput(ctxt));
Daniel Veillard260a68f1998-08-13 03:39:55 +0000876 return(CUR);
877}
878
Daniel Veillard11e00581998-10-24 18:27:49 +0000879/**
880 * xmlPushInput:
881 * @ctxt: an XML parser context
882 * @input: an XML parser input fragment (entity, XML fragment ...).
883 *
Daniel Veillard260a68f1998-08-13 03:39:55 +0000884 * xmlPushInput: switch to a new input stream which is stacked on top
885 * of the previous one(s).
886 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +0000887void
888xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000889 if (input == NULL) return;
890 inputPush(ctxt, input);
Daniel Veillardcf461992000-03-14 18:30:20 +0000891 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +0000892}
893
Daniel Veillard11e00581998-10-24 18:27:49 +0000894/**
Daniel Veillardd692aa41999-02-28 21:54:31 +0000895 * xmlFreeInputStream:
Daniel Veillard51e3b151999-11-12 17:02:31 +0000896 * @input: an xmlParserInputPtr
Daniel Veillardd692aa41999-02-28 21:54:31 +0000897 *
898 * Free up an input stream.
899 */
900void
901xmlFreeInputStream(xmlParserInputPtr input) {
902 if (input == NULL) return;
903
Daniel Veillard6454aec1999-09-02 22:04:43 +0000904 if (input->filename != NULL) xmlFree((char *) input->filename);
905 if (input->directory != NULL) xmlFree((char *) input->directory);
Daniel Veillardcf461992000-03-14 18:30:20 +0000906 if (input->encoding != NULL) xmlFree((char *) input->encoding);
907 if (input->version != NULL) xmlFree((char *) input->version);
Daniel Veillardd692aa41999-02-28 21:54:31 +0000908 if ((input->free != NULL) && (input->base != NULL))
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000909 input->free((xmlChar *) input->base);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000910 if (input->buf != NULL)
911 xmlFreeParserInputBuffer(input->buf);
Daniel Veillardd692aa41999-02-28 21:54:31 +0000912 memset(input, -1, sizeof(xmlParserInput));
Daniel Veillard6454aec1999-09-02 22:04:43 +0000913 xmlFree(input);
Daniel Veillardd692aa41999-02-28 21:54:31 +0000914}
915
916/**
Daniel Veillardb05deb71999-08-10 19:04:08 +0000917 * xmlNewInputStream:
918 * @ctxt: an XML parser context
919 *
920 * Create a new input stream structure
921 * Returns the new input stream or NULL
922 */
923xmlParserInputPtr
924xmlNewInputStream(xmlParserCtxtPtr ctxt) {
925 xmlParserInputPtr input;
926
Daniel Veillard6454aec1999-09-02 22:04:43 +0000927 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
Daniel Veillardb05deb71999-08-10 19:04:08 +0000928 if (input == NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000929 ctxt->errNo = XML_ERR_NO_MEMORY;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000930 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +0000931 ctxt->sax->error(ctxt->userData,
932 "malloc: couldn't allocate a new input stream\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000933 ctxt->errNo = XML_ERR_NO_MEMORY;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000934 return(NULL);
935 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000936 memset(input, 0, sizeof(xmlParserInput));
Daniel Veillardb05deb71999-08-10 19:04:08 +0000937 input->line = 1;
938 input->col = 1;
Daniel Veillardcf461992000-03-14 18:30:20 +0000939 input->standalone = -1;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000940 return(input);
941}
942
943/**
Daniel Veillard11e00581998-10-24 18:27:49 +0000944 * xmlNewEntityInputStream:
945 * @ctxt: an XML parser context
946 * @entity: an Entity pointer
947 *
Daniel Veillard011b63c1999-06-02 17:44:04 +0000948 * Create a new input stream based on an xmlEntityPtr
Daniel Veillardb96e6431999-08-29 21:02:19 +0000949 *
950 * Returns the new input stream or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +0000951 */
Daniel Veillardccb09631998-10-27 06:21:04 +0000952xmlParserInputPtr
953xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000954 xmlParserInputPtr input;
955
956 if (entity == NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000957 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillarde3bffb91998-11-08 14:40:56 +0000958 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +0000959 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +0000960 "internal: xmlNewEntityInputStream entity = NULL\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000961 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillardccb09631998-10-27 06:21:04 +0000962 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000963 }
964 if (entity->content == NULL) {
Daniel Veillardcf461992000-03-14 18:30:20 +0000965 switch (entity->etype) {
Daniel Veillardb96e6431999-08-29 21:02:19 +0000966 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000967 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
Daniel Veillardb96e6431999-08-29 21:02:19 +0000968 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
969 ctxt->sax->error(ctxt->userData,
970 "xmlNewEntityInputStream unparsed entity !\n");
971 break;
972 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
973 case XML_EXTERNAL_PARAMETER_ENTITY:
974 return(xmlLoadExternalEntity((char *) entity->SystemID,
Daniel Veillard686d6b62000-01-03 11:08:02 +0000975 (char *) entity->ExternalID, ctxt));
Daniel Veillardb96e6431999-08-29 21:02:19 +0000976 case XML_INTERNAL_GENERAL_ENTITY:
977 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
978 ctxt->sax->error(ctxt->userData,
979 "Internal entity %s without content !\n", entity->name);
980 break;
981 case XML_INTERNAL_PARAMETER_ENTITY:
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000982 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillardb96e6431999-08-29 21:02:19 +0000983 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
984 ctxt->sax->error(ctxt->userData,
985 "Internal parameter entity %s without content !\n", entity->name);
986 break;
987 case XML_INTERNAL_PREDEFINED_ENTITY:
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000988 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillardb96e6431999-08-29 21:02:19 +0000989 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
990 ctxt->sax->error(ctxt->userData,
991 "Predefined entity %s without content !\n", entity->name);
992 break;
993 }
Daniel Veillardccb09631998-10-27 06:21:04 +0000994 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000995 }
Daniel Veillardb05deb71999-08-10 19:04:08 +0000996 input = xmlNewInputStream(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000997 if (input == NULL) {
Daniel Veillardccb09631998-10-27 06:21:04 +0000998 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000999 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001000 input->filename = (char *) entity->SystemID;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001001 input->base = entity->content;
1002 input->cur = entity->content;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001003 input->length = entity->length;
Daniel Veillardccb09631998-10-27 06:21:04 +00001004 return(input);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001005}
1006
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001007/**
1008 * xmlNewStringInputStream:
1009 * @ctxt: an XML parser context
Daniel Veillardb05deb71999-08-10 19:04:08 +00001010 * @buffer: an memory buffer
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001011 *
1012 * Create a new input stream based on a memory buffer.
Daniel Veillard1e346af1999-02-22 10:33:01 +00001013 * Returns the new input stream
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001014 */
1015xmlParserInputPtr
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001016xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001017 xmlParserInputPtr input;
1018
Daniel Veillardb05deb71999-08-10 19:04:08 +00001019 if (buffer == NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001020 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00001022 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001023 "internal: xmlNewStringInputStream string = NULL\n");
1024 return(NULL);
1025 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00001026 input = xmlNewInputStream(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001027 if (input == NULL) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001028 return(NULL);
1029 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00001030 input->base = buffer;
1031 input->cur = buffer;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001032 input->length = xmlStrlen(buffer);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001033 return(input);
1034}
1035
Daniel Veillard011b63c1999-06-02 17:44:04 +00001036/**
1037 * xmlNewInputFromFile:
1038 * @ctxt: an XML parser context
1039 * @filename: the filename to use as entity
1040 *
1041 * Create a new input stream based on a file.
1042 *
1043 * Returns the new input stream or NULL in case of error
1044 */
1045xmlParserInputPtr
1046xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001047 xmlParserInputBufferPtr buf;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001048 xmlParserInputPtr inputStream;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001049 char *directory = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001050
Daniel Veillardb05deb71999-08-10 19:04:08 +00001051 if (ctxt == NULL) return(NULL);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001052 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001053 if (buf == NULL) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001054 char name[XML_PARSER_BIG_BUFFER_SIZE];
Daniel Veillard011b63c1999-06-02 17:44:04 +00001055
Daniel Veillardb05deb71999-08-10 19:04:08 +00001056 if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
1057#ifdef WIN32
1058 sprintf(name, "%s\\%s", ctxt->input->directory, filename);
1059#else
1060 sprintf(name, "%s/%s", ctxt->input->directory, filename);
1061#endif
1062 buf = xmlParserInputBufferCreateFilename(name,
1063 XML_CHAR_ENCODING_NONE);
1064 if (buf != NULL)
Daniel Veillard686d6b62000-01-03 11:08:02 +00001065 directory = xmlParserGetDirectory(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001066 }
1067 if ((buf == NULL) && (ctxt->directory != NULL)) {
1068#ifdef WIN32
1069 sprintf(name, "%s\\%s", ctxt->directory, filename);
1070#else
1071 sprintf(name, "%s/%s", ctxt->directory, filename);
1072#endif
1073 buf = xmlParserInputBufferCreateFilename(name,
1074 XML_CHAR_ENCODING_NONE);
1075 if (buf != NULL)
Daniel Veillard686d6b62000-01-03 11:08:02 +00001076 directory = xmlParserGetDirectory(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001077 }
1078 if (buf == NULL)
1079 return(NULL);
1080 }
1081 if (directory == NULL)
1082 directory = xmlParserGetDirectory(filename);
1083
1084 inputStream = xmlNewInputStream(ctxt);
Daniel Veillard011b63c1999-06-02 17:44:04 +00001085 if (inputStream == NULL) {
Daniel Veillard6454aec1999-09-02 22:04:43 +00001086 if (directory != NULL) xmlFree((char *) directory);
Daniel Veillard011b63c1999-06-02 17:44:04 +00001087 return(NULL);
1088 }
1089
Daniel Veillard6454aec1999-09-02 22:04:43 +00001090 inputStream->filename = xmlMemStrdup(filename);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001091 inputStream->directory = directory;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001092 inputStream->buf = buf;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001093
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001094 inputStream->base = inputStream->buf->buffer->content;
1095 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001096 if ((ctxt->directory == NULL) && (directory != NULL))
Daniel Veillard294cbca1999-12-03 13:19:09 +00001097 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
Daniel Veillard011b63c1999-06-02 17:44:04 +00001098 return(inputStream);
1099}
1100
1101/************************************************************************
1102 * *
Daniel Veillardb05deb71999-08-10 19:04:08 +00001103 * Commodity functions to handle parser contexts *
1104 * *
1105 ************************************************************************/
1106
1107/**
1108 * xmlInitParserCtxt:
1109 * @ctxt: an XML parser context
1110 *
1111 * Initialize a parser context
1112 */
1113
1114void
1115xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1116{
1117 xmlSAXHandler *sax;
1118
Daniel Veillardcf461992000-03-14 18:30:20 +00001119 xmlDefaultSAXHandlerInit();
1120
Daniel Veillard6454aec1999-09-02 22:04:43 +00001121 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
Daniel Veillardb05deb71999-08-10 19:04:08 +00001122 if (sax == NULL) {
1123 fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
1124 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001125 memset(sax, 0, sizeof(xmlSAXHandler));
Daniel Veillardb05deb71999-08-10 19:04:08 +00001126
1127 /* Allocate the Input stack */
Daniel Veillard6454aec1999-09-02 22:04:43 +00001128 ctxt->inputTab = (xmlParserInputPtr *) xmlMalloc(5 * sizeof(xmlParserInputPtr));
Daniel Veillardb05deb71999-08-10 19:04:08 +00001129 ctxt->inputNr = 0;
1130 ctxt->inputMax = 5;
1131 ctxt->input = NULL;
Daniel Veillardcf461992000-03-14 18:30:20 +00001132
Daniel Veillardb05deb71999-08-10 19:04:08 +00001133 ctxt->version = NULL;
1134 ctxt->encoding = NULL;
1135 ctxt->standalone = -1;
1136 ctxt->hasExternalSubset = 0;
1137 ctxt->hasPErefs = 0;
1138 ctxt->html = 0;
1139 ctxt->external = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001140 ctxt->instate = XML_PARSER_START;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001141 ctxt->token = 0;
1142 ctxt->directory = NULL;
1143
1144 /* Allocate the Node stack */
Daniel Veillard6454aec1999-09-02 22:04:43 +00001145 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
Daniel Veillardb05deb71999-08-10 19:04:08 +00001146 ctxt->nodeNr = 0;
1147 ctxt->nodeMax = 10;
1148 ctxt->node = NULL;
1149
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001150 /* Allocate the Name stack */
1151 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1152 ctxt->nameNr = 0;
1153 ctxt->nameMax = 10;
1154 ctxt->name = NULL;
1155
Daniel Veillardcf461992000-03-14 18:30:20 +00001156 /* Allocate the space stack */
1157 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1158 ctxt->spaceNr = 1;
1159 ctxt->spaceMax = 10;
1160 ctxt->spaceTab[0] = -1;
1161 ctxt->space = &ctxt->spaceTab[0];
1162
1163 if (sax == NULL) {
1164 ctxt->sax = &xmlDefaultSAXHandler;
1165 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00001166 ctxt->sax = sax;
1167 memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
1168 }
1169 ctxt->userData = ctxt;
1170 ctxt->myDoc = NULL;
1171 ctxt->wellFormed = 1;
1172 ctxt->valid = 1;
1173 ctxt->validate = xmlDoValidityCheckingDefaultValue;
Daniel Veillard83a30e72000-03-02 03:33:32 +00001174 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001175 ctxt->vctxt.userData = ctxt;
Daniel Veillard5feb8492000-02-02 17:15:36 +00001176 if (ctxt->validate) {
1177 ctxt->vctxt.error = xmlParserValidityError;
Daniel Veillardcf461992000-03-14 18:30:20 +00001178 if (xmlGetWarningsDefaultValue == 0)
1179 ctxt->vctxt.warning = NULL;
1180 else
1181 ctxt->vctxt.warning = xmlParserValidityWarning;
1182 /* Allocate the Node stack */
1183 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
1184 ctxt->vctxt.nodeNr = 0;
1185 ctxt->vctxt.nodeMax = 4;
1186 ctxt->vctxt.node = NULL;
Daniel Veillard5feb8492000-02-02 17:15:36 +00001187 } else {
1188 ctxt->vctxt.error = NULL;
1189 ctxt->vctxt.warning = NULL;
1190 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00001191 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1192 ctxt->record_info = 0;
Daniel Veillard10a2c651999-12-12 13:03:50 +00001193 ctxt->nbChars = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001194 ctxt->checkIndex = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001195 ctxt->inSubset = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001196 ctxt->errNo = XML_ERR_OK;
Daniel Veillardcf461992000-03-14 18:30:20 +00001197 ctxt->depth = 0;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001198 xmlInitNodeInfoSeq(&ctxt->node_seq);
1199}
1200
1201/**
1202 * xmlFreeParserCtxt:
1203 * @ctxt: an XML parser context
1204 *
1205 * Free all the memory used by a parser context. However the parsed
1206 * document in ctxt->myDoc is not freed.
1207 */
1208
1209void
1210xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1211{
1212 xmlParserInputPtr input;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001213 xmlChar *oldname;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001214
1215 if (ctxt == NULL) return;
1216
1217 while ((input = inputPop(ctxt)) != NULL) {
1218 xmlFreeInputStream(input);
1219 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001220 while ((oldname = namePop(ctxt)) != NULL) {
1221 xmlFree(oldname);
1222 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001223 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001224 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
Daniel Veillard6454aec1999-09-02 22:04:43 +00001225 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1226 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1227 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1228 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
Daniel Veillardcf461992000-03-14 18:30:20 +00001229 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
1230 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1231 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1232 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001233 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
Daniel Veillard6454aec1999-09-02 22:04:43 +00001234 xmlFree(ctxt->sax);
1235 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1236 xmlFree(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001237}
1238
1239/**
1240 * xmlNewParserCtxt:
1241 *
1242 * Allocate and initialize a new parser context.
1243 *
1244 * Returns the xmlParserCtxtPtr or NULL
1245 */
1246
1247xmlParserCtxtPtr
1248xmlNewParserCtxt()
1249{
1250 xmlParserCtxtPtr ctxt;
1251
Daniel Veillard6454aec1999-09-02 22:04:43 +00001252 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
Daniel Veillardb05deb71999-08-10 19:04:08 +00001253 if (ctxt == NULL) {
1254 fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
1255 perror("malloc");
1256 return(NULL);
1257 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001258 memset(ctxt, 0, sizeof(xmlParserCtxt));
Daniel Veillardb05deb71999-08-10 19:04:08 +00001259 xmlInitParserCtxt(ctxt);
1260 return(ctxt);
1261}
1262
1263/**
1264 * xmlClearParserCtxt:
1265 * @ctxt: an XML parser context
1266 *
1267 * Clear (release owned resources) and reinitialize a parser context
1268 */
1269
1270void
1271xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1272{
1273 xmlClearNodeInfoSeq(&ctxt->node_seq);
1274 xmlInitParserCtxt(ctxt);
1275}
1276
1277/************************************************************************
1278 * *
Daniel Veillard011b63c1999-06-02 17:44:04 +00001279 * Commodity functions to handle entities *
1280 * *
1281 ************************************************************************/
1282
Daniel Veillardcf461992000-03-14 18:30:20 +00001283/**
1284 * xmlCheckEntity:
1285 * @ctxt: an XML parser context
1286 * @content: the entity content string
1287 *
1288 * Parse an entity content and checks the WF constraints
1289 *
1290 */
1291
1292void
1293xmlCheckEntity(xmlParserCtxtPtr ctxt, const xmlChar *content) {
1294}
Daniel Veillardb05deb71999-08-10 19:04:08 +00001295
1296/**
1297 * xmlParseCharRef:
1298 * @ctxt: an XML parser context
1299 *
1300 * parse Reference declarations
1301 *
1302 * [66] CharRef ::= '&#' [0-9]+ ';' |
1303 * '&#x' [0-9a-fA-F]+ ';'
1304 *
1305 * [ WFC: Legal Character ]
1306 * Characters referred to using character references must match the
1307 * production for Char.
1308 *
Daniel Veillard10a2c651999-12-12 13:03:50 +00001309 * Returns the value parsed (as an int), 0 in case of error
Daniel Veillardb05deb71999-08-10 19:04:08 +00001310 */
1311int
1312xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1313 int val = 0;
1314
1315 if (ctxt->token != 0) {
1316 val = ctxt->token;
1317 ctxt->token = 0;
1318 return(val);
1319 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001320 if ((RAW == '&') && (NXT(1) == '#') &&
Daniel Veillardb05deb71999-08-10 19:04:08 +00001321 (NXT(2) == 'x')) {
1322 SKIP(3);
Daniel Veillardcf461992000-03-14 18:30:20 +00001323 while (RAW != ';') {
1324 if ((RAW >= '0') && (RAW <= '9'))
Daniel Veillardb05deb71999-08-10 19:04:08 +00001325 val = val * 16 + (CUR - '0');
Daniel Veillardcf461992000-03-14 18:30:20 +00001326 else if ((RAW >= 'a') && (RAW <= 'f'))
Daniel Veillardb05deb71999-08-10 19:04:08 +00001327 val = val * 16 + (CUR - 'a') + 10;
Daniel Veillardcf461992000-03-14 18:30:20 +00001328 else if ((RAW >= 'A') && (RAW <= 'F'))
Daniel Veillardb05deb71999-08-10 19:04:08 +00001329 val = val * 16 + (CUR - 'A') + 10;
1330 else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001331 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001332 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1333 ctxt->sax->error(ctxt->userData,
1334 "xmlParseCharRef: invalid hexadecimal value\n");
1335 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001336 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001337 val = 0;
1338 break;
1339 }
1340 NEXT;
1341 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001342 if (RAW == ';') {
1343 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1344 ctxt->nbChars ++;
1345 ctxt->input->cur++;
1346 }
1347 } else if ((RAW == '&') && (NXT(1) == '#')) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00001348 SKIP(2);
Daniel Veillardcf461992000-03-14 18:30:20 +00001349 while (RAW != ';') {
1350 if ((RAW >= '0') && (RAW <= '9'))
Daniel Veillardb05deb71999-08-10 19:04:08 +00001351 val = val * 10 + (CUR - '0');
1352 else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001353 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001354 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1355 ctxt->sax->error(ctxt->userData,
1356 "xmlParseCharRef: invalid decimal value\n");
1357 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001358 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001359 val = 0;
1360 break;
1361 }
1362 NEXT;
1363 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001364 if (RAW == ';') {
1365 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1366 ctxt->nbChars ++;
1367 ctxt->input->cur++;
1368 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00001369 } else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001370 ctxt->errNo = XML_ERR_INVALID_CHARREF;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001371 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1372 ctxt->sax->error(ctxt->userData,
1373 "xmlParseCharRef: invalid value\n");
1374 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001375 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001376 }
1377
1378 /*
1379 * [ WFC: Legal Character ]
1380 * Characters referred to using character references must match the
1381 * production for Char.
1382 */
1383 if (IS_CHAR(val)) {
1384 return(val);
1385 } else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001386 ctxt->errNo = XML_ERR_INVALID_CHAR;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001387 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001388 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
Daniel Veillardb05deb71999-08-10 19:04:08 +00001389 val);
1390 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001391 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001392 }
1393 return(0);
1394}
1395
1396/**
Daniel Veillard10a2c651999-12-12 13:03:50 +00001397 * xmlParseStringCharRef:
1398 * @ctxt: an XML parser context
1399 * @str: a pointer to an index in the string
1400 *
1401 * parse Reference declarations, variant parsing from a string rather
1402 * than an an input flow.
1403 *
1404 * [66] CharRef ::= '&#' [0-9]+ ';' |
1405 * '&#x' [0-9a-fA-F]+ ';'
1406 *
1407 * [ WFC: Legal Character ]
1408 * Characters referred to using character references must match the
1409 * production for Char.
1410 *
1411 * Returns the value parsed (as an int), 0 in case of error, str will be
1412 * updated to the current value of the index
1413 */
1414int
1415xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1416 const xmlChar *ptr;
1417 xmlChar cur;
1418 int val = 0;
1419
1420 if ((str == NULL) || (*str == NULL)) return(0);
1421 ptr = *str;
1422 cur = *ptr;
Daniel Veillard0caf07a1999-12-21 16:25:49 +00001423 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00001424 ptr += 3;
1425 cur = *ptr;
1426 while (cur != ';') {
1427 if ((cur >= '0') && (cur <= '9'))
1428 val = val * 16 + (cur - '0');
1429 else if ((cur >= 'a') && (cur <= 'f'))
1430 val = val * 16 + (cur - 'a') + 10;
1431 else if ((cur >= 'A') && (cur <= 'F'))
1432 val = val * 16 + (cur - 'A') + 10;
1433 else {
1434 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1435 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1436 ctxt->sax->error(ctxt->userData,
1437 "xmlParseCharRef: invalid hexadecimal value\n");
1438 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001439 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00001440 val = 0;
1441 break;
1442 }
1443 ptr++;
1444 cur = *ptr;
1445 }
1446 if (cur == ';')
1447 ptr++;
Daniel Veillard0142b842000-01-14 14:45:24 +00001448 } else if ((cur == '&') && (ptr[1] == '#')){
Daniel Veillard10a2c651999-12-12 13:03:50 +00001449 ptr += 2;
1450 cur = *ptr;
1451 while (cur != ';') {
1452 if ((cur >= '0') && (cur <= '9'))
1453 val = val * 10 + (cur - '0');
1454 else {
1455 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1456 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1457 ctxt->sax->error(ctxt->userData,
1458 "xmlParseCharRef: invalid decimal value\n");
1459 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001460 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00001461 val = 0;
1462 break;
1463 }
1464 ptr++;
1465 cur = *ptr;
1466 }
1467 if (cur == ';')
1468 ptr++;
1469 } else {
1470 ctxt->errNo = XML_ERR_INVALID_CHARREF;
1471 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1472 ctxt->sax->error(ctxt->userData,
1473 "xmlParseCharRef: invalid value\n");
1474 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001475 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00001476 return(0);
1477 }
1478 *str = ptr;
1479
1480 /*
1481 * [ WFC: Legal Character ]
1482 * Characters referred to using character references must match the
1483 * production for Char.
1484 */
1485 if (IS_CHAR(val)) {
1486 return(val);
1487 } else {
1488 ctxt->errNo = XML_ERR_INVALID_CHAR;
1489 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1490 ctxt->sax->error(ctxt->userData,
1491 "CharRef: invalid xmlChar value %d\n", val);
1492 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001493 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00001494 }
1495 return(0);
1496}
1497
1498/**
Daniel Veillardb05deb71999-08-10 19:04:08 +00001499 * xmlParserHandleReference:
1500 * @ctxt: the parser context
1501 *
1502 * [67] Reference ::= EntityRef | CharRef
1503 *
1504 * [68] EntityRef ::= '&' Name ';'
1505 *
1506 * [ WFC: Entity Declared ]
1507 * the Name given in the entity reference must match that in an entity
1508 * declaration, except that well-formed documents need not declare any
1509 * of the following entities: amp, lt, gt, apos, quot.
1510 *
1511 * [ WFC: Parsed Entity ]
1512 * An entity reference must not contain the name of an unparsed entity
1513 *
1514 * [66] CharRef ::= '&#' [0-9]+ ';' |
1515 * '&#x' [0-9a-fA-F]+ ';'
1516 *
1517 * A PEReference may have been detectect in the current input stream
1518 * the handling is done accordingly to
1519 * http://www.w3.org/TR/REC-xml#entproc
1520 */
1521void
1522xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
1523 xmlParserInputPtr input;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001524 xmlChar *name;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001525 xmlEntityPtr ent = NULL;
1526
Daniel Veillard35008381999-10-25 13:15:52 +00001527 if (ctxt->token != 0) {
Daniel Veillard35008381999-10-25 13:15:52 +00001528 return;
1529 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001530 if (RAW != '&') return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001531 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00001532 if ((RAW == '&') && (NXT(1) == '#')) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00001533 switch(ctxt->instate) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001534 case XML_PARSER_ENTITY_DECL:
1535 case XML_PARSER_PI:
Daniel Veillardb05deb71999-08-10 19:04:08 +00001536 case XML_PARSER_CDATA_SECTION:
Daniel Veillardb05deb71999-08-10 19:04:08 +00001537 case XML_PARSER_COMMENT:
Daniel Veillardcf461992000-03-14 18:30:20 +00001538 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001539 /* we just ignore it there */
1540 return;
1541 case XML_PARSER_START_TAG:
1542 return;
1543 case XML_PARSER_END_TAG:
Daniel Veillardb05deb71999-08-10 19:04:08 +00001544 return;
1545 case XML_PARSER_EOF:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001546 ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001547 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1548 ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
1549 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001550 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001551 return;
1552 case XML_PARSER_PROLOG:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001553 case XML_PARSER_START:
1554 case XML_PARSER_MISC:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001555 ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001556 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1557 ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
1558 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001559 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001560 return;
1561 case XML_PARSER_EPILOG:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001562 ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001563 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1564 ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
1565 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001566 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001567 return;
1568 case XML_PARSER_DTD:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001569 ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001570 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1571 ctxt->sax->error(ctxt->userData,
1572 "CharRef are forbiden in DTDs!\n");
1573 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001574 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001575 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001576 case XML_PARSER_ENTITY_VALUE:
1577 /*
1578 * NOTE: in the case of entity values, we don't do the
Daniel Veillard51e3b151999-11-12 17:02:31 +00001579 * substitution here since we need the literal
Daniel Veillardb05deb71999-08-10 19:04:08 +00001580 * entity value to be able to save the internal
1581 * subset of the document.
1582 * This will be handled by xmlDecodeEntities
1583 */
1584 return;
1585 case XML_PARSER_CONTENT:
1586 case XML_PARSER_ATTRIBUTE_VALUE:
Daniel Veillardb05deb71999-08-10 19:04:08 +00001587 ctxt->token = xmlParseCharRef(ctxt);
1588 return;
1589 }
1590 return;
1591 }
1592
1593 switch(ctxt->instate) {
1594 case XML_PARSER_CDATA_SECTION:
1595 return;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001596 case XML_PARSER_PI:
Daniel Veillardb05deb71999-08-10 19:04:08 +00001597 case XML_PARSER_COMMENT:
Daniel Veillardcf461992000-03-14 18:30:20 +00001598 case XML_PARSER_SYSTEM_LITERAL:
1599 case XML_PARSER_CONTENT:
Daniel Veillardb05deb71999-08-10 19:04:08 +00001600 return;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001601 case XML_PARSER_START_TAG:
1602 return;
1603 case XML_PARSER_END_TAG:
1604 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001605 case XML_PARSER_EOF:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001606 ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001607 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1608 ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
1609 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001610 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001611 return;
1612 case XML_PARSER_PROLOG:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001613 case XML_PARSER_START:
1614 case XML_PARSER_MISC:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001615 ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1617 ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
1618 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001619 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001620 return;
1621 case XML_PARSER_EPILOG:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001622 ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001623 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1624 ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
1625 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001626 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001627 return;
1628 case XML_PARSER_ENTITY_VALUE:
1629 /*
1630 * NOTE: in the case of entity values, we don't do the
Daniel Veillard51e3b151999-11-12 17:02:31 +00001631 * substitution here since we need the literal
Daniel Veillardb05deb71999-08-10 19:04:08 +00001632 * entity value to be able to save the internal
1633 * subset of the document.
1634 * This will be handled by xmlDecodeEntities
1635 */
1636 return;
1637 case XML_PARSER_ATTRIBUTE_VALUE:
1638 /*
1639 * NOTE: in the case of attributes values, we don't do the
1640 * substitution here unless we are in a mode where
1641 * the parser is explicitely asked to substitute
1642 * entities. The SAX callback is called with values
1643 * without entity substitution.
1644 * This will then be handled by xmlDecodeEntities
1645 */
Daniel Veillardb96e6431999-08-29 21:02:19 +00001646 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001647 case XML_PARSER_ENTITY_DECL:
1648 /*
1649 * we just ignore it there
1650 * the substitution will be done once the entity is referenced
1651 */
1652 return;
1653 case XML_PARSER_DTD:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001654 ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001655 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1656 ctxt->sax->error(ctxt->userData,
1657 "Entity references are forbiden in DTDs!\n");
1658 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001659 ctxt->disableSAX = 1;
Daniel Veillardb96e6431999-08-29 21:02:19 +00001660 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001661 }
1662
1663 NEXT;
1664 name = xmlScanName(ctxt);
1665 if (name == NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001666 ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001667 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1668 ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
1669 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001670 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001671 ctxt->token = '&';
1672 return;
1673 }
1674 if (NXT(xmlStrlen(name)) != ';') {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001675 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001676 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1677 ctxt->sax->error(ctxt->userData,
1678 "Entity reference: ';' expected\n");
1679 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001680 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001681 ctxt->token = '&';
Daniel Veillard6454aec1999-09-02 22:04:43 +00001682 xmlFree(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001683 return;
1684 }
1685 SKIP(xmlStrlen(name) + 1);
1686 if (ctxt->sax != NULL) {
1687 if (ctxt->sax->getEntity != NULL)
1688 ent = ctxt->sax->getEntity(ctxt->userData, name);
1689 }
1690
1691 /*
1692 * [ WFC: Entity Declared ]
1693 * the Name given in the entity reference must match that in an entity
1694 * declaration, except that well-formed documents need not declare any
1695 * of the following entities: amp, lt, gt, apos, quot.
1696 */
1697 if (ent == NULL)
1698 ent = xmlGetPredefinedEntity(name);
1699 if (ent == NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001700 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001701 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1702 ctxt->sax->error(ctxt->userData,
1703 "Entity reference: entity %s not declared\n",
1704 name);
1705 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001706 ctxt->disableSAX = 1;
Daniel Veillard6454aec1999-09-02 22:04:43 +00001707 xmlFree(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001708 return;
1709 }
1710
1711 /*
1712 * [ WFC: Parsed Entity ]
1713 * An entity reference must not contain the name of an unparsed entity
1714 */
Daniel Veillardcf461992000-03-14 18:30:20 +00001715 if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001716 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001717 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1718 ctxt->sax->error(ctxt->userData,
1719 "Entity reference to unparsed entity %s\n", name);
1720 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001721 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001722 }
1723
Daniel Veillardcf461992000-03-14 18:30:20 +00001724 if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00001725 ctxt->token = ent->content[0];
Daniel Veillard6454aec1999-09-02 22:04:43 +00001726 xmlFree(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001727 return;
1728 }
1729 input = xmlNewEntityInputStream(ctxt, ent);
1730 xmlPushInput(ctxt, input);
Daniel Veillard6454aec1999-09-02 22:04:43 +00001731 xmlFree(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001732 return;
1733}
1734
1735/**
1736 * xmlParserHandlePEReference:
1737 * @ctxt: the parser context
1738 *
1739 * [69] PEReference ::= '%' Name ';'
1740 *
1741 * [ WFC: No Recursion ]
1742 * TODO A parsed entity must not contain a recursive
1743 * reference to itself, either directly or indirectly.
1744 *
1745 * [ WFC: Entity Declared ]
1746 * In a document without any DTD, a document with only an internal DTD
1747 * subset which contains no parameter entity references, or a document
1748 * with "standalone='yes'", ... ... The declaration of a parameter
1749 * entity must precede any reference to it...
1750 *
1751 * [ VC: Entity Declared ]
1752 * In a document with an external subset or external parameter entities
1753 * with "standalone='no'", ... ... The declaration of a parameter entity
1754 * must precede any reference to it...
1755 *
1756 * [ WFC: In DTD ]
1757 * Parameter-entity references may only appear in the DTD.
1758 * NOTE: misleading but this is handled.
1759 *
1760 * A PEReference may have been detected in the current input stream
1761 * the handling is done accordingly to
1762 * http://www.w3.org/TR/REC-xml#entproc
1763 * i.e.
1764 * - Included in literal in entity values
1765 * - Included as Paraemeter Entity reference within DTDs
1766 */
1767void
1768xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001769 xmlChar *name;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001770 xmlEntityPtr entity = NULL;
1771 xmlParserInputPtr input;
1772
Daniel Veillard35008381999-10-25 13:15:52 +00001773 if (ctxt->token != 0) {
Daniel Veillard35008381999-10-25 13:15:52 +00001774 return;
1775 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001776 if (RAW != '%') return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001777 switch(ctxt->instate) {
1778 case XML_PARSER_CDATA_SECTION:
1779 return;
1780 case XML_PARSER_COMMENT:
1781 return;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001782 case XML_PARSER_START_TAG:
1783 return;
1784 case XML_PARSER_END_TAG:
1785 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001786 case XML_PARSER_EOF:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001787 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001788 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1789 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
1790 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001791 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001792 return;
1793 case XML_PARSER_PROLOG:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001794 case XML_PARSER_START:
1795 case XML_PARSER_MISC:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001796 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001797 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1798 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
1799 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001800 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001801 return;
1802 case XML_PARSER_ENTITY_DECL:
1803 case XML_PARSER_CONTENT:
1804 case XML_PARSER_ATTRIBUTE_VALUE:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001805 case XML_PARSER_PI:
Daniel Veillardcf461992000-03-14 18:30:20 +00001806 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillardb05deb71999-08-10 19:04:08 +00001807 /* we just ignore it there */
1808 return;
1809 case XML_PARSER_EPILOG:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001810 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001811 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1812 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1813 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001814 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001815 return;
1816 case XML_PARSER_ENTITY_VALUE:
1817 /*
1818 * NOTE: in the case of entity values, we don't do the
Daniel Veillard51e3b151999-11-12 17:02:31 +00001819 * substitution here since we need the literal
Daniel Veillardb05deb71999-08-10 19:04:08 +00001820 * entity value to be able to save the internal
1821 * subset of the document.
1822 * This will be handled by xmlDecodeEntities
1823 */
1824 return;
1825 case XML_PARSER_DTD:
1826 /*
1827 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1828 * In the internal DTD subset, parameter-entity references
1829 * can occur only where markup declarations can occur, not
1830 * within markup declarations.
1831 * In that case this is handled in xmlParseMarkupDecl
1832 */
1833 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1834 return;
1835 }
1836
1837 NEXT;
1838 name = xmlParseName(ctxt);
1839 if (name == NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001840 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001841 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1842 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
1843 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001844 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001845 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00001846 if (RAW == ';') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00001847 NEXT;
1848 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1849 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1850 if (entity == NULL) {
1851
1852 /*
1853 * [ WFC: Entity Declared ]
1854 * In a document without any DTD, a document with only an
1855 * internal DTD subset which contains no parameter entity
1856 * references, or a document with "standalone='yes'", ...
1857 * ... The declaration of a parameter entity must precede
1858 * any reference to it...
1859 */
1860 if ((ctxt->standalone == 1) ||
1861 ((ctxt->hasExternalSubset == 0) &&
1862 (ctxt->hasPErefs == 0))) {
1863 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1864 ctxt->sax->error(ctxt->userData,
1865 "PEReference: %%%s; not found\n", name);
1866 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001867 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001868 } else {
1869 /*
1870 * [ VC: Entity Declared ]
1871 * In a document with an external subset or external
1872 * parameter entities with "standalone='no'", ...
1873 * ... The declaration of a parameter entity must precede
1874 * any reference to it...
1875 */
1876 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1877 ctxt->sax->warning(ctxt->userData,
1878 "PEReference: %%%s; not found\n", name);
1879 ctxt->valid = 0;
1880 }
1881 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00001882 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1883 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00001884 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00001885 * TODO !!! handle the extra spaces added before and after
Daniel Veillardb05deb71999-08-10 19:04:08 +00001886 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardb05deb71999-08-10 19:04:08 +00001887 */
1888 input = xmlNewEntityInputStream(ctxt, entity);
1889 xmlPushInput(ctxt, input);
Daniel Veillardcf461992000-03-14 18:30:20 +00001890 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
1891 (RAW == '<') && (NXT(1) == '?') &&
1892 (NXT(2) == 'x') && (NXT(3) == 'm') &&
1893 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1894 xmlParseTextDecl(ctxt);
1895 }
1896 if (ctxt->token == 0)
1897 ctxt->token = ' ';
Daniel Veillardb05deb71999-08-10 19:04:08 +00001898 } else {
1899 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1900 ctxt->sax->error(ctxt->userData,
1901 "xmlHandlePEReference: %s is not a parameter entity\n",
1902 name);
1903 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001904 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001905 }
1906 }
1907 } else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001908 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001909 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1910 ctxt->sax->error(ctxt->userData,
1911 "xmlHandlePEReference: expecting ';'\n");
1912 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001913 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001914 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00001915 xmlFree(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001916 }
1917}
1918
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates buffer to the new size.
 * The result of xmlRealloc() is first stored in a temporary: when the
 * reallocation fails the old block is still allocated, so it is released
 * here before the ENCLOSING FUNCTION is made to return NULL (previously
 * the only pointer to it was overwritten and the block leaked).
 *
 * Assumes buffer is a heap block owned by the enclosing function and that
 * this function returns a pointer type.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *buffer##_grown;                                            \
    buffer##_size *= 2;                                                 \
    buffer##_grown = (xmlChar *)                                        \
                xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));    \
    if (buffer##_grown == NULL) {                                       \
        perror("realloc failed");                                       \
        xmlFree(buffer);                                                \
        return(NULL);                                                   \
    }                                                                   \
    buffer = buffer##_grown;                                            \
}
1931
Daniel Veillard011b63c1999-06-02 17:44:04 +00001932/**
1933 * xmlDecodeEntities:
1934 * @ctxt: the parser context
1935 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1936 * @len: the len to decode (in bytes !), -1 for no size limit
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001937 * @end: an end marker xmlChar, 0 if none
1938 * @end2: an end marker xmlChar, 0 if none
1939 * @end3: an end marker xmlChar, 0 if none
Daniel Veillard011b63c1999-06-02 17:44:04 +00001940 *
1941 * [67] Reference ::= EntityRef | CharRef
1942 *
1943 * [69] PEReference ::= '%' Name ';'
1944 *
1945 * Returns A newly allocated string with the substitution done. The caller
1946 * must deallocate it !
1947 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001948xmlChar *
Daniel Veillard011b63c1999-06-02 17:44:04 +00001949xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001950 xmlChar end, xmlChar end2, xmlChar end3) {
1951 xmlChar *buffer = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001952 int buffer_size = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001953 int nbchars = 0;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001954
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001955 xmlChar *current = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001956 xmlEntityPtr ent;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001957 unsigned int max = (unsigned int) len;
Daniel Veillardcf461992000-03-14 18:30:20 +00001958 int c,l;
1959
1960 if (ctxt->depth > 40) {
1961 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1962 ctxt->sax->error(ctxt->userData,
1963 "Detected entity reference loop\n");
1964 ctxt->wellFormed = 0;
1965 ctxt->disableSAX = 1;
1966 ctxt->errNo = XML_ERR_ENTITY_LOOP;
1967 return(NULL);
1968 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00001969
1970 /*
1971 * allocate a translation buffer.
1972 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001973 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001974 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
Daniel Veillard011b63c1999-06-02 17:44:04 +00001975 if (buffer == NULL) {
1976 perror("xmlDecodeEntities: malloc failed");
1977 return(NULL);
1978 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00001979
1980 /*
1981 * Ok loop until we reach one of the ending char or a size limit.
1982 */
Daniel Veillardcf461992000-03-14 18:30:20 +00001983 c = CUR_CHAR(l);
1984 while ((nbchars < max) && (c != end) &&
1985 (c != end2) && (c != end3)) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00001986
Daniel Veillardcf461992000-03-14 18:30:20 +00001987 if (c == 0) break;
1988 if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00001989 int val = xmlParseCharRef(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00001990 COPY_BUF(0,buffer,nbchars,val);
1991 NEXTL(l);
1992 } else if ((c == '&') && (ctxt->token != '&') &&
1993 (what & XML_SUBSTITUTE_REF)) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00001994 ent = xmlParseEntityRef(ctxt);
1995 if ((ent != NULL) &&
1996 (ctxt->replaceEntities != 0)) {
1997 current = ent->content;
1998 while (*current != 0) {
Daniel Veillardcf461992000-03-14 18:30:20 +00001999 buffer[nbchars++] = *current++;
2000 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002001 growBuffer(buffer);
Daniel Veillard011b63c1999-06-02 17:44:04 +00002002 }
2003 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00002004 } else if (ent != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002005 const xmlChar *cur = ent->name;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002006
Daniel Veillardcf461992000-03-14 18:30:20 +00002007 buffer[nbchars++] = '&';
2008 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002009 growBuffer(buffer);
Daniel Veillardb05deb71999-08-10 19:04:08 +00002010 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002011 while (*cur != 0) {
2012 buffer[nbchars++] = *cur++;
2013 }
2014 buffer[nbchars++] = ';';
Daniel Veillard011b63c1999-06-02 17:44:04 +00002015 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002016 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00002017 /*
2018 * a PEReference induce to switch the entity flow,
2019 * we break here to flush the current set of chars
2020 * parsed if any. We will be called back later.
2021 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002022 if (nbchars != 0) break;
Daniel Veillard011b63c1999-06-02 17:44:04 +00002023
2024 xmlParsePEReference(ctxt);
2025
2026 /*
2027 * Pop-up of finished entities.
2028 */
Daniel Veillardcf461992000-03-14 18:30:20 +00002029 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillard011b63c1999-06-02 17:44:04 +00002030 xmlPopInput(ctxt);
2031
Daniel Veillardb05deb71999-08-10 19:04:08 +00002032 break;
Daniel Veillard011b63c1999-06-02 17:44:04 +00002033 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00002034 COPY_BUF(l,buffer,nbchars,c);
2035 NEXTL(l);
2036 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Raph Levien05240da1999-06-15 21:27:11 +00002037 growBuffer(buffer);
Raph Levien05240da1999-06-15 21:27:11 +00002038 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00002039 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002040 c = CUR_CHAR(l);
Daniel Veillard011b63c1999-06-02 17:44:04 +00002041 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002042 buffer[nbchars++] = 0;
Daniel Veillard011b63c1999-06-02 17:44:04 +00002043 return(buffer);
2044}
2045
Daniel Veillard10a2c651999-12-12 13:03:50 +00002046/**
2047 * xmlStringDecodeEntities:
2048 * @ctxt: the parser context
2049 * @str: the input string
2050 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2051 * @end: an end marker xmlChar, 0 if none
2052 * @end2: an end marker xmlChar, 0 if none
2053 * @end3: an end marker xmlChar, 0 if none
2054 *
2055 * [67] Reference ::= EntityRef | CharRef
2056 *
2057 * [69] PEReference ::= '%' Name ';'
2058 *
2059 * Returns A newly allocated string with the substitution done. The caller
2060 * must deallocate it !
2061 */
2062xmlChar *
2063xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2064 xmlChar end, xmlChar end2, xmlChar end3) {
2065 xmlChar *buffer = NULL;
2066 int buffer_size = 0;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002067
2068 xmlChar *current = NULL;
2069 xmlEntityPtr ent;
Daniel Veillardcf461992000-03-14 18:30:20 +00002070 int c,l;
2071 int nbchars = 0;
2072
2073 if (ctxt->depth > 40) {
2074 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2075 ctxt->sax->error(ctxt->userData,
2076 "Detected entity reference loop\n");
2077 ctxt->wellFormed = 0;
2078 ctxt->disableSAX = 1;
2079 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2080 return(NULL);
2081 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00002082
2083 /*
2084 * allocate a translation buffer.
2085 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002086 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002087 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2088 if (buffer == NULL) {
2089 perror("xmlDecodeEntities: malloc failed");
2090 return(NULL);
2091 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00002092
2093 /*
2094 * Ok loop until we reach one of the ending char or a size limit.
2095 */
Daniel Veillardcf461992000-03-14 18:30:20 +00002096 c = CUR_SCHAR(str, l);
2097 while ((c != 0) && (c != end) && (c != end2) && (c != end3)) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002098
Daniel Veillardcf461992000-03-14 18:30:20 +00002099 if (c == 0) break;
2100 if ((c == '&') && (str[1] == '#')) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002101 int val = xmlParseStringCharRef(ctxt, &str);
Daniel Veillardcf461992000-03-14 18:30:20 +00002102 if (val != 0) {
2103 COPY_BUF(0,buffer,nbchars,val);
2104 }
2105 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002106 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillardcf461992000-03-14 18:30:20 +00002107 if ((ent != NULL) && (ent->content != NULL)) {
2108 xmlChar *rep;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002109
Daniel Veillardcf461992000-03-14 18:30:20 +00002110 ctxt->depth++;
2111 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2112 0, 0, 0);
2113 ctxt->depth--;
2114 if (rep != NULL) {
2115 current = rep;
2116 while (*current != 0) {
2117 buffer[nbchars++] = *current++;
2118 if (nbchars >
2119 buffer_size - XML_PARSER_BUFFER_SIZE) {
2120 growBuffer(buffer);
2121 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00002122 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002123 xmlFree(rep);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002124 }
2125 } else if (ent != NULL) {
2126 int i = xmlStrlen(ent->name);
2127 const xmlChar *cur = ent->name;
2128
Daniel Veillardcf461992000-03-14 18:30:20 +00002129 buffer[nbchars++] = '&';
2130 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002131 growBuffer(buffer);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002132 }
2133 for (;i > 0;i--)
Daniel Veillardcf461992000-03-14 18:30:20 +00002134 buffer[nbchars++] = *cur++;
2135 buffer[nbchars++] = ';';
Daniel Veillard10a2c651999-12-12 13:03:50 +00002136 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002137 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002138 ent = xmlParseStringPEReference(ctxt, &str);
2139 if (ent != NULL) {
Daniel Veillardcf461992000-03-14 18:30:20 +00002140 xmlChar *rep;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002141
Daniel Veillardcf461992000-03-14 18:30:20 +00002142 ctxt->depth++;
2143 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2144 0, 0, 0);
2145 ctxt->depth--;
2146 if (rep != NULL) {
2147 current = rep;
2148 while (*current != 0) {
2149 buffer[nbchars++] = *current++;
2150 if (nbchars >
2151 buffer_size - XML_PARSER_BUFFER_SIZE) {
2152 growBuffer(buffer);
2153 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00002154 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002155 xmlFree(rep);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002156 }
2157 }
2158 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00002159 COPY_BUF(l,buffer,nbchars,c);
2160 str += l;
2161 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002162 growBuffer(buffer);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002163 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00002164 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002165 c = CUR_SCHAR(str, l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002166 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002167 buffer[nbchars++] = 0;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002168 return(buffer);
2169}
2170
Daniel Veillard260a68f1998-08-13 03:39:55 +00002171
2172/************************************************************************
2173 * *
Daniel Veillard27d88741999-05-29 11:51:49 +00002174 * Commodity functions to handle encodings *
2175 * *
2176 ************************************************************************/
2177
Daniel Veillardcf461992000-03-14 18:30:20 +00002178/*
2179 * xmlCheckLanguageID
2180 * @lang: pointer to the string value
2181 *
2182 * Checks that the value conforms to the LanguageID production:
2183 *
2184 * [33] LanguageID ::= Langcode ('-' Subcode)*
2185 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2186 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2187 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2188 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2189 * [38] Subcode ::= ([a-z] | [A-Z])+
2190 *
2191 * Returns 1 if correct 0 otherwise
2192 **/
2193int
2194xmlCheckLanguageID(const xmlChar *lang) {
2195 const xmlChar *cur = lang;
2196
2197 if (cur == NULL)
2198 return(0);
2199 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2200 ((cur[0] == 'I') && (cur[1] == '-'))) {
2201 /*
2202 * IANA code
2203 */
2204 cur += 2;
2205 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2206 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2207 cur++;
2208 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2209 ((cur[0] == 'X') && (cur[1] == '-'))) {
2210 /*
2211 * User code
2212 */
2213 cur += 2;
2214 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2215 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2216 cur++;
2217 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2218 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2219 /*
2220 * ISO639
2221 */
2222 cur++;
2223 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2224 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2225 cur++;
2226 else
2227 return(0);
2228 } else
2229 return(0);
2230 while (cur[0] != 0) {
2231 if (cur[0] != '-')
2232 return(0);
2233 cur++;
2234 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2235 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2236 cur++;
2237 else
2238 return(0);
2239 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2240 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2241 cur++;
2242 }
2243 return(1);
2244}
2245
Daniel Veillard27d88741999-05-29 11:51:49 +00002246/**
2247 * xmlSwitchEncoding:
2248 * @ctxt: the parser context
Daniel Veillard00fdf371999-10-08 09:40:39 +00002249 * @enc: the encoding value (number)
Daniel Veillard27d88741999-05-29 11:51:49 +00002250 *
2251 * change the input functions when discovering the character encoding
2252 * of a given entity.
Daniel Veillard27d88741999-05-29 11:51:49 +00002253 */
2254void
2255xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
2256{
Daniel Veillardcf461992000-03-14 18:30:20 +00002257 xmlCharEncodingHandlerPtr handler;
2258
2259 handler = xmlGetCharEncodingHandler(enc);
2260 if (handler != NULL) {
2261 if (ctxt->input != NULL) {
2262 if (ctxt->input->buf != NULL) {
2263 if (ctxt->input->buf->encoder != NULL) {
2264 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2265 ctxt->sax->error(ctxt->userData,
2266 "xmlSwitchEncoding : encoder already regitered\n");
2267 return;
2268 }
2269 ctxt->input->buf->encoder = handler;
2270
2271 /*
2272 * Is there already some content down the pipe to convert
2273 */
2274 if ((ctxt->input->buf->buffer != NULL) &&
2275 (ctxt->input->buf->buffer->use > 0)) {
2276 xmlChar *buf;
2277 int res, len, size;
2278 int processed;
2279
2280 /*
2281 * Specific handling of the Byte Order Mark for
2282 * UTF-16
2283 */
2284 if ((enc == XML_CHAR_ENCODING_UTF16LE) &&
2285 (ctxt->input->cur[0] == 0xFF) &&
2286 (ctxt->input->cur[1] == 0xFE)) {
2287 SKIP(2);
2288 }
2289 if ((enc == XML_CHAR_ENCODING_UTF16BE) &&
2290 (ctxt->input->cur[0] == 0xFE) &&
2291 (ctxt->input->cur[1] == 0xFF)) {
2292 SKIP(2);
2293 }
2294
2295 /*
2296 * convert the non processed part
2297 */
2298 processed = ctxt->input->cur - ctxt->input->base;
2299 len = ctxt->input->buf->buffer->use - processed;
2300
2301 if (len <= 0) {
2302 return;
2303 }
2304 size = ctxt->input->buf->buffer->use * 4;
2305 if (size < 4000)
2306 size = 4000;
2307retry_larger:
2308 buf = (xmlChar *) xmlMalloc(size + 1);
2309 if (buf == NULL) {
2310 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2311 ctxt->sax->error(ctxt->userData,
2312 "xmlSwitchEncoding : out of memory\n");
2313 return;
2314 }
2315 /* TODO !!! Handling of buf too small */
2316 res = handler->input(buf, size, ctxt->input->cur, &len);
2317 if (res == -1) {
2318 size *= 2;
2319 xmlFree(buf);
2320 goto retry_larger;
2321 }
2322 if ((res < 0) ||
2323 (len != ctxt->input->buf->buffer->use - processed)) {
2324 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2325 ctxt->sax->error(ctxt->userData,
2326 "xmlSwitchEncoding : conversion failed\n");
2327 xmlFree(buf);
2328 return;
2329 }
2330
2331 /*
2332 * Conversion succeeded, get rid of the old buffer
2333 */
2334 xmlFree(ctxt->input->buf->buffer->content);
2335 ctxt->input->buf->buffer->content = buf;
2336 ctxt->input->base = buf;
2337 ctxt->input->cur = buf;
2338 ctxt->input->buf->buffer->size = size;
2339 ctxt->input->buf->buffer->use = res;
2340 buf[res] = 0;
2341 }
2342 return;
2343 } else {
2344 if (ctxt->input->length == 0) {
2345 /*
2346 * When parsing a static memory array one must know the
2347 * size to be able to convert the buffer.
2348 */
2349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2350 ctxt->sax->error(ctxt->userData,
2351 "xmlSwitchEncoding : no input\n");
2352 return;
2353 } else {
2354 xmlChar *buf;
2355 int res, len;
2356 int processed = ctxt->input->cur - ctxt->input->base;
2357
2358 /*
2359 * convert the non processed part
2360 */
2361 len = ctxt->input->length - processed;
2362 if (len <= 0) {
2363 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2364 ctxt->sax->error(ctxt->userData,
2365 "xmlSwitchEncoding : input fully consumed?\n");
2366 return;
2367 }
2368 buf = (xmlChar *) xmlMalloc(ctxt->input->length * 4);
2369 if (buf == NULL) {
2370 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2371 ctxt->sax->error(ctxt->userData,
2372 "xmlSwitchEncoding : out of memory\n");
2373 return;
2374 }
2375 res = handler->input(buf, ctxt->input->length * 4,
2376 ctxt->input->cur, &len);
2377 if ((res < 0) ||
2378 (len != ctxt->input->length - processed)) {
2379 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2380 ctxt->sax->error(ctxt->userData,
2381 "xmlSwitchEncoding : conversion failed\n");
2382 xmlFree(buf);
2383 return;
2384 }
2385 /*
2386 * Conversion succeeded, get rid of the old buffer
2387 */
2388 if ((ctxt->input->free != NULL) &&
2389 (ctxt->input->base != NULL))
2390 ctxt->input->free((xmlChar *) ctxt->input->base);
2391 ctxt->input->base = ctxt->input->cur = buf;
2392 ctxt->input->length = res;
2393 }
2394 }
2395 } else {
2396 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2397 ctxt->sax->error(ctxt->userData,
2398 "xmlSwitchEncoding : no input\n");
2399 }
2400 }
2401
Daniel Veillard27d88741999-05-29 11:51:49 +00002402 switch (enc) {
2403 case XML_CHAR_ENCODING_ERROR:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002404 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00002405 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2406 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
2407 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002408 ctxt->disableSAX = 1;
Daniel Veillard27d88741999-05-29 11:51:49 +00002409 break;
2410 case XML_CHAR_ENCODING_NONE:
2411 /* let's assume it's UTF-8 without the XML decl */
2412 return;
2413 case XML_CHAR_ENCODING_UTF8:
2414 /* default encoding, no conversion should be needed */
2415 return;
2416 case XML_CHAR_ENCODING_UTF16LE:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002417 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00002418 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2419 ctxt->sax->error(ctxt->userData,
2420 "char encoding UTF16 little endian not supported\n");
2421 break;
2422 case XML_CHAR_ENCODING_UTF16BE:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002423 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00002424 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2425 ctxt->sax->error(ctxt->userData,
2426 "char encoding UTF16 big endian not supported\n");
2427 break;
2428 case XML_CHAR_ENCODING_UCS4LE:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002429 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00002430 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2431 ctxt->sax->error(ctxt->userData,
2432 "char encoding USC4 little endian not supported\n");
2433 break;
2434 case XML_CHAR_ENCODING_UCS4BE:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002435 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00002436 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2437 ctxt->sax->error(ctxt->userData,
2438 "char encoding USC4 big endian not supported\n");
2439 break;
2440 case XML_CHAR_ENCODING_EBCDIC:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002441 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00002442 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2443 ctxt->sax->error(ctxt->userData,
2444 "char encoding EBCDIC not supported\n");
2445 break;
2446 case XML_CHAR_ENCODING_UCS4_2143:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002447 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00002448 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2449 ctxt->sax->error(ctxt->userData,
2450 "char encoding UCS4 2143 not supported\n");
2451 break;
2452 case XML_CHAR_ENCODING_UCS4_3412:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002453 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00002454 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2455 ctxt->sax->error(ctxt->userData,
2456 "char encoding UCS4 3412 not supported\n");
2457 break;
2458 case XML_CHAR_ENCODING_UCS2:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002459 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00002460 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2461 ctxt->sax->error(ctxt->userData,
2462 "char encoding UCS2 not supported\n");
2463 break;
2464 case XML_CHAR_ENCODING_8859_1:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002465 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00002466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2467 ctxt->sax->error(ctxt->userData,
2468 "char encoding ISO_8859_1 ISO Latin 1 not supported\n");
2469 break;
2470 case XML_CHAR_ENCODING_8859_2:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002471 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00002472 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2473 ctxt->sax->error(ctxt->userData,
2474 "char encoding ISO_8859_2 ISO Latin 2 not supported\n");
2475 break;
2476 case XML_CHAR_ENCODING_8859_3:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002477 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00002478 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2479 ctxt->sax->error(ctxt->userData,
2480 "char encoding ISO_8859_3 not supported\n");
2481 break;
2482 case XML_CHAR_ENCODING_8859_4:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002483 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00002484 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2485 ctxt->sax->error(ctxt->userData,
2486 "char encoding ISO_8859_4 not supported\n");
2487 break;
2488 case XML_CHAR_ENCODING_8859_5:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002489 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00002490 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2491 ctxt->sax->error(ctxt->userData,
2492 "char encoding ISO_8859_5 not supported\n");
2493 break;
2494 case XML_CHAR_ENCODING_8859_6:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002495 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00002496 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2497 ctxt->sax->error(ctxt->userData,
2498 "char encoding ISO_8859_6 not supported\n");
2499 break;
2500 case XML_CHAR_ENCODING_8859_7:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002501 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00002502 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2503 ctxt->sax->error(ctxt->userData,
2504 "char encoding ISO_8859_7 not supported\n");
2505 break;
2506 case XML_CHAR_ENCODING_8859_8:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002507 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00002508 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2509 ctxt->sax->error(ctxt->userData,
2510 "char encoding ISO_8859_8 not supported\n");
2511 break;
2512 case XML_CHAR_ENCODING_8859_9:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002513 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00002514 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2515 ctxt->sax->error(ctxt->userData,
2516 "char encoding ISO_8859_9 not supported\n");
2517 break;
2518 case XML_CHAR_ENCODING_2022_JP:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002519 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00002520 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2521 ctxt->sax->error(ctxt->userData,
2522 "char encoding ISO-2022-JPnot supported\n");
2523 break;
2524 case XML_CHAR_ENCODING_SHIFT_JIS:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002525 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00002526 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2527 ctxt->sax->error(ctxt->userData,
2528 "char encoding Shift_JISnot supported\n");
2529 break;
2530 case XML_CHAR_ENCODING_EUC_JP:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002531 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard27d88741999-05-29 11:51:49 +00002532 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2533 ctxt->sax->error(ctxt->userData,
2534 "char encoding EUC-JPnot supported\n");
2535 break;
2536 }
2537}
2538
2539/************************************************************************
2540 * *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002541 * Commodity functions to handle xmlChars *
Daniel Veillard260a68f1998-08-13 03:39:55 +00002542 * *
2543 ************************************************************************/
2544
Daniel Veillard11e00581998-10-24 18:27:49 +00002545/**
2546 * xmlStrndup:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002547 * @cur: the input xmlChar *
Daniel Veillard11e00581998-10-24 18:27:49 +00002548 * @len: the len of @cur
2549 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002550 * a strndup for array of xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00002551 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002552 * Returns a new xmlChar * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00002553 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002554xmlChar *
2555xmlStrndup(const xmlChar *cur, int len) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002556 xmlChar *ret;
2557
2558 if ((cur == NULL) || (len < 0)) return(NULL);
2559 ret = xmlMalloc((len + 1) * sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00002560 if (ret == NULL) {
Daniel Veillardbe70ff71999-07-05 16:50:46 +00002561 fprintf(stderr, "malloc of %ld byte failed\n",
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002562 (len + 1) * (long)sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00002563 return(NULL);
2564 }
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002565 memcpy(ret, cur, len * sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00002566 ret[len] = 0;
2567 return(ret);
2568}
2569
Daniel Veillard11e00581998-10-24 18:27:49 +00002570/**
2571 * xmlStrdup:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002572 * @cur: the input xmlChar *
Daniel Veillard11e00581998-10-24 18:27:49 +00002573 *
Daniel Veillardcf461992000-03-14 18:30:20 +00002574 * a strdup for array of xmlChar's. Since they are supposed to be
2575 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2576 * a termination mark of '0'.
Daniel Veillard1e346af1999-02-22 10:33:01 +00002577 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002578 * Returns a new xmlChar * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00002579 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002580xmlChar *
2581xmlStrdup(const xmlChar *cur) {
2582 const xmlChar *p = cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002583
Daniel Veillard10a2c651999-12-12 13:03:50 +00002584 if (cur == NULL) return(NULL);
Daniel Veillardcf461992000-03-14 18:30:20 +00002585 while (*p != 0) p++;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002586 return(xmlStrndup(cur, p - cur));
2587}
2588
Daniel Veillard11e00581998-10-24 18:27:49 +00002589/**
2590 * xmlCharStrndup:
2591 * @cur: the input char *
2592 * @len: the len of @cur
2593 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002594 * a strndup for char's to xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00002595 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002596 * Returns a new xmlChar * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00002597 */
2598
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002599xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002600xmlCharStrndup(const char *cur, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002601 int i;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002602 xmlChar *ret;
2603
2604 if ((cur == NULL) || (len < 0)) return(NULL);
2605 ret = xmlMalloc((len + 1) * sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00002606 if (ret == NULL) {
Daniel Veillardbe70ff71999-07-05 16:50:46 +00002607 fprintf(stderr, "malloc of %ld byte failed\n",
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002608 (len + 1) * (long)sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00002609 return(NULL);
2610 }
2611 for (i = 0;i < len;i++)
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002612 ret[i] = (xmlChar) cur[i];
Daniel Veillard260a68f1998-08-13 03:39:55 +00002613 ret[len] = 0;
2614 return(ret);
2615}
2616
Daniel Veillard11e00581998-10-24 18:27:49 +00002617/**
2618 * xmlCharStrdup:
2619 * @cur: the input char *
2620 * @len: the len of @cur
2621 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002622 * a strdup for char's to xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00002623 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002624 * Returns a new xmlChar * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00002625 */
2626
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002627xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002628xmlCharStrdup(const char *cur) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002629 const char *p = cur;
2630
Daniel Veillard10a2c651999-12-12 13:03:50 +00002631 if (cur == NULL) return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002632 while (*p != '\0') p++;
2633 return(xmlCharStrndup(cur, p - cur));
2634}
2635
Daniel Veillard11e00581998-10-24 18:27:49 +00002636/**
2637 * xmlStrcmp:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002638 * @str1: the first xmlChar *
2639 * @str2: the second xmlChar *
Daniel Veillard11e00581998-10-24 18:27:49 +00002640 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002641 * a strcmp for xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00002642 *
2643 * Returns the integer result of the comparison
Daniel Veillard260a68f1998-08-13 03:39:55 +00002644 */
2645
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002646int
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002647xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002648 register int tmp;
2649
Daniel Veillard10a2c651999-12-12 13:03:50 +00002650 if ((str1 == NULL) && (str2 == NULL)) return(0);
2651 if (str1 == NULL) return(-1);
2652 if (str2 == NULL) return(1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002653 do {
2654 tmp = *str1++ - *str2++;
2655 if (tmp != 0) return(tmp);
2656 } while ((*str1 != 0) && (*str2 != 0));
2657 return (*str1 - *str2);
2658}
2659
Daniel Veillard11e00581998-10-24 18:27:49 +00002660/**
2661 * xmlStrncmp:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002662 * @str1: the first xmlChar *
2663 * @str2: the second xmlChar *
Daniel Veillard11e00581998-10-24 18:27:49 +00002664 * @len: the max comparison length
2665 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002666 * a strncmp for xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00002667 *
2668 * Returns the integer result of the comparison
Daniel Veillard260a68f1998-08-13 03:39:55 +00002669 */
2670
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002671int
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002672xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002673 register int tmp;
2674
2675 if (len <= 0) return(0);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002676 if ((str1 == NULL) && (str2 == NULL)) return(0);
2677 if (str1 == NULL) return(-1);
2678 if (str2 == NULL) return(1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002679 do {
2680 tmp = *str1++ - *str2++;
2681 if (tmp != 0) return(tmp);
2682 len--;
2683 if (len <= 0) return(0);
2684 } while ((*str1 != 0) && (*str2 != 0));
2685 return (*str1 - *str2);
2686}
2687
Daniel Veillard11e00581998-10-24 18:27:49 +00002688/**
2689 * xmlStrchr:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002690 * @str: the xmlChar * array
2691 * @val: the xmlChar to search
Daniel Veillard11e00581998-10-24 18:27:49 +00002692 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002693 * a strchr for xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00002694 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002695 * Returns the xmlChar * for the first occurence or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002696 */
2697
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002698const xmlChar *
2699xmlStrchr(const xmlChar *str, xmlChar val) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002700 if (str == NULL) return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002701 while (*str != 0) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002702 if (*str == val) return((xmlChar *) str);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002703 str++;
2704 }
2705 return(NULL);
2706}
2707
Daniel Veillard11e00581998-10-24 18:27:49 +00002708/**
Daniel Veillard1566d3a1999-07-15 14:24:29 +00002709 * xmlStrstr:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002710 * @str: the xmlChar * array (haystack)
2711 * @val: the xmlChar to search (needle)
Daniel Veillard1566d3a1999-07-15 14:24:29 +00002712 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002713 * a strstr for xmlChar's
Daniel Veillard1566d3a1999-07-15 14:24:29 +00002714 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002715 * Returns the xmlChar * for the first occurence or NULL.
Daniel Veillard1566d3a1999-07-15 14:24:29 +00002716 */
2717
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002718const xmlChar *
2719xmlStrstr(const xmlChar *str, xmlChar *val) {
Daniel Veillard1566d3a1999-07-15 14:24:29 +00002720 int n;
2721
2722 if (str == NULL) return(NULL);
2723 if (val == NULL) return(NULL);
2724 n = xmlStrlen(val);
2725
2726 if (n == 0) return(str);
2727 while (*str != 0) {
2728 if (*str == *val) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002729 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
Daniel Veillard1566d3a1999-07-15 14:24:29 +00002730 }
2731 str++;
2732 }
2733 return(NULL);
2734}
2735
2736/**
2737 * xmlStrsub:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002738 * @str: the xmlChar * array (haystack)
Daniel Veillard1566d3a1999-07-15 14:24:29 +00002739 * @start: the index of the first char (zero based)
2740 * @len: the length of the substring
2741 *
2742 * Extract a substring of a given string
2743 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002744 * Returns the xmlChar * for the first occurence or NULL.
Daniel Veillard1566d3a1999-07-15 14:24:29 +00002745 */
2746
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002747xmlChar *
2748xmlStrsub(const xmlChar *str, int start, int len) {
Daniel Veillard1566d3a1999-07-15 14:24:29 +00002749 int i;
2750
2751 if (str == NULL) return(NULL);
2752 if (start < 0) return(NULL);
2753 if (len < 0) return(NULL);
2754
2755 for (i = 0;i < start;i++) {
2756 if (*str == 0) return(NULL);
2757 str++;
2758 }
2759 if (*str == 0) return(NULL);
2760 return(xmlStrndup(str, len));
2761}
2762
2763/**
Daniel Veillard11e00581998-10-24 18:27:49 +00002764 * xmlStrlen:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002765 * @str: the xmlChar * array
Daniel Veillard11e00581998-10-24 18:27:49 +00002766 *
Daniel Veillard51e3b151999-11-12 17:02:31 +00002767 * length of a xmlChar's string
Daniel Veillard1e346af1999-02-22 10:33:01 +00002768 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002769 * Returns the number of xmlChar contained in the ARRAY.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002770 */
2771
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002772int
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002773xmlStrlen(const xmlChar *str) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002774 int len = 0;
2775
2776 if (str == NULL) return(0);
2777 while (*str != 0) {
2778 str++;
2779 len++;
2780 }
2781 return(len);
2782}
2783
Daniel Veillard11e00581998-10-24 18:27:49 +00002784/**
2785 * xmlStrncat:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002786 * @cur: the original xmlChar * array
2787 * @add: the xmlChar * array added
Daniel Veillard11e00581998-10-24 18:27:49 +00002788 * @len: the length of @add
2789 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002790 * a strncat for array of xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00002791 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002792 * Returns a new xmlChar * containing the concatenated string.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002793 */
2794
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002795xmlChar *
2796xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002797 int size;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002798 xmlChar *ret;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002799
2800 if ((add == NULL) || (len == 0))
2801 return(cur);
2802 if (cur == NULL)
2803 return(xmlStrndup(add, len));
2804
2805 size = xmlStrlen(cur);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002806 ret = xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00002807 if (ret == NULL) {
Daniel Veillardbe70ff71999-07-05 16:50:46 +00002808 fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002809 (size + len + 1) * (long)sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00002810 return(cur);
2811 }
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002812 memcpy(&ret[size], add, len * sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00002813 ret[size + len] = 0;
2814 return(ret);
2815}
2816
Daniel Veillard11e00581998-10-24 18:27:49 +00002817/**
2818 * xmlStrcat:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002819 * @cur: the original xmlChar * array
2820 * @add: the xmlChar * array added
Daniel Veillard11e00581998-10-24 18:27:49 +00002821 *
Daniel Veillardcf461992000-03-14 18:30:20 +00002822 * a strcat for array of xmlChar's. Since they are supposed to be
2823 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2824 * a termination mark of '0'.
Daniel Veillard1e346af1999-02-22 10:33:01 +00002825 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002826 * Returns a new xmlChar * containing the concatenated string.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002827 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002828xmlChar *
2829xmlStrcat(xmlChar *cur, const xmlChar *add) {
2830 const xmlChar *p = add;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002831
2832 if (add == NULL) return(cur);
2833 if (cur == NULL)
2834 return(xmlStrdup(add));
2835
Daniel Veillardcf461992000-03-14 18:30:20 +00002836 while (*p != 0) p++;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002837 return(xmlStrncat(cur, add, p - add));
2838}
2839
2840/************************************************************************
2841 * *
2842 * Commodity functions, cleanup needed ? *
2843 * *
2844 ************************************************************************/
2845
Daniel Veillard11e00581998-10-24 18:27:49 +00002846/**
2847 * areBlanks:
2848 * @ctxt: an XML parser context
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002849 * @str: a xmlChar *
Daniel Veillard11e00581998-10-24 18:27:49 +00002850 * @len: the size of @str
2851 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00002852 * Is this a sequence of blank chars that one can ignore ?
Daniel Veillard11e00581998-10-24 18:27:49 +00002853 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00002854 * Returns 1 if ignorable 0 otherwise.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002855 */
2856
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002857static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002858 int i, ret;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002859 xmlNodePtr lastChild;
2860
Daniel Veillard83a30e72000-03-02 03:33:32 +00002861 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00002862 * Check for xml:space value.
2863 */
2864 if (*(ctxt->space) == 1)
2865 return(0);
2866
2867 /*
Daniel Veillard83a30e72000-03-02 03:33:32 +00002868 * Check that the string is made of blanks
2869 */
Daniel Veillard260a68f1998-08-13 03:39:55 +00002870 for (i = 0;i < len;i++)
2871 if (!(IS_BLANK(str[i]))) return(0);
2872
Daniel Veillard83a30e72000-03-02 03:33:32 +00002873 /*
2874 * Look if the element is mixed content in the Dtd if available
2875 */
Daniel Veillardb05deb71999-08-10 19:04:08 +00002876 if (ctxt->myDoc != NULL) {
2877 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2878 if (ret == 0) return(1);
2879 if (ret == 1) return(0);
2880 }
Daniel Veillard83a30e72000-03-02 03:33:32 +00002881
Daniel Veillardb05deb71999-08-10 19:04:08 +00002882 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00002883 * Otherwise, heuristic :-\
Daniel Veillardb05deb71999-08-10 19:04:08 +00002884 */
Daniel Veillard83a30e72000-03-02 03:33:32 +00002885 if (ctxt->keepBlanks)
2886 return(0);
Daniel Veillardcf461992000-03-14 18:30:20 +00002887 if (RAW != '<') return(0);
Daniel Veillard83a30e72000-03-02 03:33:32 +00002888 if (ctxt->node == NULL) return(0);
Daniel Veillardcf461992000-03-14 18:30:20 +00002889 if ((ctxt->node->children == NULL) &&
2890 (RAW == '<') && (NXT(1) == '/')) return(0);
Daniel Veillard83a30e72000-03-02 03:33:32 +00002891
Daniel Veillard260a68f1998-08-13 03:39:55 +00002892 lastChild = xmlGetLastChild(ctxt->node);
2893 if (lastChild == NULL) {
Daniel Veillard90fb02c2000-03-02 03:46:43 +00002894 if (ctxt->node->content != NULL) return(0);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002895 } else if (xmlNodeIsText(lastChild))
Daniel Veillard90fb02c2000-03-02 03:46:43 +00002896 return(0);
Daniel Veillardcf461992000-03-14 18:30:20 +00002897 else if ((ctxt->node->children != NULL) &&
2898 (xmlNodeIsText(ctxt->node->children)))
Daniel Veillard90fb02c2000-03-02 03:46:43 +00002899 return(0);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002900 return(1);
2901}
2902
Daniel Veillard11e00581998-10-24 18:27:49 +00002903/**
2904 * xmlHandleEntity:
2905 * @ctxt: an XML parser context
2906 * @entity: an XML entity pointer.
2907 *
2908 * Default handling of defined entities, when should we define a new input
Daniel Veillard260a68f1998-08-13 03:39:55 +00002909 * stream ? When do we just handle that as a set of chars ?
Daniel Veillardb05deb71999-08-10 19:04:08 +00002910 *
2911 * OBSOLETE: to be removed at some point.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002912 */
2913
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002914void
2915xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00002916 int len;
Daniel Veillardccb09631998-10-27 06:21:04 +00002917 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002918
2919 if (entity->content == NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002920 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00002921 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002922 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
Daniel Veillard260a68f1998-08-13 03:39:55 +00002923 entity->name);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00002924 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002925 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00002926 return;
2927 }
2928 len = xmlStrlen(entity->content);
2929 if (len <= 2) goto handle_as_char;
2930
2931 /*
2932 * Redefine its content as an input stream.
2933 */
Daniel Veillardccb09631998-10-27 06:21:04 +00002934 input = xmlNewEntityInputStream(ctxt, entity);
2935 xmlPushInput(ctxt, input);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002936 return;
2937
2938handle_as_char:
2939 /*
2940 * Just handle the content as a set of chars.
2941 */
Daniel Veillardcf461992000-03-14 18:30:20 +00002942 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
2943 (ctxt->sax->characters != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00002944 ctxt->sax->characters(ctxt->userData, entity->content, len);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002945
2946}
2947
/*
 * Forward definitions for recursive behaviour.
 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00002951void xmlParsePEReference(xmlParserCtxtPtr ctxt);
2952void xmlParseReference(xmlParserCtxtPtr ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002953
2954/************************************************************************
2955 * *
2956 * Extra stuff for namespace support *
2957 * Relates to http://www.w3.org/TR/WD-xml-names *
2958 * *
2959 ************************************************************************/
2960
Daniel Veillard11e00581998-10-24 18:27:49 +00002961/**
2962 * xmlNamespaceParseNCName:
2963 * @ctxt: an XML parser context
2964 *
2965 * parse an XML namespace name.
Daniel Veillard260a68f1998-08-13 03:39:55 +00002966 *
2967 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2968 *
2969 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2970 * CombiningChar | Extender
Daniel Veillard1e346af1999-02-22 10:33:01 +00002971 *
2972 * Returns the namespace name or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00002973 */
2974
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002975xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00002976xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
Daniel Veillardcf461992000-03-14 18:30:20 +00002977 xmlChar buf[XML_MAX_NAMELEN + 5];
2978 int len = 0, l;
2979 int cur = CUR_CHAR(l);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002980
Daniel Veillardcf461992000-03-14 18:30:20 +00002981 /* load first the value of the char !!! */
2982 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00002983
Daniel Veillardcf461992000-03-14 18:30:20 +00002984 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2985 (cur == '.') || (cur == '-') ||
2986 (cur == '_') ||
2987 (IS_COMBINING(cur)) ||
2988 (IS_EXTENDER(cur))) {
2989 COPY_BUF(l,buf,len,cur);
2990 NEXTL(l);
2991 cur = CUR_CHAR(l);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002992 if (len >= XML_MAX_NAMELEN) {
2993 fprintf(stderr,
2994 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00002995 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2996 (cur == '.') || (cur == '-') ||
2997 (cur == '_') ||
2998 (IS_COMBINING(cur)) ||
2999 (IS_EXTENDER(cur))) {
3000 NEXTL(l);
3001 cur = CUR_CHAR(l);
3002 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003003 break;
3004 }
3005 }
3006 return(xmlStrndup(buf, len));
Daniel Veillard260a68f1998-08-13 03:39:55 +00003007}
3008
Daniel Veillard11e00581998-10-24 18:27:49 +00003009/**
3010 * xmlNamespaceParseQName:
3011 * @ctxt: an XML parser context
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003012 * @prefix: a xmlChar **
Daniel Veillard11e00581998-10-24 18:27:49 +00003013 *
3014 * parse an XML qualified name
Daniel Veillard260a68f1998-08-13 03:39:55 +00003015 *
3016 * [NS 5] QName ::= (Prefix ':')? LocalPart
3017 *
3018 * [NS 6] Prefix ::= NCName
3019 *
3020 * [NS 7] LocalPart ::= NCName
Daniel Veillard1e346af1999-02-22 10:33:01 +00003021 *
Daniel Veillard51e3b151999-11-12 17:02:31 +00003022 * Returns the local part, and prefix is updated
Daniel Veillard11e00581998-10-24 18:27:49 +00003023 * to get the Prefix if any.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003024 */
3025
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003026xmlChar *
3027xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
3028 xmlChar *ret = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003029
3030 *prefix = NULL;
3031 ret = xmlNamespaceParseNCName(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00003032 if (RAW == ':') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003033 *prefix = ret;
3034 NEXT;
3035 ret = xmlNamespaceParseNCName(ctxt);
3036 }
3037
3038 return(ret);
3039}
3040
Daniel Veillard11e00581998-10-24 18:27:49 +00003041/**
Daniel Veillard517752b1999-04-05 12:20:10 +00003042 * xmlSplitQName:
Daniel Veillardcf461992000-03-14 18:30:20 +00003043 * @ctxt: an XML parser context
Daniel Veillard517752b1999-04-05 12:20:10 +00003044 * @name: an XML parser context
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003045 * @prefix: a xmlChar **
Daniel Veillard517752b1999-04-05 12:20:10 +00003046 *
3047 * parse an XML qualified name string
3048 *
3049 * [NS 5] QName ::= (Prefix ':')? LocalPart
3050 *
3051 * [NS 6] Prefix ::= NCName
3052 *
3053 * [NS 7] LocalPart ::= NCName
3054 *
Daniel Veillard51e3b151999-11-12 17:02:31 +00003055 * Returns the local part, and prefix is updated
Daniel Veillard517752b1999-04-05 12:20:10 +00003056 * to get the Prefix if any.
3057 */
3058
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003059xmlChar *
Daniel Veillardcf461992000-03-14 18:30:20 +00003060xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3061 xmlChar buf[XML_MAX_NAMELEN + 5];
3062 int len = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003063 xmlChar *ret = NULL;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003064 const xmlChar *cur = name;
Daniel Veillardcf461992000-03-14 18:30:20 +00003065 int c,l;
Daniel Veillard517752b1999-04-05 12:20:10 +00003066
3067 *prefix = NULL;
Daniel Veillardb96e6431999-08-29 21:02:19 +00003068
3069 /* xml: prefix is not really a namespace */
3070 if ((cur[0] == 'x') && (cur[1] == 'm') &&
3071 (cur[2] == 'l') && (cur[3] == ':'))
3072 return(xmlStrdup(name));
3073
Daniel Veillardcf461992000-03-14 18:30:20 +00003074 /* nasty but valid */
3075 if (cur[0] == ':')
3076 return(xmlStrdup(name));
Daniel Veillard517752b1999-04-05 12:20:10 +00003077
Daniel Veillardcf461992000-03-14 18:30:20 +00003078 c = CUR_SCHAR(cur, l);
3079 if (!IS_LETTER(c) && (c != '_')) return(NULL);
3080
3081 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3082 (c == '.') || (c == '-') ||
3083 (c == '_') ||
3084 (IS_COMBINING(c)) ||
3085 (IS_EXTENDER(c))) {
3086 COPY_BUF(l,buf,len,c);
3087 cur += l;
3088 c = CUR_SCHAR(cur, l);
3089 }
Daniel Veillard517752b1999-04-05 12:20:10 +00003090
Daniel Veillardcf461992000-03-14 18:30:20 +00003091 ret = xmlStrndup(buf, len);
Daniel Veillard517752b1999-04-05 12:20:10 +00003092
Daniel Veillardcf461992000-03-14 18:30:20 +00003093 if (c == ':') {
3094 cur += l;
3095 c = CUR_SCHAR(cur, l);
3096 if (!IS_LETTER(c) && (c != '_')) return(ret);
Daniel Veillard517752b1999-04-05 12:20:10 +00003097 *prefix = ret;
Daniel Veillardcf461992000-03-14 18:30:20 +00003098 len = 0;
Daniel Veillard517752b1999-04-05 12:20:10 +00003099
Daniel Veillardcf461992000-03-14 18:30:20 +00003100 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3101 (c == '.') || (c == '-') ||
3102 (c == '_') ||
3103 (IS_COMBINING(c)) ||
3104 (IS_EXTENDER(c))) {
3105 COPY_BUF(l,buf,len,c);
3106 cur += l;
3107 c = CUR_SCHAR(cur, l);
3108 }
Daniel Veillard517752b1999-04-05 12:20:10 +00003109
Daniel Veillardcf461992000-03-14 18:30:20 +00003110 ret = xmlStrndup(buf, len);
Daniel Veillard517752b1999-04-05 12:20:10 +00003111 }
3112
3113 return(ret);
3114}
3115/**
Daniel Veillard11e00581998-10-24 18:27:49 +00003116 * xmlNamespaceParseNSDef:
3117 * @ctxt: an XML parser context
3118 *
3119 * parse a namespace prefix declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00003120 *
3121 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
3122 *
3123 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
Daniel Veillard1e346af1999-02-22 10:33:01 +00003124 *
3125 * Returns the namespace name
Daniel Veillard260a68f1998-08-13 03:39:55 +00003126 */
3127
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003128xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003129xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003130 xmlChar *name = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003131
Daniel Veillardcf461992000-03-14 18:30:20 +00003132 if ((RAW == 'x') && (NXT(1) == 'm') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00003133 (NXT(2) == 'l') && (NXT(3) == 'n') &&
3134 (NXT(4) == 's')) {
3135 SKIP(5);
Daniel Veillardcf461992000-03-14 18:30:20 +00003136 if (RAW == ':') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003137 NEXT;
3138 name = xmlNamespaceParseNCName(ctxt);
3139 }
3140 }
3141 return(name);
3142}
3143
Daniel Veillard11e00581998-10-24 18:27:49 +00003144/**
3145 * xmlParseQuotedString:
3146 * @ctxt: an XML parser context
3147 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00003148 * [OLD] Parse and return a string between quotes or doublequotes
Daniel Veillardb05deb71999-08-10 19:04:08 +00003149 * To be removed at next drop of binary compatibility
Daniel Veillard1e346af1999-02-22 10:33:01 +00003150 *
3151 * Returns the string parser or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003152 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003153xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003154xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00003155 xmlChar *buf = NULL;
Daniel Veillardcf461992000-03-14 18:30:20 +00003156 int len = 0,l;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00003157 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillardcf461992000-03-14 18:30:20 +00003158 int c;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003159
Daniel Veillard10a2c651999-12-12 13:03:50 +00003160 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3161 if (buf == NULL) {
3162 fprintf(stderr, "malloc of %d byte failed\n", size);
3163 return(NULL);
3164 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003165 if (RAW == '"') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003166 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00003167 c = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00003168 while (IS_CHAR(c) && (c != '"')) {
Daniel Veillardcf461992000-03-14 18:30:20 +00003169 if (len + 5 >= size) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00003170 size *= 2;
3171 buf = xmlRealloc(buf, size * sizeof(xmlChar));
3172 if (buf == NULL) {
3173 fprintf(stderr, "realloc of %d byte failed\n", size);
3174 return(NULL);
3175 }
3176 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003177 COPY_BUF(l,buf,len,c);
3178 NEXTL(l);
3179 c = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00003180 }
3181 if (c != '"') {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003182 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003183 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard10a2c651999-12-12 13:03:50 +00003184 ctxt->sax->error(ctxt->userData,
3185 "String not closed \"%.50s\"\n", buf);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003186 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003187 ctxt->disableSAX = 1;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003188 } else {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003189 NEXT;
3190 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003191 } else if (RAW == '\''){
Daniel Veillard260a68f1998-08-13 03:39:55 +00003192 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003193 c = CUR;
3194 while (IS_CHAR(c) && (c != '\'')) {
3195 if (len + 1 >= size) {
3196 size *= 2;
3197 buf = xmlRealloc(buf, size * sizeof(xmlChar));
3198 if (buf == NULL) {
3199 fprintf(stderr, "realloc of %d byte failed\n", size);
3200 return(NULL);
3201 }
3202 }
3203 buf[len++] = c;
3204 NEXT;
3205 c = CUR;
3206 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003207 if (RAW != '\'') {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003208 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003209 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard10a2c651999-12-12 13:03:50 +00003210 ctxt->sax->error(ctxt->userData,
3211 "String not closed \"%.50s\"\n", buf);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003212 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003213 ctxt->disableSAX = 1;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003214 } else {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003215 NEXT;
3216 }
3217 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00003218 return(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003219}
3220
Daniel Veillard11e00581998-10-24 18:27:49 +00003221/**
3222 * xmlParseNamespace:
3223 * @ctxt: an XML parser context
3224 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00003225 * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3226 *
3227 * This is what the older xml-name Working Draft specified, a bunch of
3228 * other stuff may still rely on it, so support is still here as
Daniel Veillard51e3b151999-11-12 17:02:31 +00003229 * if it was declared on the root of the Tree:-(
Daniel Veillardb05deb71999-08-10 19:04:08 +00003230 *
3231 * To be removed at next drop of binary compatibility
Daniel Veillard260a68f1998-08-13 03:39:55 +00003232 */
3233
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003234void
3235xmlParseNamespace(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003236 xmlChar *href = NULL;
3237 xmlChar *prefix = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003238 int garbage = 0;
3239
3240 /*
3241 * We just skipped "namespace" or "xml:namespace"
3242 */
3243 SKIP_BLANKS;
3244
Daniel Veillardcf461992000-03-14 18:30:20 +00003245 while (IS_CHAR(RAW) && (RAW != '>')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003246 /*
3247 * We can have "ns" or "prefix" attributes
3248 * Old encoding as 'href' or 'AS' attributes is still supported
3249 */
Daniel Veillardcf461992000-03-14 18:30:20 +00003250 if ((RAW == 'n') && (NXT(1) == 's')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003251 garbage = 0;
3252 SKIP(2);
3253 SKIP_BLANKS;
3254
Daniel Veillardcf461992000-03-14 18:30:20 +00003255 if (RAW != '=') continue;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003256 NEXT;
3257 SKIP_BLANKS;
3258
3259 href = xmlParseQuotedString(ctxt);
3260 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00003261 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00003262 (NXT(2) == 'e') && (NXT(3) == 'f')) {
3263 garbage = 0;
3264 SKIP(4);
3265 SKIP_BLANKS;
3266
Daniel Veillardcf461992000-03-14 18:30:20 +00003267 if (RAW != '=') continue;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003268 NEXT;
3269 SKIP_BLANKS;
3270
3271 href = xmlParseQuotedString(ctxt);
3272 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00003273 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00003274 (NXT(2) == 'e') && (NXT(3) == 'f') &&
3275 (NXT(4) == 'i') && (NXT(5) == 'x')) {
3276 garbage = 0;
3277 SKIP(6);
3278 SKIP_BLANKS;
3279
Daniel Veillardcf461992000-03-14 18:30:20 +00003280 if (RAW != '=') continue;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003281 NEXT;
3282 SKIP_BLANKS;
3283
3284 prefix = xmlParseQuotedString(ctxt);
3285 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00003286 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003287 garbage = 0;
3288 SKIP(2);
3289 SKIP_BLANKS;
3290
Daniel Veillardcf461992000-03-14 18:30:20 +00003291 if (RAW != '=') continue;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003292 NEXT;
3293 SKIP_BLANKS;
3294
3295 prefix = xmlParseQuotedString(ctxt);
3296 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00003297 } else if ((RAW == '?') && (NXT(1) == '>')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003298 garbage = 0;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003299 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003300 } else {
3301 /*
3302 * Found garbage when parsing the namespace
3303 */
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003304 if (!garbage) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003305 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003306 ctxt->sax->error(ctxt->userData,
3307 "xmlParseNamespace found garbage\n");
3308 }
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003309 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003310 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003311 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003312 NEXT;
3313 }
3314 }
3315
3316 MOVETO_ENDTAG(CUR_PTR);
3317 NEXT;
3318
3319 /*
3320 * Register the DTD.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003321 if (href != NULL)
Daniel Veillard517752b1999-04-05 12:20:10 +00003322 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003323 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
Daniel Veillard517752b1999-04-05 12:20:10 +00003324 */
Daniel Veillard260a68f1998-08-13 03:39:55 +00003325
Daniel Veillard6454aec1999-09-02 22:04:43 +00003326 if (prefix != NULL) xmlFree(prefix);
3327 if (href != NULL) xmlFree(href);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003328}
3329
3330/************************************************************************
3331 * *
3332 * The parser itself *
3333 * Relates to http://www.w3.org/TR/REC-xml *
3334 * *
3335 ************************************************************************/
3336
Daniel Veillard11e00581998-10-24 18:27:49 +00003337/**
Daniel Veillardb05deb71999-08-10 19:04:08 +00003338 * xmlScanName:
3339 * @ctxt: an XML parser context
3340 *
3341 * Trickery: parse an XML name but without consuming the input flow
3342 * Needed for rollback cases.
3343 *
3344 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3345 * CombiningChar | Extender
3346 *
3347 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3348 *
3349 * [6] Names ::= Name (S Name)*
3350 *
3351 * Returns the Name parsed or NULL
3352 */
3353
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003354xmlChar *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003355xmlScanName(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003356 xmlChar buf[XML_MAX_NAMELEN];
Daniel Veillardb05deb71999-08-10 19:04:08 +00003357 int len = 0;
3358
3359 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00003360 if (!IS_LETTER(RAW) && (RAW != '_') &&
3361 (RAW != ':')) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00003362 return(NULL);
3363 }
3364
3365 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3366 (NXT(len) == '.') || (NXT(len) == '-') ||
3367 (NXT(len) == '_') || (NXT(len) == ':') ||
3368 (IS_COMBINING(NXT(len))) ||
3369 (IS_EXTENDER(NXT(len)))) {
3370 buf[len] = NXT(len);
3371 len++;
3372 if (len >= XML_MAX_NAMELEN) {
3373 fprintf(stderr,
3374 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3375 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3376 (NXT(len) == '.') || (NXT(len) == '-') ||
3377 (NXT(len) == '_') || (NXT(len) == ':') ||
3378 (IS_COMBINING(NXT(len))) ||
3379 (IS_EXTENDER(NXT(len))))
3380 len++;
3381 break;
3382 }
3383 }
3384 return(xmlStrndup(buf, len));
3385}
3386
3387/**
Daniel Veillard11e00581998-10-24 18:27:49 +00003388 * xmlParseName:
3389 * @ctxt: an XML parser context
3390 *
3391 * parse an XML name.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003392 *
3393 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3394 * CombiningChar | Extender
3395 *
3396 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3397 *
3398 * [6] Names ::= Name (S Name)*
Daniel Veillard1e346af1999-02-22 10:33:01 +00003399 *
3400 * Returns the Name parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00003401 */
3402
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003403xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003404xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillardcf461992000-03-14 18:30:20 +00003405 xmlChar buf[XML_MAX_NAMELEN + 5];
3406 int len = 0, l;
3407 int c;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003408
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003409 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00003410 c = CUR_CHAR(l);
3411 if (!IS_LETTER(c) && (c != '_') &&
3412 (c != ':')) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003413 return(NULL);
3414 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003415
Daniel Veillardcf461992000-03-14 18:30:20 +00003416 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3417 (c == '.') || (c == '-') ||
3418 (c == '_') || (c == ':') ||
3419 (IS_COMBINING(c)) ||
3420 (IS_EXTENDER(c))) {
3421 COPY_BUF(l,buf,len,c);
3422 NEXTL(l);
3423 c = CUR_CHAR(l);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003424 if (len >= XML_MAX_NAMELEN) {
3425 fprintf(stderr,
3426 "xmlParseName: reached XML_MAX_NAMELEN limit\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00003427 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3428 (c == '.') || (c == '-') ||
3429 (c == '_') || (c == ':') ||
3430 (IS_COMBINING(c)) ||
3431 (IS_EXTENDER(c))) {
3432 NEXTL(l);
3433 c = CUR_CHAR(l);
Daniel Veillardb05deb71999-08-10 19:04:08 +00003434 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003435 break;
3436 }
3437 }
3438 return(xmlStrndup(buf, len));
Daniel Veillard260a68f1998-08-13 03:39:55 +00003439}
3440
Daniel Veillard11e00581998-10-24 18:27:49 +00003441/**
Daniel Veillard10a2c651999-12-12 13:03:50 +00003442 * xmlParseStringName:
3443 * @ctxt: an XML parser context
3444 * @str: a pointer to an index in the string
3445 *
3446 * parse an XML name.
3447 *
3448 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3449 * CombiningChar | Extender
3450 *
3451 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3452 *
3453 * [6] Names ::= Name (S Name)*
3454 *
3455 * Returns the Name parsed or NULL. The str pointer
3456 * is updated to the current location in the string.
3457 */
3458
3459xmlChar *
3460xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
Daniel Veillardcf461992000-03-14 18:30:20 +00003461 xmlChar buf[XML_MAX_NAMELEN + 5];
3462 const xmlChar *cur = *str;
3463 int len = 0, l;
3464 int c;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003465
Daniel Veillardcf461992000-03-14 18:30:20 +00003466 c = CUR_SCHAR(cur, l);
3467 if (!IS_LETTER(c) && (c != '_') &&
3468 (c != ':')) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00003469 return(NULL);
3470 }
3471
Daniel Veillardcf461992000-03-14 18:30:20 +00003472 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3473 (c == '.') || (c == '-') ||
3474 (c == '_') || (c == ':') ||
3475 (IS_COMBINING(c)) ||
3476 (IS_EXTENDER(c))) {
3477 COPY_BUF(l,buf,len,c);
3478 cur += l;
3479 c = CUR_SCHAR(cur, l);
3480 if (len >= XML_MAX_NAMELEN) {
3481 fprintf(stderr,
3482 "xmlParseName: reached XML_MAX_NAMELEN limit\n");
3483 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3484 (c == '.') || (c == '-') ||
3485 (c == '_') || (c == ':') ||
3486 (IS_COMBINING(c)) ||
3487 (IS_EXTENDER(c))) {
3488 cur += l;
3489 c = CUR_SCHAR(cur, l);
3490 }
3491 break;
3492 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00003493 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003494 *str = cur;
3495 return(xmlStrndup(buf, len));
Daniel Veillard10a2c651999-12-12 13:03:50 +00003496}
3497
3498/**
Daniel Veillard11e00581998-10-24 18:27:49 +00003499 * xmlParseNmtoken:
3500 * @ctxt: an XML parser context
3501 *
3502 * parse an XML Nmtoken.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003503 *
3504 * [7] Nmtoken ::= (NameChar)+
3505 *
3506 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
Daniel Veillard1e346af1999-02-22 10:33:01 +00003507 *
3508 * Returns the Nmtoken parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00003509 */
3510
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003511xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003512xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003513 xmlChar buf[XML_MAX_NAMELEN];
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003514 int len = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003515 int c,l;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003516
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003517 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00003518 c = CUR_CHAR(l);
3519 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3520 (c == '.') || (c == '-') ||
3521 (c == '_') || (c == ':') ||
3522 (IS_COMBINING(c)) ||
3523 (IS_EXTENDER(c))) {
3524 COPY_BUF(l,buf,len,c);
3525 NEXTL(l);
3526 c = CUR_CHAR(l);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003527 if (len >= XML_MAX_NAMELEN) {
3528 fprintf(stderr,
3529 "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00003530 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3531 (c == '.') || (c == '-') ||
3532 (c == '_') || (c == ':') ||
3533 (IS_COMBINING(c)) ||
3534 (IS_EXTENDER(c))) {
3535 NEXTL(l);
3536 c = CUR_CHAR(l);
3537 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003538 break;
3539 }
3540 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003541 if (len == 0)
3542 return(NULL);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003543 return(xmlStrndup(buf, len));
Daniel Veillard260a68f1998-08-13 03:39:55 +00003544}
3545
Daniel Veillard11e00581998-10-24 18:27:49 +00003546/**
3547 * xmlParseEntityValue:
3548 * @ctxt: an XML parser context
Daniel Veillard011b63c1999-06-02 17:44:04 +00003549 * @orig: if non-NULL store a copy of the original entity value
Daniel Veillard11e00581998-10-24 18:27:49 +00003550 *
3551 * parse a value for ENTITY decl.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003552 *
3553 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3554 * "'" ([^%&'] | PEReference | Reference)* "'"
Daniel Veillard1e346af1999-02-22 10:33:01 +00003555 *
Daniel Veillard011b63c1999-06-02 17:44:04 +00003556 * Returns the EntityValue parsed with reference substitued or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00003557 */
3558
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003559xmlChar *
3560xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00003561 xmlChar *buf = NULL;
3562 int len = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00003563 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillardcf461992000-03-14 18:30:20 +00003564 int c, l;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003565 xmlChar stop;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003566 xmlChar *ret = NULL;
Daniel Veillardcf461992000-03-14 18:30:20 +00003567 const xmlChar *cur = NULL;
Daniel Veillardb05deb71999-08-10 19:04:08 +00003568 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003569
Daniel Veillardcf461992000-03-14 18:30:20 +00003570 if (RAW == '"') stop = '"';
3571 else if (RAW == '\'') stop = '\'';
Daniel Veillard10a2c651999-12-12 13:03:50 +00003572 else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003573 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003574 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard011b63c1999-06-02 17:44:04 +00003575 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003576 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003577 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003578 return(NULL);
3579 }
3580 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3581 if (buf == NULL) {
3582 fprintf(stderr, "malloc of %d byte failed\n", size);
3583 return(NULL);
3584 }
3585
3586 /*
3587 * The content of the entity definition is copied in a buffer.
3588 */
3589
3590 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3591 input = ctxt->input;
3592 GROW;
3593 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00003594 c = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00003595 /*
3596 * NOTE: 4.4.5 Included in Literal
3597 * When a parameter entity reference appears in a literal entity
3598 * value, ... a single or double quote character in the replacement
3599 * text is always treated as a normal data character and will not
3600 * terminate the literal.
3601 * In practice it means we stop the loop only when back at parsing
3602 * the initial entity and the quote is found
3603 */
3604 while (IS_CHAR(c) && ((c != stop) || (ctxt->input != input))) {
Daniel Veillardcf461992000-03-14 18:30:20 +00003605 if (len + 5 >= size) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00003606 size *= 2;
3607 buf = xmlRealloc(buf, size * sizeof(xmlChar));
3608 if (buf == NULL) {
3609 fprintf(stderr, "realloc of %d byte failed\n", size);
3610 return(NULL);
3611 }
3612 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003613 COPY_BUF(l,buf,len,c);
3614 NEXTL(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00003615 /*
3616 * Pop-up of finished entities.
3617 */
Daniel Veillardcf461992000-03-14 18:30:20 +00003618 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillard10a2c651999-12-12 13:03:50 +00003619 xmlPopInput(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00003620
3621 c = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00003622 if (c == 0) {
3623 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00003624 c = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00003625 }
3626 }
3627 buf[len] = 0;
3628
3629 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00003630 * Raise problem w.r.t. '&' and '%' being used in non-entities
3631 * reference constructs. Note Charref will be handled in
3632 * xmlStringDecodeEntities()
3633 */
3634 cur = buf;
3635 while (*cur != 0) {
3636 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3637 xmlChar *name;
3638 xmlChar tmp = *cur;
3639
3640 cur++;
3641 name = xmlParseStringName(ctxt, &cur);
3642 if ((name == NULL) || (*cur != ';')) {
3643 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3644 ctxt->sax->error(ctxt->userData,
3645 "EntityValue: '%c' forbidden except for entities references\n",
3646 tmp);
3647 ctxt->wellFormed = 0;
3648 ctxt->disableSAX = 1;
3649 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
3650 }
3651 if ((ctxt->inSubset == 1) && (tmp == '%')) {
3652 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3653 ctxt->sax->error(ctxt->userData,
3654 "EntityValue: PEReferences forbidden in internal subset\n",
3655 tmp);
3656 ctxt->wellFormed = 0;
3657 ctxt->disableSAX = 1;
3658 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
3659 }
3660 if (name != NULL)
3661 xmlFree(name);
3662 }
3663 cur++;
3664 }
3665
3666 /*
Daniel Veillard10a2c651999-12-12 13:03:50 +00003667 * Then PEReference entities are substituted.
3668 */
3669 if (c != stop) {
3670 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3671 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3672 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
3673 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003674 ctxt->disableSAX = 1;
3675 xmlFree(buf);
Daniel Veillard10a2c651999-12-12 13:03:50 +00003676 } else {
3677 NEXT;
3678 /*
3679 * NOTE: 4.4.7 Bypassed
3680 * When a general entity reference appears in the EntityValue in
3681 * an entity declaration, it is bypassed and left as is.
Daniel Veillardcf461992000-03-14 18:30:20 +00003682 * so XML_SUBSTITUTE_REF is not set here.
Daniel Veillard10a2c651999-12-12 13:03:50 +00003683 */
3684 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3685 0, 0, 0);
3686 if (orig != NULL)
3687 *orig = buf;
3688 else
3689 xmlFree(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003690 }
3691
3692 return(ret);
3693}
3694
Daniel Veillard11e00581998-10-24 18:27:49 +00003695/**
3696 * xmlParseAttValue:
3697 * @ctxt: an XML parser context
3698 *
3699 * parse a value for an attribute
Daniel Veillard011b63c1999-06-02 17:44:04 +00003700 * Note: the parser won't do substitution of entities here, this
Daniel Veillardb96e6431999-08-29 21:02:19 +00003701 * will be handled later in xmlStringGetNodeList
Daniel Veillard260a68f1998-08-13 03:39:55 +00003702 *
3703 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3704 * "'" ([^<&'] | Reference)* "'"
Daniel Veillard1e346af1999-02-22 10:33:01 +00003705 *
Daniel Veillard7f858501999-11-17 17:32:38 +00003706 * 3.3.3 Attribute-Value Normalization:
3707 * Before the value of an attribute is passed to the application or
3708 * checked for validity, the XML processor must normalize it as follows:
3709 * - a character reference is processed by appending the referenced
3710 * character to the attribute value
3711 * - an entity reference is processed by recursively processing the
3712 * replacement text of the entity
3713 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3714 * appending #x20 to the normalized value, except that only a single
3715 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3716 * parsed entity or the literal entity value of an internal parsed entity
3717 * - other characters are processed by appending them to the normalized value
Daniel Veillard07136651999-11-18 14:02:05 +00003718 * If the declared value is not CDATA, then the XML processor must further
3719 * process the normalized attribute value by discarding any leading and
3720 * trailing space (#x20) characters, and by replacing sequences of space
3721 * (#x20) characters by a single space (#x20) character.
3722 * All attributes for which no declaration has been read should be treated
3723 * by a non-validating parser as if declared CDATA.
Daniel Veillard7f858501999-11-17 17:32:38 +00003724 *
3725 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003726 */
3727
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003728xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003729xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard7f858501999-11-17 17:32:38 +00003730 xmlChar limit = 0;
3731 xmlChar *buffer = NULL;
3732 int buffer_size = 0;
3733 xmlChar *out = NULL;
3734
3735 xmlChar *current = NULL;
3736 xmlEntityPtr ent;
3737 xmlChar cur;
Daniel Veillard7f858501999-11-17 17:32:38 +00003738
Daniel Veillard260a68f1998-08-13 03:39:55 +00003739
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003740 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00003741 if (NXT(0) == '"') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00003742 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillard7f858501999-11-17 17:32:38 +00003743 limit = '"';
Daniel Veillard260a68f1998-08-13 03:39:55 +00003744 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00003745 } else if (NXT(0) == '\'') {
Daniel Veillard7f858501999-11-17 17:32:38 +00003746 limit = '\'';
Daniel Veillardb05deb71999-08-10 19:04:08 +00003747 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003748 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003749 } else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003750 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003751 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003752 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003753 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003754 ctxt->disableSAX = 1;
Daniel Veillard7f858501999-11-17 17:32:38 +00003755 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003756 }
3757
Daniel Veillard7f858501999-11-17 17:32:38 +00003758 /*
3759 * allocate a translation buffer.
3760 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +00003761 buffer_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard7f858501999-11-17 17:32:38 +00003762 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
3763 if (buffer == NULL) {
3764 perror("xmlParseAttValue: malloc failed");
3765 return(NULL);
3766 }
3767 out = buffer;
3768
3769 /*
3770 * Ok loop until we reach one of the ending char or a size limit.
3771 */
3772 cur = CUR;
Daniel Veillardcf461992000-03-14 18:30:20 +00003773 while (((NXT(0) != limit) && (cur != '<')) || (ctxt->token != 0)) {
Daniel Veillard7f858501999-11-17 17:32:38 +00003774 if (cur == 0) break;
3775 if ((cur == '&') && (NXT(1) == '#')) {
3776 int val = xmlParseCharRef(ctxt);
3777 *out++ = val;
Daniel Veillard7f858501999-11-17 17:32:38 +00003778 } else if (cur == '&') {
3779 ent = xmlParseEntityRef(ctxt);
3780 if ((ent != NULL) &&
3781 (ctxt->replaceEntities != 0)) {
Daniel Veillardcf461992000-03-14 18:30:20 +00003782 xmlChar *rep;
Daniel Veillard7f858501999-11-17 17:32:38 +00003783
Daniel Veillardcf461992000-03-14 18:30:20 +00003784 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3785 rep = xmlStringDecodeEntities(ctxt, ent->content,
3786 XML_SUBSTITUTE_REF, 0, 0, 0);
3787 if (rep != NULL) {
3788 current = rep;
3789 while (*current != 0) {
3790 *out++ = *current++;
3791 if (out - buffer > buffer_size - 10) {
3792 int index = out - buffer;
3793
3794 growBuffer(buffer);
3795 out = &buffer[index];
3796 }
3797 }
3798 xmlFree(rep);
Daniel Veillard7f858501999-11-17 17:32:38 +00003799 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003800 } else {
3801 if (ent->content != NULL)
3802 *out++ = ent->content[0];
Daniel Veillard7f858501999-11-17 17:32:38 +00003803 }
3804 } else if (ent != NULL) {
3805 int i = xmlStrlen(ent->name);
3806 const xmlChar *cur = ent->name;
3807
Daniel Veillardcf461992000-03-14 18:30:20 +00003808 /*
3809 * This may look absurd but is needed to detect
3810 * entities problems
3811 */
3812 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3813 xmlChar *rep;
3814 rep = xmlStringDecodeEntities(ctxt, ent->content,
3815 XML_SUBSTITUTE_REF, 0, 0, 0);
3816 if (rep != NULL)
3817 xmlFree(rep);
3818 }
3819
3820 /*
3821 * Just output the reference
3822 */
Daniel Veillard7f858501999-11-17 17:32:38 +00003823 *out++ = '&';
3824 if (out - buffer > buffer_size - i - 10) {
3825 int index = out - buffer;
3826
3827 growBuffer(buffer);
3828 out = &buffer[index];
3829 }
3830 for (;i > 0;i--)
3831 *out++ = *cur++;
3832 *out++ = ';';
3833 }
Daniel Veillard7f858501999-11-17 17:32:38 +00003834 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00003835 /* invalid for UTF-8 , use COPY(out); !!! */
Daniel Veillard7f858501999-11-17 17:32:38 +00003836 if ((cur == 0x20) || (cur == 0xD) || (cur == 0xA) || (cur == 0x9)) {
Daniel Veillard07136651999-11-18 14:02:05 +00003837 *out++ = 0x20;
3838 if (out - buffer > buffer_size - 10) {
3839 int index = out - buffer;
3840
3841 growBuffer(buffer);
3842 out = &buffer[index];
Daniel Veillard7f858501999-11-17 17:32:38 +00003843 }
Daniel Veillard7f858501999-11-17 17:32:38 +00003844 } else {
3845 *out++ = cur;
3846 if (out - buffer > buffer_size - 10) {
3847 int index = out - buffer;
3848
3849 growBuffer(buffer);
3850 out = &buffer[index];
3851 }
Daniel Veillard7f858501999-11-17 17:32:38 +00003852 }
3853 NEXT;
3854 }
3855 cur = CUR;
3856 }
3857 *out++ = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003858 if (RAW == '<') {
Daniel Veillard7f858501999-11-17 17:32:38 +00003859 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3860 ctxt->sax->error(ctxt->userData,
3861 "Unescaped '<' not allowed in attributes values\n");
3862 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
3863 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003864 ctxt->disableSAX = 1;
3865 } else if (RAW != limit) {
Daniel Veillard7f858501999-11-17 17:32:38 +00003866 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3867 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
3868 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
3869 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003870 ctxt->disableSAX = 1;
Daniel Veillard7f858501999-11-17 17:32:38 +00003871 } else
3872 NEXT;
3873 return(buffer);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003874}
3875
Daniel Veillard11e00581998-10-24 18:27:49 +00003876/**
3877 * xmlParseSystemLiteral:
3878 * @ctxt: an XML parser context
3879 *
3880 * parse an XML Literal
Daniel Veillard260a68f1998-08-13 03:39:55 +00003881 *
3882 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
Daniel Veillard1e346af1999-02-22 10:33:01 +00003883 *
3884 * Returns the SystemLiteral parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00003885 */
3886
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003887xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003888xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00003889 xmlChar *buf = NULL;
3890 int len = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00003891 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillardcf461992000-03-14 18:30:20 +00003892 int cur, l;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003893 xmlChar stop;
Daniel Veillardcf461992000-03-14 18:30:20 +00003894 int state = ctxt->instate;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003895
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003896 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00003897 if (RAW == '"') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003898 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003899 stop = '"';
Daniel Veillardcf461992000-03-14 18:30:20 +00003900 } else if (RAW == '\'') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003901 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003902 stop = '\'';
Daniel Veillard260a68f1998-08-13 03:39:55 +00003903 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003904 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003905 ctxt->sax->error(ctxt->userData,
3906 "SystemLiteral \" or ' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003907 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003908 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003909 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003910 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003911 }
3912
Daniel Veillard10a2c651999-12-12 13:03:50 +00003913 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3914 if (buf == NULL) {
3915 fprintf(stderr, "malloc of %d byte failed\n", size);
3916 return(NULL);
3917 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003918 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3919 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00003920 while ((IS_CHAR(cur)) && (cur != stop)) {
Daniel Veillardcf461992000-03-14 18:30:20 +00003921 if (len + 5 >= size) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00003922 size *= 2;
3923 buf = xmlRealloc(buf, size * sizeof(xmlChar));
3924 if (buf == NULL) {
3925 fprintf(stderr, "realloc of %d byte failed\n", size);
Daniel Veillardcf461992000-03-14 18:30:20 +00003926 ctxt->instate = state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003927 return(NULL);
3928 }
3929 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003930 COPY_BUF(l,buf,len,cur);
3931 NEXTL(l);
3932 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00003933 if (cur == 0) {
3934 GROW;
3935 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00003936 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00003937 }
3938 }
3939 buf[len] = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003940 ctxt->instate = state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003941 if (!IS_CHAR(cur)) {
3942 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3943 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
3944 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
3945 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003946 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003947 } else {
3948 NEXT;
3949 }
3950 return(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003951}
3952
Daniel Veillard11e00581998-10-24 18:27:49 +00003953/**
3954 * xmlParsePubidLiteral:
3955 * @ctxt: an XML parser context
Daniel Veillard260a68f1998-08-13 03:39:55 +00003956 *
Daniel Veillard11e00581998-10-24 18:27:49 +00003957 * parse an XML public literal
Daniel Veillard1e346af1999-02-22 10:33:01 +00003958 *
3959 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3960 *
3961 * Returns the PubidLiteral parsed or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003962 */
3963
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003964xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003965xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00003966 xmlChar *buf = NULL;
3967 int len = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00003968 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003969 xmlChar cur;
3970 xmlChar stop;
Daniel Veillard6077d031999-10-09 09:11:45 +00003971
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003972 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00003973 if (RAW == '"') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003974 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003975 stop = '"';
Daniel Veillardcf461992000-03-14 18:30:20 +00003976 } else if (RAW == '\'') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003977 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003978 stop = '\'';
Daniel Veillard260a68f1998-08-13 03:39:55 +00003979 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003980 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003981 ctxt->sax->error(ctxt->userData,
3982 "SystemLiteral \" or ' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003983 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003984 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003985 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003986 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003987 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00003988 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3989 if (buf == NULL) {
3990 fprintf(stderr, "malloc of %d byte failed\n", size);
3991 return(NULL);
3992 }
3993 cur = CUR;
3994 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) {
3995 if (len + 1 >= size) {
3996 size *= 2;
3997 buf = xmlRealloc(buf, size * sizeof(xmlChar));
3998 if (buf == NULL) {
3999 fprintf(stderr, "realloc of %d byte failed\n", size);
4000 return(NULL);
4001 }
4002 }
4003 buf[len++] = cur;
4004 NEXT;
4005 cur = CUR;
4006 if (cur == 0) {
4007 GROW;
4008 SHRINK;
4009 cur = CUR;
4010 }
4011 }
4012 buf[len] = 0;
4013 if (cur != stop) {
4014 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4015 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
4016 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
4017 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004018 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004019 } else {
4020 NEXT;
4021 }
4022 return(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004023}
4024
Daniel Veillard11e00581998-10-24 18:27:49 +00004025/**
4026 * xmlParseCharData:
4027 * @ctxt: an XML parser context
4028 * @cdata: int indicating whether we are within a CDATA section
4029 *
4030 * parse a CharData section.
4031 * if we are within a CDATA section ']]>' marks an end of section.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004032 *
Daniel Veillardcf461992000-03-14 18:30:20 +00004033 * The right angle bracket (>) may be represented using the string "&gt;",
4034 * and must, for compatibility, be escaped using "&gt;" or a character
4035 * reference when it appears in the string "]]>" in content, when that
4036 * string is not marking the end of a CDATA section.
4037 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00004038 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4039 */
4040
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004041void
4042xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillardcf461992000-03-14 18:30:20 +00004043 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004044 int nbchar = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004045 int cur, l;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004046
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004047 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00004048 cur = CUR_CHAR(l);
4049 while ((IS_CHAR(cur)) && ((cur != '<') || (ctxt->token == '<')) &&
4050 ((cur != '&') || (ctxt->token == '&'))) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004051 if ((cur == ']') && (NXT(1) == ']') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004052 (NXT(2) == '>')) {
4053 if (cdata) break;
4054 else {
4055 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcf461992000-03-14 18:30:20 +00004056 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004057 "Sequence ']]>' not allowed in content\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004058 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
Daniel Veillardcf461992000-03-14 18:30:20 +00004059 /* Should this be relaxed ??? I see a "must here */
4060 ctxt->wellFormed = 0;
4061 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004062 }
4063 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004064 COPY_BUF(l,buf,nbchar,cur);
4065 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004066 /*
4067 * Ok the segment is to be consumed as chars.
4068 */
Daniel Veillardcf461992000-03-14 18:30:20 +00004069 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004070 if (areBlanks(ctxt, buf, nbchar)) {
4071 if (ctxt->sax->ignorableWhitespace != NULL)
4072 ctxt->sax->ignorableWhitespace(ctxt->userData,
4073 buf, nbchar);
4074 } else {
4075 if (ctxt->sax->characters != NULL)
4076 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4077 }
4078 }
4079 nbchar = 0;
4080 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004081 NEXTL(l);
4082 cur = CUR_CHAR(l);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004083 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004084 if (nbchar != 0) {
4085 /*
4086 * Ok the segment is to be consumed as chars.
4087 */
Daniel Veillardcf461992000-03-14 18:30:20 +00004088 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004089 if (areBlanks(ctxt, buf, nbchar)) {
4090 if (ctxt->sax->ignorableWhitespace != NULL)
4091 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4092 } else {
4093 if (ctxt->sax->characters != NULL)
4094 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4095 }
4096 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004097 }
4098}
4099
Daniel Veillard11e00581998-10-24 18:27:49 +00004100/**
4101 * xmlParseExternalID:
4102 * @ctxt: an XML parser context
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004103 * @publicID: a xmlChar** receiving PubidLiteral
Daniel Veillard1e346af1999-02-22 10:33:01 +00004104 * @strict: indicate whether we should restrict parsing to only
4105 * production [75], see NOTE below
Daniel Veillard11e00581998-10-24 18:27:49 +00004106 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00004107 * Parse an External ID or a Public ID
4108 *
4109 * NOTE: Productions [75] and [83] interract badly since [75] can generate
4110 * 'PUBLIC' S PubidLiteral S SystemLiteral
Daniel Veillard260a68f1998-08-13 03:39:55 +00004111 *
4112 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4113 * | 'PUBLIC' S PubidLiteral S SystemLiteral
Daniel Veillard1e346af1999-02-22 10:33:01 +00004114 *
4115 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4116 *
4117 * Returns the function returns SystemLiteral and in the second
4118 * case publicID receives PubidLiteral, is strict is off
4119 * it is possible to return NULL and have publicID set.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004120 */
4121
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004122xmlChar *
4123xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4124 xmlChar *URI = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004125
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004126 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00004127 if ((RAW == 'S') && (NXT(1) == 'Y') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00004128 (NXT(2) == 'S') && (NXT(3) == 'T') &&
4129 (NXT(4) == 'E') && (NXT(5) == 'M')) {
4130 SKIP(6);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004131 if (!IS_BLANK(CUR)) {
4132 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004133 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004134 "Space required after 'SYSTEM'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004135 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004136 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004137 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004138 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004139 SKIP_BLANKS;
4140 URI = xmlParseSystemLiteral(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004141 if (URI == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004142 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004143 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00004144 "xmlParseExternalID: SYSTEM, no URI\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004145 ctxt->errNo = XML_ERR_URI_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004146 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004147 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004148 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004149 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00004150 (NXT(2) == 'B') && (NXT(3) == 'L') &&
4151 (NXT(4) == 'I') && (NXT(5) == 'C')) {
4152 SKIP(6);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004153 if (!IS_BLANK(CUR)) {
4154 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004155 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004156 "Space required after 'PUBLIC'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004157 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004158 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004159 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004160 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004161 SKIP_BLANKS;
4162 *publicID = xmlParsePubidLiteral(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004163 if (*publicID == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004164 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004165 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00004166 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004167 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004168 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004169 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004170 }
Daniel Veillard1e346af1999-02-22 10:33:01 +00004171 if (strict) {
4172 /*
4173 * We don't handle [83] so "S SystemLiteral" is required.
4174 */
4175 if (!IS_BLANK(CUR)) {
4176 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004177 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00004178 "Space required after the Public Identifier\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004179 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004180 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004181 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004182 }
4183 } else {
4184 /*
4185 * We handle [83] so we return immediately, if
4186 * "S SystemLiteral" is not detected. From a purely parsing
4187 * point of view that's a nice mess.
4188 */
Daniel Veillard10a2c651999-12-12 13:03:50 +00004189 const xmlChar *ptr;
4190 GROW;
4191
4192 ptr = CUR_PTR;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004193 if (!IS_BLANK(*ptr)) return(NULL);
4194
4195 while (IS_BLANK(*ptr)) ptr++;
Daniel Veillardcf461992000-03-14 18:30:20 +00004196 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004197 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004198 SKIP_BLANKS;
4199 URI = xmlParseSystemLiteral(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004200 if (URI == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004201 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004202 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00004203 "xmlParseExternalID: PUBLIC, no URI\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004204 ctxt->errNo = XML_ERR_URI_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004205 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004206 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004207 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004208 }
4209 return(URI);
4210}
4211
Daniel Veillard11e00581998-10-24 18:27:49 +00004212/**
4213 * xmlParseComment:
Daniel Veillard1e346af1999-02-22 10:33:01 +00004214 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00004215 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00004216 * Skip an XML (SGML) comment <!-- .... -->
Daniel Veillard260a68f1998-08-13 03:39:55 +00004217 * The spec says that "For compatibility, the string "--" (double-hyphen)
4218 * must not occur within comments. "
4219 *
4220 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4221 */
Daniel Veillard517752b1999-04-05 12:20:10 +00004222void
Daniel Veillardb96e6431999-08-29 21:02:19 +00004223xmlParseComment(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00004224 xmlChar *buf = NULL;
4225 int len = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004226 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillardcf461992000-03-14 18:30:20 +00004227 int q, ql;
4228 int r, rl;
4229 int cur, l;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004230 xmlParserInputState state;
Daniel Veillardcf461992000-03-14 18:30:20 +00004231 xmlParserInputPtr input = ctxt->input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004232
4233 /*
4234 * Check that there is a comment right here.
4235 */
Daniel Veillardcf461992000-03-14 18:30:20 +00004236 if ((RAW != '<') || (NXT(1) != '!') ||
Daniel Veillard517752b1999-04-05 12:20:10 +00004237 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004238
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004239 state = ctxt->instate;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004240 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004241 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004242 SKIP(4);
Daniel Veillard10a2c651999-12-12 13:03:50 +00004243 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4244 if (buf == NULL) {
4245 fprintf(stderr, "malloc of %d byte failed\n", size);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004246 ctxt->instate = state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004247 return;
4248 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004249 q = CUR_CHAR(ql);
4250 NEXTL(ql);
4251 r = CUR_CHAR(rl);
4252 NEXTL(rl);
4253 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00004254 while (IS_CHAR(cur) &&
4255 ((cur != '>') ||
4256 (r != '-') || (q != '-'))) {
4257 if ((r == '-') && (q == '-')) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004258 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004259 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00004260 "Comment must not contain '--' (double-hyphen)`\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004261 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004262 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004263 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004264 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004265 if (len + 5 >= size) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00004266 size *= 2;
4267 buf = xmlRealloc(buf, size * sizeof(xmlChar));
4268 if (buf == NULL) {
4269 fprintf(stderr, "realloc of %d byte failed\n", size);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004270 ctxt->instate = state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004271 return;
4272 }
4273 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004274 COPY_BUF(ql,buf,len,q);
Daniel Veillard10a2c651999-12-12 13:03:50 +00004275 q = r;
Daniel Veillardcf461992000-03-14 18:30:20 +00004276 ql = rl;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004277 r = cur;
Daniel Veillardcf461992000-03-14 18:30:20 +00004278 rl = l;
4279 NEXTL(l);
4280 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00004281 if (cur == 0) {
4282 SHRINK;
4283 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00004284 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00004285 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004286 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00004287 buf[len] = 0;
4288 if (!IS_CHAR(cur)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004289 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004290 ctxt->sax->error(ctxt->userData,
Daniel Veillard10a2c651999-12-12 13:03:50 +00004291 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004292 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004293 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004294 ctxt->disableSAX = 1;
4295 xmlFree(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004296 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00004297 if (input != ctxt->input) {
4298 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4299 ctxt->sax->error(ctxt->userData,
4300"Comment doesn't start and stop in the same entity\n");
4301 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4302 ctxt->wellFormed = 0;
4303 ctxt->disableSAX = 1;
4304 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004305 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00004306 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4307 (!ctxt->disableSAX))
Daniel Veillard10a2c651999-12-12 13:03:50 +00004308 ctxt->sax->comment(ctxt->userData, buf);
4309 xmlFree(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004310 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004311 ctxt->instate = state;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004312}
4313
Daniel Veillard11e00581998-10-24 18:27:49 +00004314/**
4315 * xmlParsePITarget:
4316 * @ctxt: an XML parser context
4317 *
4318 * parse the name of a PI
Daniel Veillard260a68f1998-08-13 03:39:55 +00004319 *
4320 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
Daniel Veillard1e346af1999-02-22 10:33:01 +00004321 *
4322 * Returns the PITarget name or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00004323 */
4324
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004325xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004326xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004327 xmlChar *name;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004328
4329 name = xmlParseName(ctxt);
Daniel Veillard3c558c31999-12-22 11:30:41 +00004330 if ((name != NULL) &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00004331 ((name[0] == 'x') || (name[0] == 'X')) &&
4332 ((name[1] == 'm') || (name[1] == 'M')) &&
4333 ((name[2] == 'l') || (name[2] == 'L'))) {
Daniel Veillard3c558c31999-12-22 11:30:41 +00004334 int i;
Daniel Veillardcf461992000-03-14 18:30:20 +00004335 if ((name[0] == 'x') && (name[1] == 'm') &&
4336 (name[2] == 'l') && (name[3] == 0)) {
4337 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4338 ctxt->sax->error(ctxt->userData,
4339 "XML declaration allowed only at the start of the document\n");
4340 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4341 ctxt->wellFormed = 0;
4342 ctxt->disableSAX = 1;
4343 return(name);
4344 } else if (name[3] == 0) {
4345 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4346 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
4347 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4348 ctxt->wellFormed = 0;
4349 ctxt->disableSAX = 1;
4350 return(name);
4351 }
Daniel Veillard3c558c31999-12-22 11:30:41 +00004352 for (i = 0;;i++) {
4353 if (xmlW3CPIs[i] == NULL) break;
4354 if (!xmlStrcmp(name, (const xmlChar *)xmlW3CPIs[i]))
4355 return(name);
4356 }
4357 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
4358 ctxt->sax->warning(ctxt->userData,
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004359 "xmlParsePItarget: invalid name prefix 'xml'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004360 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004361 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004362 }
4363 return(name);
4364}
4365
Daniel Veillard11e00581998-10-24 18:27:49 +00004366/**
4367 * xmlParsePI:
4368 * @ctxt: an XML parser context
4369 *
4370 * parse an XML Processing Instruction.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004371 *
4372 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
Daniel Veillard1e346af1999-02-22 10:33:01 +00004373 *
4374 * The processing is transfered to SAX once parsed.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004375 */
4376
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004377void
4378xmlParsePI(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00004379 xmlChar *buf = NULL;
4380 int len = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004381 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillardcf461992000-03-14 18:30:20 +00004382 int cur, l;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004383 xmlChar *target;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004384 xmlParserInputState state;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004385
Daniel Veillardcf461992000-03-14 18:30:20 +00004386 if ((RAW == '<') && (NXT(1) == '?')) {
4387 xmlParserInputPtr input = ctxt->input;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004388 state = ctxt->instate;
4389 ctxt->instate = XML_PARSER_PI;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004390 /*
4391 * this is a Processing Instruction.
4392 */
4393 SKIP(2);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004394 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004395
4396 /*
4397 * Parse the target name and check for special support like
4398 * namespace.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004399 */
4400 target = xmlParsePITarget(ctxt);
4401 if (target != NULL) {
Daniel Veillardcf461992000-03-14 18:30:20 +00004402 if ((RAW == '?') && (NXT(1) == '>')) {
4403 if (input != ctxt->input) {
4404 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4405 ctxt->sax->error(ctxt->userData,
4406 "PI declaration doesn't start and stop in the same entity\n");
4407 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4408 ctxt->wellFormed = 0;
4409 ctxt->disableSAX = 1;
4410 }
4411 SKIP(2);
4412
4413 /*
4414 * SAX: PI detected.
4415 */
4416 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4417 (ctxt->sax->processingInstruction != NULL))
4418 ctxt->sax->processingInstruction(ctxt->userData,
4419 target, NULL);
4420 ctxt->instate = state;
4421 xmlFree(target);
4422 return;
4423 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00004424 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4425 if (buf == NULL) {
4426 fprintf(stderr, "malloc of %d byte failed\n", size);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004427 ctxt->instate = state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004428 return;
4429 }
4430 cur = CUR;
4431 if (!IS_BLANK(cur)) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00004432 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4433 ctxt->sax->error(ctxt->userData,
4434 "xmlParsePI: PI %s space expected\n", target);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004435 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillardb96e6431999-08-29 21:02:19 +00004436 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004437 ctxt->disableSAX = 1;
Daniel Veillardb96e6431999-08-29 21:02:19 +00004438 }
4439 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00004440 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00004441 while (IS_CHAR(cur) &&
4442 ((cur != '?') || (NXT(1) != '>'))) {
Daniel Veillardcf461992000-03-14 18:30:20 +00004443 if (len + 5 >= size) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00004444 size *= 2;
4445 buf = xmlRealloc(buf, size * sizeof(xmlChar));
4446 if (buf == NULL) {
4447 fprintf(stderr, "realloc of %d byte failed\n", size);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004448 ctxt->instate = state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004449 return;
4450 }
4451 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004452 COPY_BUF(l,buf,len,cur);
4453 NEXTL(l);
4454 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00004455 if (cur == 0) {
4456 SHRINK;
4457 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00004458 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00004459 }
4460 }
4461 buf[len] = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004462 if (cur != '?') {
Daniel Veillard517752b1999-04-05 12:20:10 +00004463 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004464 ctxt->sax->error(ctxt->userData,
Daniel Veillard517752b1999-04-05 12:20:10 +00004465 "xmlParsePI: PI %s never end ...\n", target);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004466 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
Daniel Veillard517752b1999-04-05 12:20:10 +00004467 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004468 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004469 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00004470 if (input != ctxt->input) {
4471 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4472 ctxt->sax->error(ctxt->userData,
4473 "PI declaration doesn't start and stop in the same entity\n");
4474 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4475 ctxt->wellFormed = 0;
4476 ctxt->disableSAX = 1;
4477 }
Daniel Veillard517752b1999-04-05 12:20:10 +00004478 SKIP(2);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004479
Daniel Veillard517752b1999-04-05 12:20:10 +00004480 /*
4481 * SAX: PI detected.
4482 */
Daniel Veillardcf461992000-03-14 18:30:20 +00004483 if ((ctxt->sax) && (!ctxt->disableSAX) &&
Daniel Veillard517752b1999-04-05 12:20:10 +00004484 (ctxt->sax->processingInstruction != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00004485 ctxt->sax->processingInstruction(ctxt->userData,
Daniel Veillard10a2c651999-12-12 13:03:50 +00004486 target, buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004487 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00004488 xmlFree(buf);
Daniel Veillard6454aec1999-09-02 22:04:43 +00004489 xmlFree(target);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004490 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004491 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00004492 ctxt->sax->error(ctxt->userData,
4493 "xmlParsePI : no target name\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004494 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004495 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004496 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004497 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004498 ctxt->instate = state;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004499 }
4500}
4501
Daniel Veillard11e00581998-10-24 18:27:49 +00004502/**
4503 * xmlParseNotationDecl:
4504 * @ctxt: an XML parser context
4505 *
4506 * parse a notation declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00004507 *
4508 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4509 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00004510 * Hence there is actually 3 choices:
4511 * 'PUBLIC' S PubidLiteral
4512 * 'PUBLIC' S PubidLiteral S SystemLiteral
4513 * and 'SYSTEM' S SystemLiteral
Daniel Veillard11e00581998-10-24 18:27:49 +00004514 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00004515 * See the NOTE on xmlParseExternalID().
Daniel Veillard260a68f1998-08-13 03:39:55 +00004516 */
4517
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004518void
4519xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004520 xmlChar *name;
4521 xmlChar *Pubid;
4522 xmlChar *Systemid;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004523
Daniel Veillardcf461992000-03-14 18:30:20 +00004524 if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00004525 (NXT(2) == 'N') && (NXT(3) == 'O') &&
4526 (NXT(4) == 'T') && (NXT(5) == 'A') &&
4527 (NXT(6) == 'T') && (NXT(7) == 'I') &&
Daniel Veillard1e346af1999-02-22 10:33:01 +00004528 (NXT(8) == 'O') && (NXT(9) == 'N')) {
Daniel Veillardcf461992000-03-14 18:30:20 +00004529 xmlParserInputPtr input = ctxt->input;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004530 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004531 SKIP(10);
Daniel Veillard1e346af1999-02-22 10:33:01 +00004532 if (!IS_BLANK(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004533 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004534 ctxt->sax->error(ctxt->userData,
4535 "Space required after '<!NOTATION'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004536 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004537 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004538 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004539 return;
4540 }
4541 SKIP_BLANKS;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004542
4543 name = xmlParseName(ctxt);
4544 if (name == NULL) {
4545 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004546 ctxt->sax->error(ctxt->userData,
4547 "NOTATION: Name expected here\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004548 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004549 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004550 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004551 return;
4552 }
4553 if (!IS_BLANK(CUR)) {
4554 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004555 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00004556 "Space required after the NOTATION name'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004557 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004558 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004559 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004560 return;
4561 }
4562 SKIP_BLANKS;
4563
Daniel Veillard260a68f1998-08-13 03:39:55 +00004564 /*
Daniel Veillard1e346af1999-02-22 10:33:01 +00004565 * Parse the IDs.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004566 */
Daniel Veillardcf461992000-03-14 18:30:20 +00004567 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
Daniel Veillard1e346af1999-02-22 10:33:01 +00004568 SKIP_BLANKS;
4569
Daniel Veillardcf461992000-03-14 18:30:20 +00004570 if (RAW == '>') {
4571 if (input != ctxt->input) {
4572 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4573 ctxt->sax->error(ctxt->userData,
4574"Notation declaration doesn't start and stop in the same entity\n");
4575 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4576 ctxt->wellFormed = 0;
4577 ctxt->disableSAX = 1;
4578 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004579 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00004580 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4581 (ctxt->sax->notationDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004582 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
Daniel Veillard1e346af1999-02-22 10:33:01 +00004583 } else {
4584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004585 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00004586 "'>' required to close NOTATION declaration\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004587 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004588 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004589 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004590 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00004591 xmlFree(name);
4592 if (Systemid != NULL) xmlFree(Systemid);
4593 if (Pubid != NULL) xmlFree(Pubid);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004594 }
4595}
4596
Daniel Veillard11e00581998-10-24 18:27:49 +00004597/**
4598 * xmlParseEntityDecl:
4599 * @ctxt: an XML parser context
4600 *
4601 * parse <!ENTITY declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00004602 *
4603 * [70] EntityDecl ::= GEDecl | PEDecl
4604 *
4605 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4606 *
4607 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4608 *
4609 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4610 *
4611 * [74] PEDef ::= EntityValue | ExternalID
4612 *
4613 * [76] NDataDecl ::= S 'NDATA' S Name
Daniel Veillardb05deb71999-08-10 19:04:08 +00004614 *
4615 * [ VC: Notation Declared ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00004616 * The Name must match the declared name of a notation.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004617 */
4618
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004619void
4620xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004621 xmlChar *name = NULL;
4622 xmlChar *value = NULL;
4623 xmlChar *URI = NULL, *literal = NULL;
4624 xmlChar *ndata = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004625 int isParameter = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004626 xmlChar *orig = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004627
Daniel Veillardb05deb71999-08-10 19:04:08 +00004628 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00004629 if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00004630 (NXT(2) == 'E') && (NXT(3) == 'N') &&
4631 (NXT(4) == 'T') && (NXT(5) == 'I') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004632 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
Daniel Veillardcf461992000-03-14 18:30:20 +00004633 xmlParserInputPtr input = ctxt->input;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004634 ctxt->instate = XML_PARSER_ENTITY_DECL;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004635 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004636 SKIP(8);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004637 if (!IS_BLANK(CUR)) {
4638 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004639 ctxt->sax->error(ctxt->userData,
4640 "Space required after '<!ENTITY'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004641 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004642 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004643 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004644 }
4645 SKIP_BLANKS;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004646
Daniel Veillardcf461992000-03-14 18:30:20 +00004647 if (RAW == '%') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004648 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004649 if (!IS_BLANK(CUR)) {
4650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004651 ctxt->sax->error(ctxt->userData,
4652 "Space required after '%'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004653 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004654 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004655 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004656 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004657 SKIP_BLANKS;
4658 isParameter = 1;
4659 }
4660
4661 name = xmlParseName(ctxt);
4662 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004663 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004664 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004665 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004666 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004667 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004668 return;
4669 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004670 if (!IS_BLANK(CUR)) {
4671 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004672 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004673 "Space required after the entity name\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004674 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004675 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004676 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004677 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004678 SKIP_BLANKS;
4679
4680 /*
Daniel Veillard1e346af1999-02-22 10:33:01 +00004681 * handle the various case of definitions...
Daniel Veillard260a68f1998-08-13 03:39:55 +00004682 */
4683 if (isParameter) {
Daniel Veillardcf461992000-03-14 18:30:20 +00004684 if ((RAW == '"') || (RAW == '\''))
Daniel Veillard011b63c1999-06-02 17:44:04 +00004685 value = xmlParseEntityValue(ctxt, &orig);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004686 if (value) {
Daniel Veillardcf461992000-03-14 18:30:20 +00004687 if ((ctxt->sax != NULL) &&
4688 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004689 ctxt->sax->entityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00004690 XML_INTERNAL_PARAMETER_ENTITY,
4691 NULL, NULL, value);
4692 }
4693 else {
Daniel Veillard1e346af1999-02-22 10:33:01 +00004694 URI = xmlParseExternalID(ctxt, &literal, 1);
Daniel Veillardcf461992000-03-14 18:30:20 +00004695 if ((URI == NULL) && (literal == NULL)) {
4696 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4697 ctxt->sax->error(ctxt->userData,
4698 "Entity value required\n");
4699 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
4700 ctxt->wellFormed = 0;
4701 ctxt->disableSAX = 1;
4702 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004703 if (URI) {
Daniel Veillardcf461992000-03-14 18:30:20 +00004704 if ((ctxt->sax != NULL) &&
4705 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004706 ctxt->sax->entityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00004707 XML_EXTERNAL_PARAMETER_ENTITY,
4708 literal, URI, NULL);
4709 }
4710 }
4711 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00004712 if ((RAW == '"') || (RAW == '\'')) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00004713 value = xmlParseEntityValue(ctxt, &orig);
Daniel Veillardcf461992000-03-14 18:30:20 +00004714 if ((ctxt->sax != NULL) &&
4715 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004716 ctxt->sax->entityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00004717 XML_INTERNAL_GENERAL_ENTITY,
4718 NULL, NULL, value);
4719 } else {
Daniel Veillard1e346af1999-02-22 10:33:01 +00004720 URI = xmlParseExternalID(ctxt, &literal, 1);
Daniel Veillardcf461992000-03-14 18:30:20 +00004721 if ((URI == NULL) && (literal == NULL)) {
4722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4723 ctxt->sax->error(ctxt->userData,
4724 "Entity value required\n");
4725 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
4726 ctxt->wellFormed = 0;
4727 ctxt->disableSAX = 1;
4728 }
4729 if ((RAW != '>') && (!IS_BLANK(CUR))) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004730 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004731 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004732 "Space required before 'NDATA'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004733 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004734 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004735 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004736 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004737 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00004738 if ((RAW == 'N') && (NXT(1) == 'D') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00004739 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4740 (NXT(4) == 'A')) {
4741 SKIP(5);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004742 if (!IS_BLANK(CUR)) {
4743 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004744 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004745 "Space required after 'NDATA'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004746 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004747 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004748 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004749 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004750 SKIP_BLANKS;
4751 ndata = xmlParseName(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00004752 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardb96e6431999-08-29 21:02:19 +00004753 (ctxt->sax->unparsedEntityDecl != NULL))
4754 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00004755 literal, URI, ndata);
4756 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00004757 if ((ctxt->sax != NULL) &&
4758 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004759 ctxt->sax->entityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00004760 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4761 literal, URI, NULL);
4762 }
4763 }
4764 }
4765 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00004766 if (RAW != '>') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004767 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004768 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00004769 "xmlParseEntityDecl: entity %s not terminated\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004770 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004771 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004772 ctxt->disableSAX = 1;
4773 } else {
4774 if (input != ctxt->input) {
4775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4776 ctxt->sax->error(ctxt->userData,
4777"Entity declaration doesn't start and stop in the same entity\n");
4778 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4779 ctxt->wellFormed = 0;
4780 ctxt->disableSAX = 1;
4781 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004782 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00004783 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00004784 if (orig != NULL) {
4785 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00004786 * Ugly mechanism to save the raw entity value.
Daniel Veillard011b63c1999-06-02 17:44:04 +00004787 */
4788 xmlEntityPtr cur = NULL;
4789
Daniel Veillardb05deb71999-08-10 19:04:08 +00004790 if (isParameter) {
4791 if ((ctxt->sax != NULL) &&
4792 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillardc08a2c61999-09-08 21:35:25 +00004793 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00004794 } else {
4795 if ((ctxt->sax != NULL) &&
4796 (ctxt->sax->getEntity != NULL))
Daniel Veillardc08a2c61999-09-08 21:35:25 +00004797 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00004798 }
4799 if (cur != NULL) {
4800 if (cur->orig != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00004801 xmlFree(orig);
Daniel Veillardb05deb71999-08-10 19:04:08 +00004802 else
4803 cur->orig = orig;
4804 } else
Daniel Veillard6454aec1999-09-02 22:04:43 +00004805 xmlFree(orig);
Daniel Veillard011b63c1999-06-02 17:44:04 +00004806 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00004807 if (name != NULL) xmlFree(name);
4808 if (value != NULL) xmlFree(value);
4809 if (URI != NULL) xmlFree(URI);
4810 if (literal != NULL) xmlFree(literal);
4811 if (ndata != NULL) xmlFree(ndata);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004812 }
4813}
4814
Daniel Veillard11e00581998-10-24 18:27:49 +00004815/**
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004816 * xmlParseDefaultDecl:
4817 * @ctxt: an XML parser context
4818 * @value: Receive a possible fixed default value for the attribute
4819 *
4820 * Parse an attribute default declaration
4821 *
4822 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4823 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00004824 * [ VC: Required Attribute ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00004825 * if the default declaration is the keyword #REQUIRED, then the
4826 * attribute must be specified for all elements of the type in the
4827 * attribute-list declaration.
Daniel Veillardb05deb71999-08-10 19:04:08 +00004828 *
4829 * [ VC: Attribute Default Legal ]
4830 * The declared default value must meet the lexical constraints of
4831 * the declared attribute type c.f. xmlValidateAttributeDecl()
4832 *
4833 * [ VC: Fixed Attribute Default ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00004834 * if an attribute has a default value declared with the #FIXED
4835 * keyword, instances of that attribute must match the default value.
Daniel Veillardb05deb71999-08-10 19:04:08 +00004836 *
4837 * [ WFC: No < in Attribute Values ]
4838 * handled in xmlParseAttValue()
4839 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004840 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4841 * or XML_ATTRIBUTE_FIXED.
4842 */
4843
4844int
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004845xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004846 int val;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004847 xmlChar *ret;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004848
4849 *value = NULL;
Daniel Veillardcf461992000-03-14 18:30:20 +00004850 if ((RAW == '#') && (NXT(1) == 'R') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004851 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
4852 (NXT(4) == 'U') && (NXT(5) == 'I') &&
4853 (NXT(6) == 'R') && (NXT(7) == 'E') &&
4854 (NXT(8) == 'D')) {
4855 SKIP(9);
4856 return(XML_ATTRIBUTE_REQUIRED);
4857 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004858 if ((RAW == '#') && (NXT(1) == 'I') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004859 (NXT(2) == 'M') && (NXT(3) == 'P') &&
4860 (NXT(4) == 'L') && (NXT(5) == 'I') &&
4861 (NXT(6) == 'E') && (NXT(7) == 'D')) {
4862 SKIP(8);
4863 return(XML_ATTRIBUTE_IMPLIED);
4864 }
4865 val = XML_ATTRIBUTE_NONE;
Daniel Veillardcf461992000-03-14 18:30:20 +00004866 if ((RAW == '#') && (NXT(1) == 'F') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004867 (NXT(2) == 'I') && (NXT(3) == 'X') &&
4868 (NXT(4) == 'E') && (NXT(5) == 'D')) {
4869 SKIP(6);
4870 val = XML_ATTRIBUTE_FIXED;
4871 if (!IS_BLANK(CUR)) {
4872 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004873 ctxt->sax->error(ctxt->userData,
4874 "Space required after '#FIXED'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004875 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004876 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004877 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004878 }
4879 SKIP_BLANKS;
4880 }
4881 ret = xmlParseAttValue(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00004882 ctxt->instate = XML_PARSER_DTD;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004883 if (ret == NULL) {
4884 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004885 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004886 "Attribute default value declaration error\n");
4887 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004888 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004889 } else
4890 *value = ret;
4891 return(val);
4892}
4893
4894/**
Daniel Veillard1e346af1999-02-22 10:33:01 +00004895 * xmlParseNotationType:
4896 * @ctxt: an XML parser context
4897 *
4898 * parse an Notation attribute type.
4899 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00004900 * Note: the leading 'NOTATION' S part has already being parsed...
4901 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00004902 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4903 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00004904 * [ VC: Notation Attributes ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00004905 * Values of this type must match one of the notation names included
Daniel Veillardb05deb71999-08-10 19:04:08 +00004906 * in the declaration; all notation names in the declaration must be declared.
Daniel Veillard1e346af1999-02-22 10:33:01 +00004907 *
4908 * Returns: the notation attribute tree built while parsing
4909 */
4910
4911xmlEnumerationPtr
4912xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004913 xmlChar *name;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004914 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4915
Daniel Veillardcf461992000-03-14 18:30:20 +00004916 if (RAW != '(') {
Daniel Veillard1e346af1999-02-22 10:33:01 +00004917 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004918 ctxt->sax->error(ctxt->userData,
4919 "'(' required to start 'NOTATION'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004920 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004921 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004922 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004923 return(NULL);
4924 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004925 SHRINK;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004926 do {
4927 NEXT;
4928 SKIP_BLANKS;
4929 name = xmlParseName(ctxt);
4930 if (name == NULL) {
4931 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004932 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00004933 "Name expected in NOTATION declaration\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004934 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004935 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004936 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004937 return(ret);
4938 }
4939 cur = xmlCreateEnumeration(name);
Daniel Veillard6454aec1999-09-02 22:04:43 +00004940 xmlFree(name);
Daniel Veillard1e346af1999-02-22 10:33:01 +00004941 if (cur == NULL) return(ret);
4942 if (last == NULL) ret = last = cur;
4943 else {
4944 last->next = cur;
4945 last = cur;
4946 }
4947 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00004948 } while (RAW == '|');
4949 if (RAW != ')') {
Daniel Veillard1e346af1999-02-22 10:33:01 +00004950 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004951 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00004952 "')' required to finish NOTATION declaration\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004953 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004954 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004955 ctxt->disableSAX = 1;
4956 if ((last != NULL) && (last != ret))
4957 xmlFreeEnumeration(last);
Daniel Veillard1e346af1999-02-22 10:33:01 +00004958 return(ret);
4959 }
4960 NEXT;
4961 return(ret);
4962}
4963
4964/**
4965 * xmlParseEnumerationType:
4966 * @ctxt: an XML parser context
4967 *
4968 * parse an Enumeration attribute type.
4969 *
4970 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4971 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00004972 * [ VC: Enumeration ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00004973 * Values of this type must match one of the Nmtoken tokens in
Daniel Veillardb05deb71999-08-10 19:04:08 +00004974 * the declaration
4975 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00004976 * Returns: the enumeration attribute tree built while parsing
4977 */
4978
4979xmlEnumerationPtr
4980xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004981 xmlChar *name;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004982 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4983
Daniel Veillardcf461992000-03-14 18:30:20 +00004984 if (RAW != '(') {
Daniel Veillard1e346af1999-02-22 10:33:01 +00004985 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004986 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00004987 "'(' required to start ATTLIST enumeration\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004988 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004989 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004990 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004991 return(NULL);
4992 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004993 SHRINK;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004994 do {
4995 NEXT;
4996 SKIP_BLANKS;
4997 name = xmlParseNmtoken(ctxt);
4998 if (name == NULL) {
4999 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005000 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00005001 "NmToken expected in ATTLIST enumeration\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005002 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005003 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005004 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005005 return(ret);
5006 }
5007 cur = xmlCreateEnumeration(name);
Daniel Veillard6454aec1999-09-02 22:04:43 +00005008 xmlFree(name);
Daniel Veillard1e346af1999-02-22 10:33:01 +00005009 if (cur == NULL) return(ret);
5010 if (last == NULL) ret = last = cur;
5011 else {
5012 last->next = cur;
5013 last = cur;
5014 }
5015 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00005016 } while (RAW == '|');
5017 if (RAW != ')') {
Daniel Veillard1e346af1999-02-22 10:33:01 +00005018 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005019 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00005020 "')' required to finish ATTLIST enumeration\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005021 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005022 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005023 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005024 return(ret);
5025 }
5026 NEXT;
5027 return(ret);
5028}
5029
5030/**
Daniel Veillard11e00581998-10-24 18:27:49 +00005031 * xmlParseEnumeratedType:
5032 * @ctxt: an XML parser context
Daniel Veillard1e346af1999-02-22 10:33:01 +00005033 * @tree: the enumeration tree built while parsing
Daniel Veillard11e00581998-10-24 18:27:49 +00005034 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005035 * parse an Enumerated attribute type.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005036 *
5037 * [57] EnumeratedType ::= NotationType | Enumeration
5038 *
5039 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5040 *
Daniel Veillard11e00581998-10-24 18:27:49 +00005041 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005042 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
Daniel Veillard260a68f1998-08-13 03:39:55 +00005043 */
5044
Daniel Veillard1e346af1999-02-22 10:33:01 +00005045int
5046xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillardcf461992000-03-14 18:30:20 +00005047 if ((RAW == 'N') && (NXT(1) == 'O') &&
Daniel Veillard1e346af1999-02-22 10:33:01 +00005048 (NXT(2) == 'T') && (NXT(3) == 'A') &&
5049 (NXT(4) == 'T') && (NXT(5) == 'I') &&
5050 (NXT(6) == 'O') && (NXT(7) == 'N')) {
5051 SKIP(8);
5052 if (!IS_BLANK(CUR)) {
5053 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005054 ctxt->sax->error(ctxt->userData,
5055 "Space required after 'NOTATION'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005056 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005057 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005058 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005059 return(0);
5060 }
5061 SKIP_BLANKS;
5062 *tree = xmlParseNotationType(ctxt);
5063 if (*tree == NULL) return(0);
5064 return(XML_ATTRIBUTE_NOTATION);
5065 }
5066 *tree = xmlParseEnumerationType(ctxt);
5067 if (*tree == NULL) return(0);
5068 return(XML_ATTRIBUTE_ENUMERATION);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005069}
5070
Daniel Veillard11e00581998-10-24 18:27:49 +00005071/**
5072 * xmlParseAttributeType:
5073 * @ctxt: an XML parser context
Daniel Veillard1e346af1999-02-22 10:33:01 +00005074 * @tree: the enumeration tree built while parsing
Daniel Veillard11e00581998-10-24 18:27:49 +00005075 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005076 * parse the Attribute list def for an element
Daniel Veillard260a68f1998-08-13 03:39:55 +00005077 *
5078 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5079 *
5080 * [55] StringType ::= 'CDATA'
5081 *
5082 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5083 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
Daniel Veillard11e00581998-10-24 18:27:49 +00005084 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005085 * Validity constraints for attribute values syntax are checked in
5086 * xmlValidateAttributeValue()
5087 *
5088 * [ VC: ID ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00005089 * Values of type ID must match the Name production. A name must not
Daniel Veillardb05deb71999-08-10 19:04:08 +00005090 * appear more than once in an XML document as a value of this type;
5091 * i.e., ID values must uniquely identify the elements which bear them.
5092 *
5093 * [ VC: One ID per Element Type ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00005094 * No element type may have more than one ID attribute specified.
Daniel Veillardb05deb71999-08-10 19:04:08 +00005095 *
5096 * [ VC: ID Attribute Default ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00005097 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
Daniel Veillardb05deb71999-08-10 19:04:08 +00005098 *
5099 * [ VC: IDREF ]
5100 * Values of type IDREF must match the Name production, and values
Daniel Veillarddbfd6411999-12-28 16:35:14 +00005101 * of type IDREFS must match Names; each IDREF Name must match the value
Daniel Veillardb96e6431999-08-29 21:02:19 +00005102 * of an ID attribute on some element in the XML document; i.e. IDREF
Daniel Veillardb05deb71999-08-10 19:04:08 +00005103 * values must match the value of some ID attribute.
5104 *
5105 * [ VC: Entity Name ]
5106 * Values of type ENTITY must match the Name production, values
Daniel Veillarddbfd6411999-12-28 16:35:14 +00005107 * of type ENTITIES must match Names; each Entity Name must match the
Daniel Veillardb96e6431999-08-29 21:02:19 +00005108 * name of an unparsed entity declared in the DTD.
Daniel Veillardb05deb71999-08-10 19:04:08 +00005109 *
5110 * [ VC: Name Token ]
5111 * Values of type NMTOKEN must match the Nmtoken production; values
5112 * of type NMTOKENS must match Nmtokens.
5113 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005114 * Returns the attribute type
Daniel Veillard260a68f1998-08-13 03:39:55 +00005115 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005116int
Daniel Veillard1e346af1999-02-22 10:33:01 +00005117xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005118 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00005119 if ((RAW == 'C') && (NXT(1) == 'D') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00005120 (NXT(2) == 'A') && (NXT(3) == 'T') &&
5121 (NXT(4) == 'A')) {
5122 SKIP(5);
Daniel Veillard1e346af1999-02-22 10:33:01 +00005123 return(XML_ATTRIBUTE_CDATA);
Daniel Veillardcf461992000-03-14 18:30:20 +00005124 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00005125 (NXT(2) == 'R') && (NXT(3) == 'E') &&
Daniel Veillardb05deb71999-08-10 19:04:08 +00005126 (NXT(4) == 'F') && (NXT(5) == 'S')) {
5127 SKIP(6);
5128 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillardcf461992000-03-14 18:30:20 +00005129 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
Daniel Veillardb05deb71999-08-10 19:04:08 +00005130 (NXT(2) == 'R') && (NXT(3) == 'E') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00005131 (NXT(4) == 'F')) {
5132 SKIP(5);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005133 return(XML_ATTRIBUTE_IDREF);
Daniel Veillardcf461992000-03-14 18:30:20 +00005134 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
Daniel Veillard1e346af1999-02-22 10:33:01 +00005135 SKIP(2);
5136 return(XML_ATTRIBUTE_ID);
Daniel Veillardcf461992000-03-14 18:30:20 +00005137 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00005138 (NXT(2) == 'T') && (NXT(3) == 'I') &&
5139 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
5140 SKIP(6);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005141 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillardcf461992000-03-14 18:30:20 +00005142 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00005143 (NXT(2) == 'T') && (NXT(3) == 'I') &&
5144 (NXT(4) == 'T') && (NXT(5) == 'I') &&
5145 (NXT(6) == 'E') && (NXT(7) == 'S')) {
5146 SKIP(8);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005147 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillardcf461992000-03-14 18:30:20 +00005148 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00005149 (NXT(2) == 'T') && (NXT(3) == 'O') &&
5150 (NXT(4) == 'K') && (NXT(5) == 'E') &&
Daniel Veillard1e346af1999-02-22 10:33:01 +00005151 (NXT(6) == 'N') && (NXT(7) == 'S')) {
5152 SKIP(8);
5153 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillardcf461992000-03-14 18:30:20 +00005154 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00005155 (NXT(2) == 'T') && (NXT(3) == 'O') &&
5156 (NXT(4) == 'K') && (NXT(5) == 'E') &&
Daniel Veillard1e346af1999-02-22 10:33:01 +00005157 (NXT(6) == 'N')) {
5158 SKIP(7);
5159 return(XML_ATTRIBUTE_NMTOKEN);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005160 }
Daniel Veillard1e346af1999-02-22 10:33:01 +00005161 return(xmlParseEnumeratedType(ctxt, tree));
Daniel Veillard260a68f1998-08-13 03:39:55 +00005162}
5163
Daniel Veillard11e00581998-10-24 18:27:49 +00005164/**
5165 * xmlParseAttributeListDecl:
5166 * @ctxt: an XML parser context
5167 *
5168 * : parse the Attribute list def for an element
Daniel Veillard260a68f1998-08-13 03:39:55 +00005169 *
5170 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5171 *
5172 * [53] AttDef ::= S Name S AttType S DefaultDecl
Daniel Veillard11e00581998-10-24 18:27:49 +00005173 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00005174 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005175void
5176xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005177 xmlChar *elemName;
5178 xmlChar *attrName;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005179 xmlEnumerationPtr tree;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005180
Daniel Veillardcf461992000-03-14 18:30:20 +00005181 if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00005182 (NXT(2) == 'A') && (NXT(3) == 'T') &&
5183 (NXT(4) == 'T') && (NXT(5) == 'L') &&
5184 (NXT(6) == 'I') && (NXT(7) == 'S') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005185 (NXT(8) == 'T')) {
Daniel Veillardcf461992000-03-14 18:30:20 +00005186 xmlParserInputPtr input = ctxt->input;
5187
Daniel Veillard260a68f1998-08-13 03:39:55 +00005188 SKIP(9);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005189 if (!IS_BLANK(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005190 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005191 ctxt->sax->error(ctxt->userData,
5192 "Space required after '<!ATTLIST'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005193 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005194 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005195 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005196 }
5197 SKIP_BLANKS;
5198 elemName = xmlParseName(ctxt);
5199 if (elemName == NULL) {
5200 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005201 ctxt->sax->error(ctxt->userData,
5202 "ATTLIST: no name for Element\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005203 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005204 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005205 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005206 return;
5207 }
5208 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00005209 while (RAW != '>') {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005210 const xmlChar *check = CUR_PTR;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005211 int type;
5212 int def;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005213 xmlChar *defaultValue = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005214
Daniel Veillardb05deb71999-08-10 19:04:08 +00005215 tree = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005216 attrName = xmlParseName(ctxt);
5217 if (attrName == NULL) {
5218 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005219 ctxt->sax->error(ctxt->userData,
5220 "ATTLIST: no name for Attribute\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005221 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005222 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005223 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005224 break;
5225 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00005226 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005227 if (!IS_BLANK(CUR)) {
5228 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005229 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005230 "Space required after the attribute name\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005231 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005232 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005233 ctxt->disableSAX = 1;
5234 if (attrName != NULL)
5235 xmlFree(attrName);
5236 if (defaultValue != NULL)
5237 xmlFree(defaultValue);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005238 break;
5239 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005240 SKIP_BLANKS;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005241
Daniel Veillard1e346af1999-02-22 10:33:01 +00005242 type = xmlParseAttributeType(ctxt, &tree);
Daniel Veillardcf461992000-03-14 18:30:20 +00005243 if (type <= 0) {
5244 if (attrName != NULL)
5245 xmlFree(attrName);
5246 if (defaultValue != NULL)
5247 xmlFree(defaultValue);
5248 break;
5249 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005250
Daniel Veillardb05deb71999-08-10 19:04:08 +00005251 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005252 if (!IS_BLANK(CUR)) {
5253 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005254 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005255 "Space required after the attribute type\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005256 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005257 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005258 ctxt->disableSAX = 1;
5259 if (attrName != NULL)
5260 xmlFree(attrName);
5261 if (defaultValue != NULL)
5262 xmlFree(defaultValue);
5263 if (tree != NULL)
5264 xmlFreeEnumeration(tree);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005265 break;
5266 }
5267 SKIP_BLANKS;
5268
5269 def = xmlParseDefaultDecl(ctxt, &defaultValue);
Daniel Veillardcf461992000-03-14 18:30:20 +00005270 if (def <= 0) {
5271 if (attrName != NULL)
5272 xmlFree(attrName);
5273 if (defaultValue != NULL)
5274 xmlFree(defaultValue);
5275 if (tree != NULL)
5276 xmlFreeEnumeration(tree);
5277 break;
5278 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005279
Daniel Veillardb05deb71999-08-10 19:04:08 +00005280 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00005281 if (RAW != '>') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005282 if (!IS_BLANK(CUR)) {
5283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005284 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005285 "Space required after the attribute default value\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005286 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005287 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005288 ctxt->disableSAX = 1;
5289 if (attrName != NULL)
5290 xmlFree(attrName);
5291 if (defaultValue != NULL)
5292 xmlFree(defaultValue);
5293 if (tree != NULL)
5294 xmlFreeEnumeration(tree);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005295 break;
5296 }
5297 SKIP_BLANKS;
5298 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005299 if (check == CUR_PTR) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005300 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005301 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005302 "xmlParseAttributeListDecl: detected internal error\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005303 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillardcf461992000-03-14 18:30:20 +00005304 if (attrName != NULL)
5305 xmlFree(attrName);
5306 if (defaultValue != NULL)
5307 xmlFree(defaultValue);
5308 if (tree != NULL)
5309 xmlFreeEnumeration(tree);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005310 break;
5311 }
Daniel Veillardcf461992000-03-14 18:30:20 +00005312 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5313 (ctxt->sax->attributeDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005314 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
Daniel Veillard1e346af1999-02-22 10:33:01 +00005315 type, def, defaultValue, tree);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005316 if (attrName != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00005317 xmlFree(attrName);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005318 if (defaultValue != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00005319 xmlFree(defaultValue);
Daniel Veillardb05deb71999-08-10 19:04:08 +00005320 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005321 }
Daniel Veillardcf461992000-03-14 18:30:20 +00005322 if (RAW == '>') {
5323 if (input != ctxt->input) {
5324 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5325 ctxt->sax->error(ctxt->userData,
5326"Attribute list declaration doesn't start and stop in the same entity\n");
5327 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5328 ctxt->wellFormed = 0;
5329 ctxt->disableSAX = 1;
5330 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005331 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00005332 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005333
Daniel Veillard6454aec1999-09-02 22:04:43 +00005334 xmlFree(elemName);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005335 }
5336}
5337
Daniel Veillard11e00581998-10-24 18:27:49 +00005338/**
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005339 * xmlParseElementMixedContentDecl:
Daniel Veillard11e00581998-10-24 18:27:49 +00005340 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00005341 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005342 * parse the declaration for a Mixed Element content
5343 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillard260a68f1998-08-13 03:39:55 +00005344 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005345 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5346 * '(' S? '#PCDATA' S? ')'
5347 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005348 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5349 *
5350 * [ VC: No Duplicate Types ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00005351 * The same name must not appear more than once in a single
5352 * mixed-content declaration.
Daniel Veillardb05deb71999-08-10 19:04:08 +00005353 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005354 * returns: the list of the xmlElementContentPtr describing the element choices
5355 */
5356xmlElementContentPtr
5357xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard1899e851999-02-01 12:18:54 +00005358 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005359 xmlChar *elem = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005360
Daniel Veillardb05deb71999-08-10 19:04:08 +00005361 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00005362 if ((RAW == '#') && (NXT(1) == 'P') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005363 (NXT(2) == 'C') && (NXT(3) == 'D') &&
5364 (NXT(4) == 'A') && (NXT(5) == 'T') &&
5365 (NXT(6) == 'A')) {
5366 SKIP(7);
5367 SKIP_BLANKS;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005368 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00005369 if (RAW == ')') {
5370 ctxt->entity = ctxt->input;
Daniel Veillard3b9def11999-01-31 22:15:06 +00005371 NEXT;
5372 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillardcf461992000-03-14 18:30:20 +00005373 if (RAW == '*') {
Daniel Veillardf600e251999-12-18 15:32:46 +00005374 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5375 NEXT;
5376 }
Daniel Veillard3b9def11999-01-31 22:15:06 +00005377 return(ret);
5378 }
Daniel Veillardcf461992000-03-14 18:30:20 +00005379 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005380 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
5381 if (ret == NULL) return(NULL);
Daniel Veillardb05deb71999-08-10 19:04:08 +00005382 }
Daniel Veillardcf461992000-03-14 18:30:20 +00005383 while (RAW == '|') {
Daniel Veillard1899e851999-02-01 12:18:54 +00005384 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005385 if (elem == NULL) {
5386 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5387 if (ret == NULL) return(NULL);
5388 ret->c1 = cur;
Daniel Veillard1899e851999-02-01 12:18:54 +00005389 cur = ret;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005390 } else {
Daniel Veillard1899e851999-02-01 12:18:54 +00005391 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5392 if (n == NULL) return(NULL);
5393 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
5394 cur->c2 = n;
5395 cur = n;
Daniel Veillard6454aec1999-09-02 22:04:43 +00005396 xmlFree(elem);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005397 }
5398 SKIP_BLANKS;
5399 elem = xmlParseName(ctxt);
5400 if (elem == NULL) {
5401 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005402 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005403 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005404 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005405 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005406 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005407 xmlFreeElementContent(cur);
5408 return(NULL);
5409 }
5410 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005411 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005412 }
Daniel Veillardcf461992000-03-14 18:30:20 +00005413 if ((RAW == ')') && (NXT(1) == '*')) {
Daniel Veillard1e346af1999-02-22 10:33:01 +00005414 if (elem != NULL) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005415 cur->c2 = xmlNewElementContent(elem,
5416 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard6454aec1999-09-02 22:04:43 +00005417 xmlFree(elem);
Daniel Veillard1e346af1999-02-22 10:33:01 +00005418 }
Daniel Veillard1899e851999-02-01 12:18:54 +00005419 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardcf461992000-03-14 18:30:20 +00005420 ctxt->entity = ctxt->input;
Daniel Veillard1899e851999-02-01 12:18:54 +00005421 SKIP(2);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005422 } else {
Daniel Veillard6454aec1999-09-02 22:04:43 +00005423 if (elem != NULL) xmlFree(elem);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005424 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005425 ctxt->sax->error(ctxt->userData,
Daniel Veillard3b9def11999-01-31 22:15:06 +00005426 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005427 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005428 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005429 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005430 xmlFreeElementContent(ret);
5431 return(NULL);
5432 }
5433
5434 } else {
5435 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005436 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005437 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005438 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005439 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005440 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005441 }
5442 return(ret);
5443}
5444
5445/**
5446 * xmlParseElementChildrenContentDecl:
5447 * @ctxt: an XML parser context
5448 *
5449 * parse the declaration for a Mixed Element content
5450 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5451 *
5452 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00005453 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5454 *
5455 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5456 *
5457 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5458 *
5459 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5460 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005461 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5462 * TODO Parameter-entity replacement text must be properly nested
5463 * with parenthetized groups. That is to say, if either of the
5464 * opening or closing parentheses in a choice, seq, or Mixed
5465 * construct is contained in the replacement text for a parameter
5466 * entity, both must be contained in the same replacement text. For
5467 * interoperability, if a parameter-entity reference appears in a
5468 * choice, seq, or Mixed construct, its replacement text should not
5469 * be empty, and neither the first nor last non-blank character of
5470 * the replacement text should be a connector (| or ,).
5471 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005472 * returns: the tree of xmlElementContentPtr describing the element
5473 * hierarchy.
5474 */
5475xmlElementContentPtr
5476xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
5477 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005478 xmlChar *elem;
5479 xmlChar type = 0;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005480
5481 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005482 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00005483 if (RAW == '(') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005484 /* Recurse on first child */
5485 NEXT;
5486 SKIP_BLANKS;
5487 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
5488 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005489 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005490 } else {
5491 elem = xmlParseName(ctxt);
5492 if (elem == NULL) {
5493 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005494 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005495 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005496 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005497 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005498 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005499 return(NULL);
5500 }
5501 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillardb05deb71999-08-10 19:04:08 +00005502 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00005503 if (RAW == '?') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00005504 cur->ocur = XML_ELEMENT_CONTENT_OPT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005505 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00005506 } else if (RAW == '*') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00005507 cur->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005508 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00005509 } else if (RAW == '+') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00005510 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005511 NEXT;
5512 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00005513 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005514 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00005515 xmlFree(elem);
Daniel Veillardb05deb71999-08-10 19:04:08 +00005516 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005517 }
5518 SKIP_BLANKS;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005519 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00005520 while (RAW != ')') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005521 /*
5522 * Each loop we parse one separator and one element.
5523 */
Daniel Veillardcf461992000-03-14 18:30:20 +00005524 if (RAW == ',') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005525 if (type == 0) type = CUR;
5526
5527 /*
5528 * Detect "Name | Name , Name" error
5529 */
5530 else if (type != CUR) {
5531 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005532 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005533 "xmlParseElementChildrenContentDecl : '%c' expected\n",
5534 type);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005535 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005536 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005537 ctxt->disableSAX = 1;
5538 if ((op != NULL) && (op != ret))
5539 xmlFreeElementContent(op);
5540 if ((last != NULL) && (last != ret))
5541 xmlFreeElementContent(last);
5542 if (ret != NULL)
5543 xmlFreeElementContent(ret);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005544 return(NULL);
5545 }
Daniel Veillard1899e851999-02-01 12:18:54 +00005546 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005547
5548 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
5549 if (op == NULL) {
5550 xmlFreeElementContent(ret);
5551 return(NULL);
5552 }
5553 if (last == NULL) {
5554 op->c1 = ret;
5555 ret = cur = op;
5556 } else {
5557 cur->c2 = op;
5558 op->c1 = last;
5559 cur =op;
Daniel Veillard1899e851999-02-01 12:18:54 +00005560 last = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005561 }
Daniel Veillardcf461992000-03-14 18:30:20 +00005562 } else if (RAW == '|') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005563 if (type == 0) type = CUR;
5564
5565 /*
5566 * Detect "Name , Name | Name" error
5567 */
5568 else if (type != CUR) {
5569 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005570 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005571 "xmlParseElementChildrenContentDecl : '%c' expected\n",
5572 type);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005573 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005574 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005575 ctxt->disableSAX = 1;
5576 if ((op != NULL) && (op != ret))
5577 xmlFreeElementContent(op);
5578 if ((last != NULL) && (last != ret))
5579 xmlFreeElementContent(last);
5580 if (ret != NULL)
5581 xmlFreeElementContent(ret);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005582 return(NULL);
5583 }
Daniel Veillard1899e851999-02-01 12:18:54 +00005584 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005585
5586 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5587 if (op == NULL) {
Daniel Veillardcf461992000-03-14 18:30:20 +00005588 if ((op != NULL) && (op != ret))
5589 xmlFreeElementContent(op);
5590 if ((last != NULL) && (last != ret))
5591 xmlFreeElementContent(last);
5592 if (ret != NULL)
5593 xmlFreeElementContent(ret);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005594 return(NULL);
5595 }
5596 if (last == NULL) {
5597 op->c1 = ret;
5598 ret = cur = op;
5599 } else {
5600 cur->c2 = op;
5601 op->c1 = last;
5602 cur =op;
Daniel Veillard1899e851999-02-01 12:18:54 +00005603 last = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005604 }
5605 } else {
5606 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005607 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005608 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
5609 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005610 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005611 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
Daniel Veillardcf461992000-03-14 18:30:20 +00005612 if ((op != NULL) && (op != ret))
5613 xmlFreeElementContent(op);
5614 if ((last != NULL) && (last != ret))
5615 xmlFreeElementContent(last);
5616 if (ret != NULL)
5617 xmlFreeElementContent(ret);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005618 return(NULL);
5619 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00005620 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005621 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005622 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00005623 if (RAW == '(') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005624 /* Recurse on second child */
5625 NEXT;
5626 SKIP_BLANKS;
Daniel Veillard1899e851999-02-01 12:18:54 +00005627 last = xmlParseElementChildrenContentDecl(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005628 SKIP_BLANKS;
5629 } else {
5630 elem = xmlParseName(ctxt);
5631 if (elem == NULL) {
5632 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005633 ctxt->sax->error(ctxt->userData,
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005634 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005635 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005636 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005637 ctxt->disableSAX = 1;
5638 if ((op != NULL) && (op != ret))
5639 xmlFreeElementContent(op);
5640 if ((last != NULL) && (last != ret))
5641 xmlFreeElementContent(last);
5642 if (ret != NULL)
5643 xmlFreeElementContent(ret);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005644 return(NULL);
5645 }
Daniel Veillard1899e851999-02-01 12:18:54 +00005646 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard6454aec1999-09-02 22:04:43 +00005647 xmlFree(elem);
Daniel Veillardcf461992000-03-14 18:30:20 +00005648 if (RAW == '?') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00005649 last->ocur = XML_ELEMENT_CONTENT_OPT;
5650 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00005651 } else if (RAW == '*') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00005652 last->ocur = XML_ELEMENT_CONTENT_MULT;
5653 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00005654 } else if (RAW == '+') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00005655 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5656 NEXT;
5657 } else {
5658 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5659 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005660 }
5661 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005662 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005663 }
Daniel Veillard1899e851999-02-01 12:18:54 +00005664 if ((cur != NULL) && (last != NULL)) {
5665 cur->c2 = last;
5666 }
Daniel Veillardcf461992000-03-14 18:30:20 +00005667 ctxt->entity = ctxt->input;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005668 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00005669 if (RAW == '?') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005670 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5671 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00005672 } else if (RAW == '*') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005673 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5674 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00005675 } else if (RAW == '+') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005676 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
5677 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005678 }
5679 return(ret);
5680}
5681
5682/**
5683 * xmlParseElementContentDecl:
5684 * @ctxt: an XML parser context
5685 * @name: the name of the element being defined.
5686 * @result: the Element Content pointer will be stored here if any
Daniel Veillard260a68f1998-08-13 03:39:55 +00005687 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005688 * parse the declaration for an Element content either Mixed or Children,
5689 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5690 *
5691 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
Daniel Veillard11e00581998-10-24 18:27:49 +00005692 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005693 * returns: the type of element content XML_ELEMENT_TYPE_xxx
Daniel Veillard260a68f1998-08-13 03:39:55 +00005694 */
5695
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005696int
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005697xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005698 xmlElementContentPtr *result) {
5699
5700 xmlElementContentPtr tree = NULL;
Daniel Veillardcf461992000-03-14 18:30:20 +00005701 xmlParserInputPtr input = ctxt->input;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005702 int res;
5703
5704 *result = NULL;
5705
Daniel Veillardcf461992000-03-14 18:30:20 +00005706 if (RAW != '(') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005707 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005708 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005709 "xmlParseElementContentDecl : '(' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005710 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005711 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005712 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005713 return(-1);
5714 }
5715 NEXT;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005716 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005717 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00005718 if ((RAW == '#') && (NXT(1) == 'P') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005719 (NXT(2) == 'C') && (NXT(3) == 'D') &&
5720 (NXT(4) == 'A') && (NXT(5) == 'T') &&
5721 (NXT(6) == 'A')) {
5722 tree = xmlParseElementMixedContentDecl(ctxt);
5723 res = XML_ELEMENT_TYPE_MIXED;
5724 } else {
5725 tree = xmlParseElementChildrenContentDecl(ctxt);
5726 res = XML_ELEMENT_TYPE_ELEMENT;
5727 }
Daniel Veillardcf461992000-03-14 18:30:20 +00005728 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
5729 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5730 ctxt->sax->error(ctxt->userData,
5731"Element content declaration doesn't start and stop in the same entity\n");
5732 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5733 ctxt->wellFormed = 0;
5734 ctxt->disableSAX = 1;
5735 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005736 SKIP_BLANKS;
5737 /****************************
Daniel Veillardcf461992000-03-14 18:30:20 +00005738 if (RAW != ')') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005739 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005740 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005741 "xmlParseElementContentDecl : ')' expected\n");
5742 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005743 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005744 return(-1);
5745 }
5746 ****************************/
Daniel Veillard3b9def11999-01-31 22:15:06 +00005747 *result = tree;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005748 return(res);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005749}
5750
Daniel Veillard11e00581998-10-24 18:27:49 +00005751/**
5752 * xmlParseElementDecl:
5753 * @ctxt: an XML parser context
5754 *
5755 * parse an Element declaration.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005756 *
5757 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5758 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005759 * [ VC: Unique Element Type Declaration ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00005760 * No element type may be declared more than once
Daniel Veillard1e346af1999-02-22 10:33:01 +00005761 *
5762 * Returns the type of the element, or -1 in case of error
Daniel Veillard260a68f1998-08-13 03:39:55 +00005763 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005764int
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005765xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005766 xmlChar *name;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005767 int ret = -1;
5768 xmlElementContentPtr content = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005769
Daniel Veillardb05deb71999-08-10 19:04:08 +00005770 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00005771 if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00005772 (NXT(2) == 'E') && (NXT(3) == 'L') &&
5773 (NXT(4) == 'E') && (NXT(5) == 'M') &&
5774 (NXT(6) == 'E') && (NXT(7) == 'N') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005775 (NXT(8) == 'T')) {
Daniel Veillardcf461992000-03-14 18:30:20 +00005776 xmlParserInputPtr input = ctxt->input;
5777
Daniel Veillard260a68f1998-08-13 03:39:55 +00005778 SKIP(9);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005779 if (!IS_BLANK(CUR)) {
5780 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005781 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005782 "Space required after 'ELEMENT'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005783 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005784 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005785 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005786 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005787 SKIP_BLANKS;
5788 name = xmlParseName(ctxt);
5789 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005790 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005791 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005792 "xmlParseElementDecl: no name for Element\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005793 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005794 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005795 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005796 return(-1);
5797 }
5798 if (!IS_BLANK(CUR)) {
5799 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005800 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005801 "Space required after the element name\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005802 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005803 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005804 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005805 }
5806 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00005807 if ((RAW == 'E') && (NXT(1) == 'M') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00005808 (NXT(2) == 'P') && (NXT(3) == 'T') &&
5809 (NXT(4) == 'Y')) {
5810 SKIP(5);
5811 /*
5812 * Element must always be empty.
5813 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005814 ret = XML_ELEMENT_TYPE_EMPTY;
Daniel Veillardcf461992000-03-14 18:30:20 +00005815 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00005816 (NXT(2) == 'Y')) {
5817 SKIP(3);
5818 /*
5819 * Element is a generic container.
5820 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005821 ret = XML_ELEMENT_TYPE_ANY;
Daniel Veillardcf461992000-03-14 18:30:20 +00005822 } else if (RAW == '(') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005823 ret = xmlParseElementContentDecl(ctxt, name, &content);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005824 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00005825 /*
5826 * [ WFC: PEs in Internal Subset ] error handling.
5827 */
Daniel Veillardcf461992000-03-14 18:30:20 +00005828 if ((RAW == '%') && (ctxt->external == 0) &&
Daniel Veillardb05deb71999-08-10 19:04:08 +00005829 (ctxt->inputNr == 1)) {
5830 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5831 ctxt->sax->error(ctxt->userData,
5832 "PEReference: forbidden within markup decl in internal subset\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005833 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005834 } else {
5835 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5836 ctxt->sax->error(ctxt->userData,
5837 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005838 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005839 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005840 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005841 ctxt->disableSAX = 1;
Daniel Veillard6454aec1999-09-02 22:04:43 +00005842 if (name != NULL) xmlFree(name);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005843 return(-1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005844 }
Daniel Veillard686d6b62000-01-03 11:08:02 +00005845
Daniel Veillard260a68f1998-08-13 03:39:55 +00005846 SKIP_BLANKS;
Daniel Veillard686d6b62000-01-03 11:08:02 +00005847 /*
5848 * Pop-up of finished entities.
5849 */
Daniel Veillardcf461992000-03-14 18:30:20 +00005850 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillard686d6b62000-01-03 11:08:02 +00005851 xmlPopInput(ctxt);
5852 SKIP_BLANKS;
5853
Daniel Veillardcf461992000-03-14 18:30:20 +00005854 if (RAW != '>') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005855 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005856 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00005857 "xmlParseElementDecl: expected '>' at the end\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005858 ctxt->errNo = XML_ERR_GT_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005859 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005860 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005861 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00005862 if (input != ctxt->input) {
5863 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5864 ctxt->sax->error(ctxt->userData,
5865"Element declaration doesn't start and stop in the same entity\n");
5866 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5867 ctxt->wellFormed = 0;
5868 ctxt->disableSAX = 1;
5869 }
5870
Daniel Veillard260a68f1998-08-13 03:39:55 +00005871 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00005872 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5873 (ctxt->sax->elementDecl != NULL))
Daniel Veillard011b63c1999-06-02 17:44:04 +00005874 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5875 content);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005876 }
Daniel Veillard14fff061999-06-22 21:49:07 +00005877 if (content != NULL) {
5878 xmlFreeElementContent(content);
5879 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005880 if (name != NULL) {
Daniel Veillard6454aec1999-09-02 22:04:43 +00005881 xmlFree(name);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005882 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005883 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005884 return(ret);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005885}
5886
Daniel Veillard11e00581998-10-24 18:27:49 +00005887/**
5888 * xmlParseMarkupDecl:
5889 * @ctxt: an XML parser context
5890 *
5891 * parse Markup declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00005892 *
5893 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5894 * NotationDecl | PI | Comment
5895 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005896 * [ VC: Proper Declaration/PE Nesting ]
5897 * TODO Parameter-entity replacement text must be properly nested with
5898 * markup declarations. That is to say, if either the first character
5899 * or the last character of a markup declaration (markupdecl above) is
5900 * contained in the replacement text for a parameter-entity reference,
5901 * both must be contained in the same replacement text.
5902 *
5903 * [ WFC: PEs in Internal Subset ]
5904 * In the internal DTD subset, parameter-entity references can occur
5905 * only where markup declarations can occur, not within markup declarations.
5906 * (This does not apply to references that occur in external parameter
5907 * entities or to the external subset.)
Daniel Veillard260a68f1998-08-13 03:39:55 +00005908 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005909void
5910xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00005911 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005912 xmlParseElementDecl(ctxt);
5913 xmlParseAttributeListDecl(ctxt);
5914 xmlParseEntityDecl(ctxt);
5915 xmlParseNotationDecl(ctxt);
5916 xmlParsePI(ctxt);
Daniel Veillardb96e6431999-08-29 21:02:19 +00005917 xmlParseComment(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00005918 /*
5919 * This is only for internal subset. On external entities,
5920 * the replacement is done before parsing stage
5921 */
5922 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5923 xmlParsePEReference(ctxt);
5924 ctxt->instate = XML_PARSER_DTD;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005925}
5926
Daniel Veillard11e00581998-10-24 18:27:49 +00005927/**
Daniel Veillard011b63c1999-06-02 17:44:04 +00005928 * xmlParseTextDecl:
5929 * @ctxt: an XML parser context
5930 *
5931 * parse an XML declaration header for external entities
5932 *
5933 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5934 *
Daniel Veillardcf461992000-03-14 18:30:20 +00005935 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
Daniel Veillard011b63c1999-06-02 17:44:04 +00005936 */
5937
Daniel Veillardcf461992000-03-14 18:30:20 +00005938void
Daniel Veillard011b63c1999-06-02 17:44:04 +00005939xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005940 xmlChar *version;
Daniel Veillard011b63c1999-06-02 17:44:04 +00005941
5942 /*
5943 * We know that '<?xml' is here.
5944 */
5945 SKIP(5);
5946
5947 if (!IS_BLANK(CUR)) {
5948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005949 ctxt->sax->error(ctxt->userData,
5950 "Space needed after '<?xml'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005951 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00005952 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005953 ctxt->disableSAX = 1;
Daniel Veillard011b63c1999-06-02 17:44:04 +00005954 }
5955 SKIP_BLANKS;
5956
5957 /*
5958 * We may have the VersionInfo here.
5959 */
5960 version = xmlParseVersionInfo(ctxt);
Daniel Veillard011b63c1999-06-02 17:44:04 +00005961 if (version == NULL)
5962 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillardcf461992000-03-14 18:30:20 +00005963 ctxt->input->version = version;
Daniel Veillard011b63c1999-06-02 17:44:04 +00005964
5965 /*
5966 * We must have the encoding declaration
5967 */
5968 if (!IS_BLANK(CUR)) {
5969 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005970 ctxt->sax->error(ctxt->userData, "Space needed here\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005971 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00005972 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005973 ctxt->disableSAX = 1;
Daniel Veillard011b63c1999-06-02 17:44:04 +00005974 }
Daniel Veillardcf461992000-03-14 18:30:20 +00005975 ctxt->input->encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard011b63c1999-06-02 17:44:04 +00005976
5977 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00005978 if ((RAW == '?') && (NXT(1) == '>')) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00005979 SKIP(2);
Daniel Veillardcf461992000-03-14 18:30:20 +00005980 } else if (RAW == '>') {
Daniel Veillard011b63c1999-06-02 17:44:04 +00005981 /* Deprecated old WD ... */
5982 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005983 ctxt->sax->error(ctxt->userData,
5984 "XML declaration must end-up with '?>'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005985 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00005986 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005987 ctxt->disableSAX = 1;
Daniel Veillard011b63c1999-06-02 17:44:04 +00005988 NEXT;
5989 } else {
5990 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005991 ctxt->sax->error(ctxt->userData,
5992 "parsing XML declaration: '?>' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005993 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00005994 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005995 ctxt->disableSAX = 1;
Daniel Veillard011b63c1999-06-02 17:44:04 +00005996 MOVETO_ENDTAG(CUR_PTR);
5997 NEXT;
5998 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00005999}
6000
6001/*
6002 * xmlParseConditionalSections
6003 * @ctxt: an XML parser context
6004 *
6005 * TODO : Conditionnal section are not yet supported !
6006 *
6007 * [61] conditionalSect ::= includeSect | ignoreSect
6008 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6009 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6010 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6011 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6012 */
6013
6014void
6015xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillardcf461992000-03-14 18:30:20 +00006016 SKIP(3);
6017 SKIP_BLANKS;
6018 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
6019 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
6020 (NXT(6) == 'E')) {
6021 SKIP(7);
6022 SKIP_BLANKS;
6023 if (RAW != '[') {
6024 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6025 ctxt->sax->error(ctxt->userData,
6026 "XML conditional section '[' expected\n");
6027 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6028 ctxt->wellFormed = 0;
6029 ctxt->disableSAX = 1;
6030 } else {
6031 NEXT;
6032 }
6033 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6034 (NXT(2) != '>'))) {
6035 const xmlChar *check = CUR_PTR;
6036 int cons = ctxt->input->consumed;
6037 int tok = ctxt->token;
Daniel Veillard71b656e2000-01-05 14:46:17 +00006038
Daniel Veillardcf461992000-03-14 18:30:20 +00006039 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6040 xmlParseConditionalSections(ctxt);
6041 } else if (IS_BLANK(CUR)) {
6042 NEXT;
6043 } else if (RAW == '%') {
6044 xmlParsePEReference(ctxt);
6045 } else
6046 xmlParseMarkupDecl(ctxt);
6047
6048 /*
6049 * Pop-up of finished entities.
6050 */
6051 while ((RAW == 0) && (ctxt->inputNr > 1))
6052 xmlPopInput(ctxt);
6053
6054 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6055 (tok == ctxt->token)) {
6056 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6057 ctxt->sax->error(ctxt->userData,
6058 "Content error in the external subset\n");
6059 ctxt->wellFormed = 0;
6060 ctxt->disableSAX = 1;
6061 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
6062 break;
6063 }
6064 }
6065 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
6066 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
6067 int state;
6068
6069 SKIP(6);
6070 SKIP_BLANKS;
6071 if (RAW != '[') {
6072 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6073 ctxt->sax->error(ctxt->userData,
6074 "XML conditional section '[' expected\n");
6075 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6076 ctxt->wellFormed = 0;
6077 ctxt->disableSAX = 1;
6078 } else {
6079 NEXT;
6080 }
6081
6082 /*
6083 * Parse up to the end of the conditionnal section
6084 * But disable SAX event generating DTD building in the meantime
6085 */
6086 state = ctxt->disableSAX;
6087 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6088 (NXT(2) != '>'))) {
6089 const xmlChar *check = CUR_PTR;
6090 int cons = ctxt->input->consumed;
6091 int tok = ctxt->token;
6092
6093 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6094 xmlParseConditionalSections(ctxt);
6095 } else if (IS_BLANK(CUR)) {
6096 NEXT;
6097 } else if (RAW == '%') {
6098 xmlParsePEReference(ctxt);
6099 } else
6100 xmlParseMarkupDecl(ctxt);
6101
6102 /*
6103 * Pop-up of finished entities.
6104 */
6105 while ((RAW == 0) && (ctxt->inputNr > 1))
6106 xmlPopInput(ctxt);
6107
6108 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6109 (tok == ctxt->token)) {
6110 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6111 ctxt->sax->error(ctxt->userData,
6112 "Content error in the external subset\n");
6113 ctxt->wellFormed = 0;
6114 ctxt->disableSAX = 1;
6115 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
6116 break;
6117 }
6118 }
6119 ctxt->disableSAX = state;
6120 } else {
6121 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6122 ctxt->sax->error(ctxt->userData,
6123 "XML conditional section INCLUDE or IGNORE keyword expected\n");
6124 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6125 ctxt->wellFormed = 0;
6126 ctxt->disableSAX = 1;
Daniel Veillard71b656e2000-01-05 14:46:17 +00006127 }
6128
Daniel Veillardcf461992000-03-14 18:30:20 +00006129 if (RAW == 0)
Daniel Veillard71b656e2000-01-05 14:46:17 +00006130 SHRINK;
6131
Daniel Veillardcf461992000-03-14 18:30:20 +00006132 if (RAW == 0) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00006133 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6134 ctxt->sax->error(ctxt->userData,
6135 "XML conditional section not closed\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006136 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006137 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006138 ctxt->disableSAX = 1;
Daniel Veillard71b656e2000-01-05 14:46:17 +00006139 } else {
6140 SKIP(3);
Daniel Veillard011b63c1999-06-02 17:44:04 +00006141 }
6142}
6143
6144/**
Daniel Veillard00fdf371999-10-08 09:40:39 +00006145 * xmlParseExternalSubset:
Daniel Veillard011b63c1999-06-02 17:44:04 +00006146 * @ctxt: an XML parser context
Daniel Veillard00fdf371999-10-08 09:40:39 +00006147 * @ExternalID: the external identifier
6148 * @SystemID: the system identifier (or URL)
Daniel Veillard011b63c1999-06-02 17:44:04 +00006149 *
6150 * parse Markup declarations from an external subset
6151 *
6152 * [30] extSubset ::= textDecl? extSubsetDecl
6153 *
6154 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
Daniel Veillard011b63c1999-06-02 17:44:04 +00006155 */
6156void
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006157xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6158 const xmlChar *SystemID) {
Daniel Veillarda819dac1999-11-24 18:04:22 +00006159 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00006160 if ((RAW == '<') && (NXT(1) == '?') &&
Daniel Veillard011b63c1999-06-02 17:44:04 +00006161 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6162 (NXT(4) == 'l')) {
Daniel Veillardcf461992000-03-14 18:30:20 +00006163 xmlParseTextDecl(ctxt);
Daniel Veillard011b63c1999-06-02 17:44:04 +00006164 }
6165 if (ctxt->myDoc == NULL) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00006166 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillard011b63c1999-06-02 17:44:04 +00006167 }
6168 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6169 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6170
Daniel Veillardb05deb71999-08-10 19:04:08 +00006171 ctxt->instate = XML_PARSER_DTD;
6172 ctxt->external = 1;
Daniel Veillardcf461992000-03-14 18:30:20 +00006173 while (((RAW == '<') && (NXT(1) == '?')) ||
6174 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard011b63c1999-06-02 17:44:04 +00006175 IS_BLANK(CUR)) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006176 const xmlChar *check = CUR_PTR;
Daniel Veillardb96e6431999-08-29 21:02:19 +00006177 int cons = ctxt->input->consumed;
Daniel Veillardcf461992000-03-14 18:30:20 +00006178 int tok = ctxt->token;
Daniel Veillardb96e6431999-08-29 21:02:19 +00006179
Daniel Veillardcf461992000-03-14 18:30:20 +00006180 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00006181 xmlParseConditionalSections(ctxt);
6182 } else if (IS_BLANK(CUR)) {
6183 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00006184 } else if (RAW == '%') {
Daniel Veillard011b63c1999-06-02 17:44:04 +00006185 xmlParsePEReference(ctxt);
6186 } else
6187 xmlParseMarkupDecl(ctxt);
6188
6189 /*
6190 * Pop-up of finished entities.
6191 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006192 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillard011b63c1999-06-02 17:44:04 +00006193 xmlPopInput(ctxt);
6194
Daniel Veillardcf461992000-03-14 18:30:20 +00006195 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6196 (tok == ctxt->token)) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00006197 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6198 ctxt->sax->error(ctxt->userData,
6199 "Content error in the external subset\n");
6200 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006201 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006202 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
Daniel Veillardb96e6431999-08-29 21:02:19 +00006203 break;
6204 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00006205 }
6206
Daniel Veillardcf461992000-03-14 18:30:20 +00006207 if (RAW != 0) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00006208 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6209 ctxt->sax->error(ctxt->userData,
6210 "Extra content at the end of the document\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006211 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006212 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006213 ctxt->disableSAX = 1;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006214 }
6215
6216}
6217
6218/**
Daniel Veillard011b63c1999-06-02 17:44:04 +00006219 * xmlParseReference:
6220 * @ctxt: an XML parser context
6221 *
6222 * parse and handle entity references in content, depending on the SAX
6223 * interface, this may end-up in a call to character() if this is a
6224 * CharRef, a predefined entity, if there is no reference() callback.
6225 * or if the parser was asked to switch to that mode.
6226 *
6227 * [67] Reference ::= EntityRef | CharRef
6228 */
6229void
6230xmlParseReference(xmlParserCtxtPtr ctxt) {
6231 xmlEntityPtr ent;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006232 xmlChar *val;
Daniel Veillardcf461992000-03-14 18:30:20 +00006233 if (RAW != '&') return;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006234
Daniel Veillardb96e6431999-08-29 21:02:19 +00006235 if (ctxt->inputNr > 1) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006236 xmlChar cur[2] = { '&' , 0 } ;
Daniel Veillardb96e6431999-08-29 21:02:19 +00006237
Daniel Veillardcf461992000-03-14 18:30:20 +00006238 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6239 (!ctxt->disableSAX))
Daniel Veillardb96e6431999-08-29 21:02:19 +00006240 ctxt->sax->characters(ctxt->userData, cur, 1);
6241 if (ctxt->token == '&')
6242 ctxt->token = 0;
6243 else {
6244 SKIP(1);
6245 }
6246 return;
6247 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00006248 if (NXT(1) == '#') {
Daniel Veillardcf461992000-03-14 18:30:20 +00006249 int i = 0;
6250 xmlChar out[10];
6251 int hex = NXT(2);
Daniel Veillard011b63c1999-06-02 17:44:04 +00006252 int val = xmlParseCharRef(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00006253
6254 if (ctxt->encoding != NULL) {
6255 /*
6256 * So we are using non-UTF-8 buffers
6257 * Check that the char fit on 8bits, if not
6258 * generate a CharRef.
6259 */
6260 if (val <= 0xFF) {
6261 out[0] = val;
6262 out[1] = 0;
6263 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6264 (!ctxt->disableSAX))
6265 ctxt->sax->characters(ctxt->userData, out, 1);
6266 } else {
6267 if ((hex == 'x') || (hex == 'X'))
6268 sprintf((char *)out, "#x%X", val);
6269 else
6270 sprintf((char *)out, "#%d", val);
6271 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6272 (!ctxt->disableSAX))
6273 ctxt->sax->reference(ctxt->userData, out);
6274 }
6275 } else {
6276 /*
6277 * Just encode the value in UTF-8
6278 */
6279 COPY_BUF(0 ,out, i, val);
6280 out[i] = 0;
6281 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6282 (!ctxt->disableSAX))
6283 ctxt->sax->characters(ctxt->userData, out, i);
6284 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00006285 } else {
6286 ent = xmlParseEntityRef(ctxt);
6287 if (ent == NULL) return;
6288 if ((ent->name != NULL) &&
Daniel Veillardcf461992000-03-14 18:30:20 +00006289 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
6290 xmlNodePtr list = NULL;
6291 int ret;
6292
6293
6294 /*
6295 * The first reference to the entity trigger a parsing phase
6296 * where the ent->children is filled with the result from
6297 * the parsing.
6298 */
6299 if (ent->children == NULL) {
6300 xmlChar *value;
6301 value = ent->content;
6302
6303 /*
6304 * Check that this entity is well formed
6305 */
6306 if ((value != NULL) &&
6307 (value[1] == 0) && (value[0] == '<') &&
6308 (!xmlStrcmp(ent->name, BAD_CAST "lt"))) {
6309 /*
6310 * TODO: get definite answer on this !!!
6311 * Lots of entity decls are used to declare a single
6312 * char
6313 * <!ENTITY lt "<">
6314 * Which seems to be valid since
6315 * 2.4: The ampersand character (&) and the left angle
6316 * bracket (<) may appear in their literal form only
6317 * when used ... They are also legal within the literal
6318 * entity value of an internal entity declaration;i
6319 * see "4.3.2 Well-Formed Parsed Entities".
6320 * IMHO 2.4 and 4.3.2 are directly in contradiction.
6321 * Looking at the OASIS test suite and James Clark
6322 * tests, this is broken. However the XML REC uses
6323 * it. Is the XML REC not well-formed ????
6324 * This is a hack to avoid this problem
6325 */
6326 list = xmlNewDocText(ctxt->myDoc, value);
6327 if (list != NULL) {
6328 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6329 (ent->children == NULL)) {
6330 ent->children = list;
6331 ent->last = list;
6332 list->parent = (xmlNodePtr) ent;
6333 } else {
6334 xmlFreeNodeList(list);
6335 }
6336 } else if (list != NULL) {
6337 xmlFreeNodeList(list);
6338 }
6339 } else {
6340 /*
6341 * 4.3.2: An internal general parsed entity is well-formed
6342 * if its replacement text matches the production labeled
6343 * content.
6344 */
6345 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6346 ctxt->depth++;
6347 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
6348 ctxt->sax, NULL, ctxt->depth,
6349 value, &list);
6350 ctxt->depth--;
6351 } else if (ent->etype ==
6352 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6353 ctxt->depth++;
6354 ret = xmlParseExternalEntity(ctxt->myDoc,
6355 ctxt->sax, NULL, ctxt->depth,
6356 ent->SystemID, ent->ExternalID, &list);
6357 ctxt->depth--;
6358 } else {
6359 ret = -1;
6360 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6361 ctxt->sax->error(ctxt->userData,
6362 "Internal: invalid entity type\n");
6363 }
6364 if (ret == XML_ERR_ENTITY_LOOP) {
6365 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6366 ctxt->sax->error(ctxt->userData,
6367 "Detected entity reference loop\n");
6368 ctxt->wellFormed = 0;
6369 ctxt->disableSAX = 1;
6370 ctxt->errNo = XML_ERR_ENTITY_LOOP;
6371 } else if ((ret == 0) && (list != NULL)) {
6372 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6373 (ent->children == NULL)) {
6374 ent->children = list;
6375 while (list != NULL) {
6376 list->parent = (xmlNodePtr) ent;
6377 if (list->next == NULL)
6378 ent->last = list;
6379 list = list->next;
6380 }
6381 } else {
6382 xmlFreeNodeList(list);
6383 }
6384 } else if (ret > 0) {
6385 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6386 ctxt->sax->error(ctxt->userData,
6387 "Entity value required\n");
6388 ctxt->errNo = ret;
6389 ctxt->wellFormed = 0;
6390 ctxt->disableSAX = 1;
6391 } else if (list != NULL) {
6392 xmlFreeNodeList(list);
6393 }
6394 }
6395 }
Daniel Veillardb96e6431999-08-29 21:02:19 +00006396 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
Daniel Veillardcf461992000-03-14 18:30:20 +00006397 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00006398 /*
6399 * Create a node.
6400 */
6401 ctxt->sax->reference(ctxt->userData, ent->name);
6402 return;
6403 } else if (ctxt->replaceEntities) {
6404 xmlParserInputPtr input;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006405
Daniel Veillardb96e6431999-08-29 21:02:19 +00006406 input = xmlNewEntityInputStream(ctxt, ent);
6407 xmlPushInput(ctxt, input);
Daniel Veillardcf461992000-03-14 18:30:20 +00006408 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
6409 (RAW == '<') && (NXT(1) == '?') &&
6410 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6411 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6412 xmlParseTextDecl(ctxt);
6413 if (input->standalone) {
6414 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6415 ctxt->sax->error(ctxt->userData,
6416 "external parsed entities cannot be standalone\n");
6417 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
6418 ctxt->wellFormed = 0;
6419 ctxt->disableSAX = 1;
6420 }
6421 }
6422 /*
6423 * !!! TODO: build the tree under the entity first
6424 * 1234
6425 */
Daniel Veillardb96e6431999-08-29 21:02:19 +00006426 return;
6427 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00006428 }
6429 val = ent->content;
6430 if (val == NULL) return;
6431 /*
6432 * inline the entity.
6433 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006434 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6435 (!ctxt->disableSAX))
Daniel Veillard011b63c1999-06-02 17:44:04 +00006436 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6437 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00006438}
6439
Daniel Veillard11e00581998-10-24 18:27:49 +00006440/**
6441 * xmlParseEntityRef:
6442 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00006443 *
6444 * parse ENTITY references declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00006445 *
6446 * [68] EntityRef ::= '&' Name ';'
Daniel Veillard1e346af1999-02-22 10:33:01 +00006447 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00006448 * [ WFC: Entity Declared ]
6449 * In a document without any DTD, a document with only an internal DTD
6450 * subset which contains no parameter entity references, or a document
6451 * with "standalone='yes'", the Name given in the entity reference
6452 * must match that in an entity declaration, except that well-formed
6453 * documents need not declare any of the following entities: amp, lt,
6454 * gt, apos, quot. The declaration of a parameter entity must precede
6455 * any reference to it. Similarly, the declaration of a general entity
6456 * must precede any reference to it which appears in a default value in an
6457 * attribute-list declaration. Note that if entities are declared in the
6458 * external subset or in external parameter entities, a non-validating
6459 * processor is not obligated to read and process their declarations;
6460 * for such documents, the rule that an entity must be declared is a
6461 * well-formedness constraint only if standalone='yes'.
6462 *
6463 * [ WFC: Parsed Entity ]
6464 * An entity reference must not contain the name of an unparsed entity
6465 *
Daniel Veillard011b63c1999-06-02 17:44:04 +00006466 * Returns the xmlEntityPtr if found, or NULL otherwise.
Daniel Veillard260a68f1998-08-13 03:39:55 +00006467 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00006468xmlEntityPtr
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006469xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006470 xmlChar *name;
Daniel Veillard517752b1999-04-05 12:20:10 +00006471 xmlEntityPtr ent = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006472
Daniel Veillarde2d034d1999-07-27 19:52:06 +00006473 GROW;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006474
Daniel Veillardcf461992000-03-14 18:30:20 +00006475 if (RAW == '&') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006476 NEXT;
6477 name = xmlParseName(ctxt);
6478 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006479 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00006480 ctxt->sax->error(ctxt->userData,
6481 "xmlParseEntityRef: no name\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006482 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006483 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006484 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006485 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00006486 if (RAW == ';') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006487 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006488 /*
Daniel Veillard011b63c1999-06-02 17:44:04 +00006489 * Ask first SAX for entity resolution, otherwise try the
6490 * predefined set.
6491 */
6492 if (ctxt->sax != NULL) {
6493 if (ctxt->sax->getEntity != NULL)
6494 ent = ctxt->sax->getEntity(ctxt->userData, name);
6495 if (ent == NULL)
6496 ent = xmlGetPredefinedEntity(name);
6497 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00006498 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00006499 * [ WFC: Entity Declared ]
6500 * In a document without any DTD, a document with only an
6501 * internal DTD subset which contains no parameter entity
6502 * references, or a document with "standalone='yes'", the
6503 * Name given in the entity reference must match that in an
6504 * entity declaration, except that well-formed documents
6505 * need not declare any of the following entities: amp, lt,
6506 * gt, apos, quot.
6507 * The declaration of a parameter entity must precede any
6508 * reference to it.
6509 * Similarly, the declaration of a general entity must
6510 * precede any reference to it which appears in a default
6511 * value in an attribute-list declaration. Note that if
6512 * entities are declared in the external subset or in
6513 * external parameter entities, a non-validating processor
6514 * is not obligated to read and process their declarations;
6515 * for such documents, the rule that an entity must be
6516 * declared is a well-formedness constraint only if
6517 * standalone='yes'.
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006518 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00006519 if (ent == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00006520 if ((ctxt->standalone == 1) ||
6521 ((ctxt->hasExternalSubset == 0) &&
6522 (ctxt->hasPErefs == 0))) {
6523 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard011b63c1999-06-02 17:44:04 +00006524 ctxt->sax->error(ctxt->userData,
6525 "Entity '%s' not defined\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006526 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006527 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006528 ctxt->disableSAX = 1;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006529 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00006530 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6531 ctxt->sax->warning(ctxt->userData,
6532 "Entity '%s' not defined\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006533 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006534 }
6535 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006536
6537 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00006538 * [ WFC: Parsed Entity ]
6539 * An entity reference must not contain the name of an
6540 * unparsed entity
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006541 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006542 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00006543 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6544 ctxt->sax->error(ctxt->userData,
6545 "Entity reference to unparsed entity %s\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006546 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006547 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006548 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006549 }
6550
6551 /*
6552 * [ WFC: No External Entity References ]
6553 * Attribute values cannot contain direct or indirect
6554 * entity references to external entities.
6555 */
6556 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardcf461992000-03-14 18:30:20 +00006557 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00006558 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6559 ctxt->sax->error(ctxt->userData,
6560 "Attribute references external entity '%s'\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006561 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006562 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006563 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006564 }
6565 /*
6566 * [ WFC: No < in Attribute Values ]
6567 * The replacement text of any entity referred to directly or
6568 * indirectly in an attribute value (other than "&lt;") must
6569 * not contain a <.
6570 */
6571 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardb96e6431999-08-29 21:02:19 +00006572 (ent != NULL) &&
6573 (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
Daniel Veillardb05deb71999-08-10 19:04:08 +00006574 (ent->content != NULL) &&
6575 (xmlStrchr(ent->content, '<'))) {
6576 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6577 ctxt->sax->error(ctxt->userData,
6578 "'<' in entity '%s' is not allowed in attributes values\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006579 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006580 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006581 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006582 }
6583
6584 /*
6585 * Internal check, no parameter entities here ...
6586 */
6587 else {
Daniel Veillardcf461992000-03-14 18:30:20 +00006588 switch (ent->etype) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006589 case XML_INTERNAL_PARAMETER_ENTITY:
6590 case XML_EXTERNAL_PARAMETER_ENTITY:
6591 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006592 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006593 "Attempt to reference the parameter entity '%s'\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006594 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006595 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006596 ctxt->disableSAX = 1;
6597 break;
6598 default:
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006599 break;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006600 }
6601 }
6602
6603 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00006604 * [ WFC: No Recursion ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00006605 * TODO A parsed entity must not contain a recursive reference
6606 * to itself, either directly or indirectly.
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006607 */
Daniel Veillard260a68f1998-08-13 03:39:55 +00006608
Daniel Veillard011b63c1999-06-02 17:44:04 +00006609 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006611 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006612 "xmlParseEntityRef: expecting ';'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006613 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006614 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006615 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006616 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00006617 xmlFree(name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006618 }
6619 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00006620 return(ent);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006621}
Daniel Veillard10a2c651999-12-12 13:03:50 +00006622/**
6623 * xmlParseStringEntityRef:
6624 * @ctxt: an XML parser context
6625 * @str: a pointer to an index in the string
6626 *
6627 * parse ENTITY references declarations, but this version parses it from
6628 * a string value.
6629 *
6630 * [68] EntityRef ::= '&' Name ';'
6631 *
6632 * [ WFC: Entity Declared ]
6633 * In a document without any DTD, a document with only an internal DTD
6634 * subset which contains no parameter entity references, or a document
6635 * with "standalone='yes'", the Name given in the entity reference
6636 * must match that in an entity declaration, except that well-formed
6637 * documents need not declare any of the following entities: amp, lt,
6638 * gt, apos, quot. The declaration of a parameter entity must precede
6639 * any reference to it. Similarly, the declaration of a general entity
6640 * must precede any reference to it which appears in a default value in an
6641 * attribute-list declaration. Note that if entities are declared in the
6642 * external subset or in external parameter entities, a non-validating
6643 * processor is not obligated to read and process their declarations;
6644 * for such documents, the rule that an entity must be declared is a
6645 * well-formedness constraint only if standalone='yes'.
6646 *
6647 * [ WFC: Parsed Entity ]
6648 * An entity reference must not contain the name of an unparsed entity
6649 *
6650 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6651 * is updated to the current location in the string.
6652 */
6653xmlEntityPtr
6654xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6655 xmlChar *name;
6656 const xmlChar *ptr;
6657 xmlChar cur;
6658 xmlEntityPtr ent = NULL;
6659
Daniel Veillardcf461992000-03-14 18:30:20 +00006660 if ((str == NULL) || (*str == NULL))
6661 return(NULL);
Daniel Veillard10a2c651999-12-12 13:03:50 +00006662 ptr = *str;
6663 cur = *ptr;
6664 if (cur == '&') {
6665 ptr++;
6666 cur = *ptr;
6667 name = xmlParseStringName(ctxt, &ptr);
6668 if (name == NULL) {
6669 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6670 ctxt->sax->error(ctxt->userData,
6671 "xmlParseEntityRef: no name\n");
6672 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6673 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006674 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00006675 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00006676 if (*ptr == ';') {
6677 ptr++;
Daniel Veillard10a2c651999-12-12 13:03:50 +00006678 /*
6679 * Ask first SAX for entity resolution, otherwise try the
6680 * predefined set.
6681 */
6682 if (ctxt->sax != NULL) {
6683 if (ctxt->sax->getEntity != NULL)
6684 ent = ctxt->sax->getEntity(ctxt->userData, name);
6685 if (ent == NULL)
6686 ent = xmlGetPredefinedEntity(name);
6687 }
6688 /*
6689 * [ WFC: Entity Declared ]
6690 * In a document without any DTD, a document with only an
6691 * internal DTD subset which contains no parameter entity
6692 * references, or a document with "standalone='yes'", the
6693 * Name given in the entity reference must match that in an
6694 * entity declaration, except that well-formed documents
6695 * need not declare any of the following entities: amp, lt,
6696 * gt, apos, quot.
6697 * The declaration of a parameter entity must precede any
6698 * reference to it.
6699 * Similarly, the declaration of a general entity must
6700 * precede any reference to it which appears in a default
6701 * value in an attribute-list declaration. Note that if
6702 * entities are declared in the external subset or in
6703 * external parameter entities, a non-validating processor
6704 * is not obligated to read and process their declarations;
6705 * for such documents, the rule that an entity must be
6706 * declared is a well-formedness constraint only if
6707 * standalone='yes'.
6708 */
6709 if (ent == NULL) {
6710 if ((ctxt->standalone == 1) ||
6711 ((ctxt->hasExternalSubset == 0) &&
6712 (ctxt->hasPErefs == 0))) {
6713 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6714 ctxt->sax->error(ctxt->userData,
6715 "Entity '%s' not defined\n", name);
6716 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6717 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006718 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00006719 } else {
6720 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6721 ctxt->sax->warning(ctxt->userData,
6722 "Entity '%s' not defined\n", name);
6723 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
6724 }
6725 }
6726
6727 /*
6728 * [ WFC: Parsed Entity ]
6729 * An entity reference must not contain the name of an
6730 * unparsed entity
6731 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006732 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00006733 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6734 ctxt->sax->error(ctxt->userData,
6735 "Entity reference to unparsed entity %s\n", name);
6736 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
6737 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006738 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00006739 }
6740
6741 /*
6742 * [ WFC: No External Entity References ]
6743 * Attribute values cannot contain direct or indirect
6744 * entity references to external entities.
6745 */
6746 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardcf461992000-03-14 18:30:20 +00006747 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00006748 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6749 ctxt->sax->error(ctxt->userData,
6750 "Attribute references external entity '%s'\n", name);
6751 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6752 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006753 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00006754 }
6755 /*
6756 * [ WFC: No < in Attribute Values ]
6757 * The replacement text of any entity referred to directly or
6758 * indirectly in an attribute value (other than "&lt;") must
6759 * not contain a <.
6760 */
6761 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6762 (ent != NULL) &&
6763 (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
6764 (ent->content != NULL) &&
6765 (xmlStrchr(ent->content, '<'))) {
6766 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6767 ctxt->sax->error(ctxt->userData,
6768 "'<' in entity '%s' is not allowed in attributes values\n", name);
6769 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6770 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006771 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00006772 }
6773
6774 /*
6775 * Internal check, no parameter entities here ...
6776 */
6777 else {
Daniel Veillardcf461992000-03-14 18:30:20 +00006778 switch (ent->etype) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00006779 case XML_INTERNAL_PARAMETER_ENTITY:
6780 case XML_EXTERNAL_PARAMETER_ENTITY:
6781 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6782 ctxt->sax->error(ctxt->userData,
6783 "Attempt to reference the parameter entity '%s'\n", name);
6784 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6785 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006786 ctxt->disableSAX = 1;
6787 break;
6788 default:
Daniel Veillard10a2c651999-12-12 13:03:50 +00006789 break;
6790 }
6791 }
6792
6793 /*
6794 * [ WFC: No Recursion ]
6795 * TODO A parsed entity must not contain a recursive reference
6796 * to itself, either directly or indirectly.
6797 */
6798
6799 } else {
6800 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6801 ctxt->sax->error(ctxt->userData,
6802 "xmlParseEntityRef: expecting ';'\n");
6803 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6804 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006805 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00006806 }
6807 xmlFree(name);
6808 }
6809 }
Daniel Veillardcf461992000-03-14 18:30:20 +00006810 *str = ptr;
Daniel Veillard10a2c651999-12-12 13:03:50 +00006811 return(ent);
6812}
Daniel Veillard260a68f1998-08-13 03:39:55 +00006813
Daniel Veillard11e00581998-10-24 18:27:49 +00006814/**
6815 * xmlParsePEReference:
6816 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00006817 *
6818 * parse PEReference declarations
Daniel Veillard011b63c1999-06-02 17:44:04 +00006819 * The entity content is handled directly by pushing it's content as
6820 * a new input stream.
Daniel Veillard260a68f1998-08-13 03:39:55 +00006821 *
6822 * [69] PEReference ::= '%' Name ';'
Daniel Veillard1e346af1999-02-22 10:33:01 +00006823 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00006824 * [ WFC: No Recursion ]
6825 * TODO A parsed entity must not contain a recursive
6826 * reference to itself, either directly or indirectly.
6827 *
6828 * [ WFC: Entity Declared ]
6829 * In a document without any DTD, a document with only an internal DTD
6830 * subset which contains no parameter entity references, or a document
6831 * with "standalone='yes'", ... ... The declaration of a parameter
6832 * entity must precede any reference to it...
6833 *
6834 * [ VC: Entity Declared ]
6835 * In a document with an external subset or external parameter entities
6836 * with "standalone='no'", ... ... The declaration of a parameter entity
6837 * must precede any reference to it...
6838 *
6839 * [ WFC: In DTD ]
6840 * Parameter-entity references may only appear in the DTD.
6841 * NOTE: misleading but this is handled.
Daniel Veillard260a68f1998-08-13 03:39:55 +00006842 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00006843void
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006844xmlParsePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006845 xmlChar *name;
Daniel Veillard517752b1999-04-05 12:20:10 +00006846 xmlEntityPtr entity = NULL;
Daniel Veillardccb09631998-10-27 06:21:04 +00006847 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006848
Daniel Veillardcf461992000-03-14 18:30:20 +00006849 if (RAW == '%') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006850 NEXT;
6851 name = xmlParseName(ctxt);
6852 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006853 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006854 ctxt->sax->error(ctxt->userData,
6855 "xmlParsePEReference: no name\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006856 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006857 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006858 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006859 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00006860 if (RAW == ';') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00006861 NEXT;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006862 if ((ctxt->sax != NULL) &&
6863 (ctxt->sax->getParameterEntity != NULL))
6864 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6865 name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006866 if (entity == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00006867 /*
6868 * [ WFC: Entity Declared ]
6869 * In a document without any DTD, a document with only an
6870 * internal DTD subset which contains no parameter entity
6871 * references, or a document with "standalone='yes'", ...
6872 * ... The declaration of a parameter entity must precede
6873 * any reference to it...
6874 */
6875 if ((ctxt->standalone == 1) ||
6876 ((ctxt->hasExternalSubset == 0) &&
6877 (ctxt->hasPErefs == 0))) {
6878 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6879 ctxt->sax->error(ctxt->userData,
6880 "PEReference: %%%s; not found\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006881 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006882 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006883 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006884 } else {
6885 /*
6886 * [ VC: Entity Declared ]
6887 * In a document with an external subset or external
6888 * parameter entities with "standalone='no'", ...
6889 * ... The declaration of a parameter entity must precede
6890 * any reference to it...
6891 */
6892 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6893 ctxt->sax->warning(ctxt->userData,
6894 "PEReference: %%%s; not found\n", name);
6895 ctxt->valid = 0;
6896 }
Daniel Veillardccb09631998-10-27 06:21:04 +00006897 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00006898 /*
6899 * Internal checking in case the entity quest barfed
6900 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006901 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6902 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00006903 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6904 ctxt->sax->warning(ctxt->userData,
6905 "Internal: %%%s; is not a parameter entity\n", name);
6906 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00006907 /*
6908 * TODO !!!
6909 * handle the extra spaces added before and after
6910 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6911 */
Daniel Veillardb05deb71999-08-10 19:04:08 +00006912 input = xmlNewEntityInputStream(ctxt, entity);
6913 xmlPushInput(ctxt, input);
Daniel Veillardcf461992000-03-14 18:30:20 +00006914 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6915 (RAW == '<') && (NXT(1) == '?') &&
6916 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6917 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6918 xmlParseTextDecl(ctxt);
6919 }
6920 if (ctxt->token == 0)
6921 ctxt->token = ' ';
Daniel Veillardb05deb71999-08-10 19:04:08 +00006922 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00006923 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00006924 ctxt->hasPErefs = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006925 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006926 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006927 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006928 "xmlParsePEReference: expecting ';'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006929 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006930 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006931 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006932 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00006933 xmlFree(name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006934 }
6935 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00006936}
6937
Daniel Veillard11e00581998-10-24 18:27:49 +00006938/**
Daniel Veillard10a2c651999-12-12 13:03:50 +00006939 * xmlParseStringPEReference:
6940 * @ctxt: an XML parser context
6941 * @str: a pointer to an index in the string
6942 *
6943 * parse PEReference declarations
6944 *
6945 * [69] PEReference ::= '%' Name ';'
6946 *
6947 * [ WFC: No Recursion ]
6948 * TODO A parsed entity must not contain a recursive
6949 * reference to itself, either directly or indirectly.
6950 *
6951 * [ WFC: Entity Declared ]
6952 * In a document without any DTD, a document with only an internal DTD
6953 * subset which contains no parameter entity references, or a document
6954 * with "standalone='yes'", ... ... The declaration of a parameter
6955 * entity must precede any reference to it...
6956 *
6957 * [ VC: Entity Declared ]
6958 * In a document with an external subset or external parameter entities
6959 * with "standalone='no'", ... ... The declaration of a parameter entity
6960 * must precede any reference to it...
6961 *
6962 * [ WFC: In DTD ]
6963 * Parameter-entity references may only appear in the DTD.
6964 * NOTE: misleading but this is handled.
6965 *
6966 * Returns the string of the entity content.
6967 * str is updated to the current value of the index
6968 */
6969xmlEntityPtr
6970xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6971 const xmlChar *ptr;
6972 xmlChar cur;
6973 xmlChar *name;
6974 xmlEntityPtr entity = NULL;
6975
6976 if ((str == NULL) || (*str == NULL)) return(NULL);
6977 ptr = *str;
6978 cur = *ptr;
6979 if (cur == '%') {
6980 ptr++;
6981 cur = *ptr;
6982 name = xmlParseStringName(ctxt, &ptr);
6983 if (name == NULL) {
6984 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6985 ctxt->sax->error(ctxt->userData,
6986 "xmlParseStringPEReference: no name\n");
6987 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6988 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006989 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00006990 } else {
6991 cur = *ptr;
6992 if (cur == ';') {
6993 ptr++;
6994 cur = *ptr;
6995 if ((ctxt->sax != NULL) &&
6996 (ctxt->sax->getParameterEntity != NULL))
6997 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6998 name);
6999 if (entity == NULL) {
7000 /*
7001 * [ WFC: Entity Declared ]
7002 * In a document without any DTD, a document with only an
7003 * internal DTD subset which contains no parameter entity
7004 * references, or a document with "standalone='yes'", ...
7005 * ... The declaration of a parameter entity must precede
7006 * any reference to it...
7007 */
7008 if ((ctxt->standalone == 1) ||
7009 ((ctxt->hasExternalSubset == 0) &&
7010 (ctxt->hasPErefs == 0))) {
7011 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7012 ctxt->sax->error(ctxt->userData,
7013 "PEReference: %%%s; not found\n", name);
7014 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
7015 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007016 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00007017 } else {
7018 /*
7019 * [ VC: Entity Declared ]
7020 * In a document with an external subset or external
7021 * parameter entities with "standalone='no'", ...
7022 * ... The declaration of a parameter entity must
7023 * precede any reference to it...
7024 */
7025 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7026 ctxt->sax->warning(ctxt->userData,
7027 "PEReference: %%%s; not found\n", name);
7028 ctxt->valid = 0;
7029 }
7030 } else {
7031 /*
7032 * Internal checking in case the entity quest barfed
7033 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007034 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7035 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00007036 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7037 ctxt->sax->warning(ctxt->userData,
7038 "Internal: %%%s; is not a parameter entity\n", name);
7039 }
7040 }
7041 ctxt->hasPErefs = 1;
7042 } else {
7043 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7044 ctxt->sax->error(ctxt->userData,
7045 "xmlParseStringPEReference: expecting ';'\n");
7046 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
7047 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007048 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00007049 }
7050 xmlFree(name);
7051 }
7052 }
7053 *str = ptr;
7054 return(entity);
7055}
7056
7057/**
Daniel Veillardcf461992000-03-14 18:30:20 +00007058 * xmlParseDocTypeDecl:
Daniel Veillard11e00581998-10-24 18:27:49 +00007059 * @ctxt: an XML parser context
7060 *
7061 * parse a DOCTYPE declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00007062 *
7063 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7064 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
Daniel Veillardb05deb71999-08-10 19:04:08 +00007065 *
7066 * [ VC: Root Element Type ]
7067 * The Name in the document type declaration must match the element
7068 * type of the root element.
Daniel Veillard260a68f1998-08-13 03:39:55 +00007069 */
7070
Daniel Veillard0ba4d531998-11-01 19:34:31 +00007071void
7072xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillardcf461992000-03-14 18:30:20 +00007073 xmlChar *name = NULL;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007074 xmlChar *ExternalID = NULL;
7075 xmlChar *URI = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007076
7077 /*
7078 * We know that '<!DOCTYPE' has been detected.
7079 */
7080 SKIP(9);
7081
7082 SKIP_BLANKS;
7083
7084 /*
7085 * Parse the DOCTYPE name.
7086 */
7087 name = xmlParseName(ctxt);
7088 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00007089 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00007090 ctxt->sax->error(ctxt->userData,
7091 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007092 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007093 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007094 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007095 }
Daniel Veillardcf461992000-03-14 18:30:20 +00007096 ctxt->intSubName = name;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007097
7098 SKIP_BLANKS;
7099
7100 /*
7101 * Check for SystemID and ExternalID
7102 */
Daniel Veillard1e346af1999-02-22 10:33:01 +00007103 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
Daniel Veillardb05deb71999-08-10 19:04:08 +00007104
7105 if ((URI != NULL) || (ExternalID != NULL)) {
7106 ctxt->hasExternalSubset = 1;
7107 }
Daniel Veillardcf461992000-03-14 18:30:20 +00007108 ctxt->extSubURI = URI;
7109 ctxt->extSubSystem = ExternalID;
Daniel Veillardb05deb71999-08-10 19:04:08 +00007110
Daniel Veillard260a68f1998-08-13 03:39:55 +00007111 SKIP_BLANKS;
7112
Daniel Veillard011b63c1999-06-02 17:44:04 +00007113 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00007114 * Create and update the internal subset.
Daniel Veillard011b63c1999-06-02 17:44:04 +00007115 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007116 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7117 (!ctxt->disableSAX))
Daniel Veillard27d88741999-05-29 11:51:49 +00007118 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007119
7120 /*
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007121 * Is there any internal subset declarations ?
7122 * they are handled separately in xmlParseInternalSubset()
7123 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007124 if (RAW == '[')
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007125 return;
7126
7127 /*
7128 * We should be at the end of the DOCTYPE declaration.
7129 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007130 if (RAW != '>') {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007131 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7132 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
7133 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007134 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007135 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
7136 }
7137 NEXT;
7138}
7139
7140/**
Daniel Veillardcf461992000-03-14 18:30:20 +00007141 * xmlParseInternalsubset:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007142 * @ctxt: an XML parser context
7143 *
7144 * parse the internal subset declaration
7145 *
7146 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7147 */
7148
7149void
7150xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7151 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00007152 * Is there any DTD definition ?
7153 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007154 if (RAW == '[') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00007155 ctxt->instate = XML_PARSER_DTD;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007156 NEXT;
7157 /*
7158 * Parse the succession of Markup declarations and
7159 * PEReferences.
7160 * Subsequence (markupdecl | PEReference | S)*
7161 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007162 while (RAW != ']') {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007163 const xmlChar *check = CUR_PTR;
Daniel Veillardb96e6431999-08-29 21:02:19 +00007164 int cons = ctxt->input->consumed;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007165
7166 SKIP_BLANKS;
7167 xmlParseMarkupDecl(ctxt);
Daniel Veillardccb09631998-10-27 06:21:04 +00007168 xmlParsePEReference(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007169
Daniel Veillard011b63c1999-06-02 17:44:04 +00007170 /*
7171 * Pop-up of finished entities.
7172 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007173 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillard011b63c1999-06-02 17:44:04 +00007174 xmlPopInput(ctxt);
7175
Daniel Veillardc26087b1999-08-30 11:23:51 +00007176 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00007177 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7178 ctxt->sax->error(ctxt->userData,
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007179 "xmlParseInternalSubset: error detected in Markup declaration\n");
Daniel Veillardb96e6431999-08-29 21:02:19 +00007180 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007181 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007182 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillardb96e6431999-08-29 21:02:19 +00007183 break;
7184 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00007185 }
Daniel Veillardcf461992000-03-14 18:30:20 +00007186 if (RAW == ']') NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007187 }
7188
7189 /*
7190 * We should be at the end of the DOCTYPE declaration.
7191 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007192 if (RAW != '>') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00007193 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00007194 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007195 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007196 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007197 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007198 }
7199 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007200}
7201
Daniel Veillard11e00581998-10-24 18:27:49 +00007202/**
7203 * xmlParseAttribute:
7204 * @ctxt: an XML parser context
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007205 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard11e00581998-10-24 18:27:49 +00007206 *
7207 * parse an attribute
Daniel Veillard260a68f1998-08-13 03:39:55 +00007208 *
7209 * [41] Attribute ::= Name Eq AttValue
7210 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00007211 * [ WFC: No External Entity References ]
7212 * Attribute values cannot contain direct or indirect entity references
7213 * to external entities.
7214 *
7215 * [ WFC: No < in Attribute Values ]
7216 * The replacement text of any entity referred to directly or indirectly in
7217 * an attribute value (other than "&lt;") must not contain a <.
7218 *
7219 * [ VC: Attribute Value Type ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00007220 * The attribute must have been declared; the value must be of the type
Daniel Veillardb05deb71999-08-10 19:04:08 +00007221 * declared for it.
7222 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00007223 * [25] Eq ::= S? '=' S?
7224 *
7225 * With namespace:
7226 *
7227 * [NS 11] Attribute ::= QName Eq AttValue
7228 *
7229 * Also the case QName == xmlns:??? is handled independently as a namespace
7230 * definition.
Daniel Veillard1e346af1999-02-22 10:33:01 +00007231 *
Daniel Veillard517752b1999-04-05 12:20:10 +00007232 * Returns the attribute name, and the value in *value.
Daniel Veillard260a68f1998-08-13 03:39:55 +00007233 */
7234
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007235xmlChar *
7236xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
7237 xmlChar *name, *val;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007238
Daniel Veillard517752b1999-04-05 12:20:10 +00007239 *value = NULL;
7240 name = xmlParseName(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007241 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00007242 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00007243 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007244 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007245 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007246 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillardccb09631998-10-27 06:21:04 +00007247 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007248 }
7249
7250 /*
7251 * read the value
7252 */
7253 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00007254 if (RAW == '=') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00007255 NEXT;
7256 SKIP_BLANKS;
Daniel Veillard517752b1999-04-05 12:20:10 +00007257 val = xmlParseAttValue(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00007258 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007259 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00007260 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00007261 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007262 "Specification mandate value for attribute %s\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007263 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007264 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007265 ctxt->disableSAX = 1;
7266 xmlFree(name);
Daniel Veillardccb09631998-10-27 06:21:04 +00007267 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007268 }
7269
Daniel Veillardcf461992000-03-14 18:30:20 +00007270 /*
7271 * Check that xml:lang conforms to the specification
7272 */
7273 if (!xmlStrcmp(name, BAD_CAST "xml:lang")) {
7274 if (!xmlCheckLanguageID(val)) {
7275 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7276 ctxt->sax->error(ctxt->userData,
7277 "Invalid value for xml:lang : %s\n", val);
7278 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
7279 ctxt->wellFormed = 0;
7280 ctxt->disableSAX = 1;
7281 }
7282 }
7283
7284 /*
7285 * Check that xml:space conforms to the specification
7286 */
7287 if (!xmlStrcmp(name, BAD_CAST "xml:space")) {
7288 if (!xmlStrcmp(val, BAD_CAST "default"))
7289 *(ctxt->space) = 0;
7290 else if (!xmlStrcmp(val, BAD_CAST "preserve"))
7291 *(ctxt->space) = 1;
7292 else {
7293 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7294 ctxt->sax->error(ctxt->userData,
7295"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
7296 val);
7297 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
7298 ctxt->wellFormed = 0;
7299 ctxt->disableSAX = 1;
7300 }
7301 }
7302
Daniel Veillard517752b1999-04-05 12:20:10 +00007303 *value = val;
7304 return(name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007305}
7306
Daniel Veillard11e00581998-10-24 18:27:49 +00007307/**
7308 * xmlParseStartTag:
7309 * @ctxt: an XML parser context
7310 *
7311 * parse a start of tag either for rule element or
7312 * EmptyElement. In both case we don't parse the tag closing chars.
Daniel Veillard260a68f1998-08-13 03:39:55 +00007313 *
7314 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7315 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00007316 * [ WFC: Unique Att Spec ]
7317 * No attribute name may appear more than once in the same start-tag or
7318 * empty-element tag.
7319 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00007320 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7321 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00007322 * [ WFC: Unique Att Spec ]
7323 * No attribute name may appear more than once in the same start-tag or
7324 * empty-element tag.
7325 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00007326 * With namespace:
7327 *
7328 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7329 *
7330 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
Daniel Veillard14fff061999-06-22 21:49:07 +00007331 *
Daniel Veillard7f858501999-11-17 17:32:38 +00007332 * Returne the element name parsed
Daniel Veillard260a68f1998-08-13 03:39:55 +00007333 */
7334
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007335xmlChar *
Daniel Veillard1e346af1999-02-22 10:33:01 +00007336xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007337 xmlChar *name;
7338 xmlChar *attname;
7339 xmlChar *attvalue;
7340 const xmlChar **atts = NULL;
Daniel Veillard517752b1999-04-05 12:20:10 +00007341 int nbatts = 0;
7342 int maxatts = 0;
7343 int i;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007344
Daniel Veillardcf461992000-03-14 18:30:20 +00007345 if (RAW != '<') return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007346 NEXT;
7347
Daniel Veillard517752b1999-04-05 12:20:10 +00007348 name = xmlParseName(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007349 if (name == NULL) {
7350 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00007351 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007352 "xmlParseStartTag: invalid element name\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007353 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007354 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007355 ctxt->disableSAX = 1;
Daniel Veillard14fff061999-06-22 21:49:07 +00007356 return(NULL);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007357 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00007358
7359 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00007360 * Now parse the attributes, it ends up with the ending
7361 *
7362 * (S Attribute)* S?
7363 */
7364 SKIP_BLANKS;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00007365 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00007366
7367 while ((IS_CHAR(RAW)) &&
7368 (RAW != '>') &&
7369 ((RAW != '/') || (NXT(1) != '>'))) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007370 const xmlChar *q = CUR_PTR;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00007371 int cons = ctxt->input->consumed;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007372
Daniel Veillard517752b1999-04-05 12:20:10 +00007373 attname = xmlParseAttribute(ctxt, &attvalue);
7374 if ((attname != NULL) && (attvalue != NULL)) {
7375 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00007376 * [ WFC: Unique Att Spec ]
7377 * No attribute name may appear more than once in the same
7378 * start-tag or empty-element tag.
Daniel Veillard517752b1999-04-05 12:20:10 +00007379 */
7380 for (i = 0; i < nbatts;i += 2) {
7381 if (!xmlStrcmp(atts[i], attname)) {
7382 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00007383 ctxt->sax->error(ctxt->userData,
7384 "Attribute %s redefined\n",
7385 attname);
Daniel Veillard517752b1999-04-05 12:20:10 +00007386 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007387 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007388 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard6454aec1999-09-02 22:04:43 +00007389 xmlFree(attname);
7390 xmlFree(attvalue);
Daniel Veillardb05deb71999-08-10 19:04:08 +00007391 goto failed;
Daniel Veillard517752b1999-04-05 12:20:10 +00007392 }
7393 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00007394
Daniel Veillard517752b1999-04-05 12:20:10 +00007395 /*
7396 * Add the pair to atts
7397 */
7398 if (atts == NULL) {
7399 maxatts = 10;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007400 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillard517752b1999-04-05 12:20:10 +00007401 if (atts == NULL) {
Daniel Veillardbe70ff71999-07-05 16:50:46 +00007402 fprintf(stderr, "malloc of %ld byte failed\n",
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007403 maxatts * (long)sizeof(xmlChar *));
Daniel Veillard14fff061999-06-22 21:49:07 +00007404 return(NULL);
Daniel Veillard517752b1999-04-05 12:20:10 +00007405 }
Daniel Veillard51e3b151999-11-12 17:02:31 +00007406 } else if (nbatts + 4 > maxatts) {
Daniel Veillard517752b1999-04-05 12:20:10 +00007407 maxatts *= 2;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007408 atts = (const xmlChar **) xmlRealloc(atts,
7409 maxatts * sizeof(xmlChar *));
Daniel Veillard517752b1999-04-05 12:20:10 +00007410 if (atts == NULL) {
Daniel Veillardbe70ff71999-07-05 16:50:46 +00007411 fprintf(stderr, "realloc of %ld byte failed\n",
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007412 maxatts * (long)sizeof(xmlChar *));
Daniel Veillard14fff061999-06-22 21:49:07 +00007413 return(NULL);
Daniel Veillard517752b1999-04-05 12:20:10 +00007414 }
7415 }
7416 atts[nbatts++] = attname;
7417 atts[nbatts++] = attvalue;
7418 atts[nbatts] = NULL;
7419 atts[nbatts + 1] = NULL;
Daniel Veillardcf461992000-03-14 18:30:20 +00007420 } else {
7421 if (attname != NULL)
7422 xmlFree(attname);
7423 if (attvalue != NULL)
7424 xmlFree(attvalue);
Daniel Veillard517752b1999-04-05 12:20:10 +00007425 }
7426
Daniel Veillardb96e6431999-08-29 21:02:19 +00007427failed:
Daniel Veillardcf461992000-03-14 18:30:20 +00007428
7429 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7430 break;
7431 if (!IS_BLANK(RAW)) {
7432 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7433 ctxt->sax->error(ctxt->userData,
7434 "attributes construct error\n");
7435 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7436 ctxt->wellFormed = 0;
7437 ctxt->disableSAX = 1;
7438 }
Daniel Veillard517752b1999-04-05 12:20:10 +00007439 SKIP_BLANKS;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00007440 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00007441 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00007442 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00007443 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007444 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007445 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007446 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007447 break;
7448 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00007449 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007450 }
7451
7452 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00007453 * SAX: Start of Element !
7454 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007455 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
7456 (!ctxt->disableSAX))
Daniel Veillard27d88741999-05-29 11:51:49 +00007457 ctxt->sax->startElement(ctxt->userData, name, atts);
Daniel Veillard517752b1999-04-05 12:20:10 +00007458
Daniel Veillard517752b1999-04-05 12:20:10 +00007459 if (atts != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007460 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
Daniel Veillard6454aec1999-09-02 22:04:43 +00007461 xmlFree(atts);
Daniel Veillard517752b1999-04-05 12:20:10 +00007462 }
Daniel Veillard14fff061999-06-22 21:49:07 +00007463 return(name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007464}
7465
Daniel Veillard11e00581998-10-24 18:27:49 +00007466/**
7467 * xmlParseEndTag:
7468 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00007469 *
7470 * parse an end of tag
Daniel Veillard260a68f1998-08-13 03:39:55 +00007471 *
7472 * [42] ETag ::= '</' Name S? '>'
7473 *
7474 * With namespace
7475 *
Daniel Veillard517752b1999-04-05 12:20:10 +00007476 * [NS 9] ETag ::= '</' QName S? '>'
Daniel Veillard260a68f1998-08-13 03:39:55 +00007477 */
7478
Daniel Veillard0ba4d531998-11-01 19:34:31 +00007479void
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007480xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007481 xmlChar *name;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007482 xmlChar *oldname;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007483
Daniel Veillarde2d034d1999-07-27 19:52:06 +00007484 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00007485 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00007486 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00007487 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007488 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007489 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007490 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007491 return;
7492 }
7493 SKIP(2);
7494
Daniel Veillard517752b1999-04-05 12:20:10 +00007495 name = xmlParseName(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007496
7497 /*
7498 * We should definitely be at the ending "S? '>'" part
7499 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00007500 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007501 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00007502 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00007503 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00007504 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007505 ctxt->errNo = XML_ERR_GT_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007506 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007507 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007508 } else
7509 NEXT;
7510
Daniel Veillard517752b1999-04-05 12:20:10 +00007511 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00007512 * [ WFC: Element Type Match ]
7513 * The Name in an element's end-tag must match the element type in the
7514 * start-tag.
7515 *
Daniel Veillard14fff061999-06-22 21:49:07 +00007516 */
Daniel Veillardda07c342000-01-25 18:31:22 +00007517 if ((name == NULL) || (ctxt->name == NULL) ||
7518 (xmlStrcmp(name, ctxt->name))) {
7519 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
7520 if ((name != NULL) && (ctxt->name != NULL)) {
7521 ctxt->sax->error(ctxt->userData,
7522 "Opening and ending tag mismatch: %s and %s\n",
7523 ctxt->name, name);
7524 } else if (ctxt->name != NULL) {
7525 ctxt->sax->error(ctxt->userData,
7526 "Ending tag eror for: %s\n", ctxt->name);
7527 } else {
7528 ctxt->sax->error(ctxt->userData,
7529 "Ending tag error: internal error ???\n");
7530 }
Daniel Veillard7f7d1111999-09-22 09:46:25 +00007531
Daniel Veillardda07c342000-01-25 18:31:22 +00007532 }
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007533 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
Daniel Veillard14fff061999-06-22 21:49:07 +00007534 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007535 ctxt->disableSAX = 1;
Daniel Veillard14fff061999-06-22 21:49:07 +00007536 }
7537
7538 /*
Daniel Veillard517752b1999-04-05 12:20:10 +00007539 * SAX: End of Tag
7540 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007541 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7542 (!ctxt->disableSAX))
Daniel Veillard27d88741999-05-29 11:51:49 +00007543 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard517752b1999-04-05 12:20:10 +00007544
7545 if (name != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00007546 xmlFree(name);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007547 oldname = namePop(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00007548 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007549 if (oldname != NULL) {
7550#ifdef DEBUG_STACK
7551 fprintf(stderr,"Close: popped %s\n", oldname);
7552#endif
7553 xmlFree(oldname);
7554 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00007555 return;
7556}
7557
Daniel Veillard11e00581998-10-24 18:27:49 +00007558/**
7559 * xmlParseCDSect:
7560 * @ctxt: an XML parser context
7561 *
7562 * Parse escaped pure raw content.
Daniel Veillard260a68f1998-08-13 03:39:55 +00007563 *
7564 * [18] CDSect ::= CDStart CData CDEnd
7565 *
7566 * [19] CDStart ::= '<![CDATA['
7567 *
7568 * [20] Data ::= (Char* - (Char* ']]>' Char*))
7569 *
7570 * [21] CDEnd ::= ']]>'
7571 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00007572void
7573xmlParseCDSect(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00007574 xmlChar *buf = NULL;
7575 int len = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007576 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillardcf461992000-03-14 18:30:20 +00007577 int r, rl;
7578 int s, sl;
7579 int cur, l;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007580
Daniel Veillardb05deb71999-08-10 19:04:08 +00007581 if ((NXT(0) == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00007582 (NXT(2) == '[') && (NXT(3) == 'C') &&
7583 (NXT(4) == 'D') && (NXT(5) == 'A') &&
7584 (NXT(6) == 'T') && (NXT(7) == 'A') &&
7585 (NXT(8) == '[')) {
7586 SKIP(9);
7587 } else
7588 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00007589
7590 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillardcf461992000-03-14 18:30:20 +00007591 r = CUR_CHAR(rl);
7592 if (!IS_CHAR(r)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00007593 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00007594 ctxt->sax->error(ctxt->userData,
Daniel Veillard10a2c651999-12-12 13:03:50 +00007595 "CData section not finished\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007596 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007597 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007598 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
Daniel Veillardb05deb71999-08-10 19:04:08 +00007599 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007600 return;
7601 }
Daniel Veillardcf461992000-03-14 18:30:20 +00007602 NEXTL(rl);
7603 s = CUR_CHAR(sl);
7604 if (!IS_CHAR(s)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00007605 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00007606 ctxt->sax->error(ctxt->userData,
Daniel Veillard10a2c651999-12-12 13:03:50 +00007607 "CData section not finished\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007608 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007609 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007610 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00007611 ctxt->instate = XML_PARSER_CONTENT;
7612 return;
7613 }
Daniel Veillardcf461992000-03-14 18:30:20 +00007614 NEXTL(sl);
7615 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00007616 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7617 if (buf == NULL) {
7618 fprintf(stderr, "malloc of %d byte failed\n", size);
7619 return;
7620 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00007621 while (IS_CHAR(cur) &&
7622 ((r != ']') || (s != ']') || (cur != '>'))) {
Daniel Veillardcf461992000-03-14 18:30:20 +00007623 if (len + 5 >= size) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00007624 size *= 2;
7625 buf = xmlRealloc(buf, size * sizeof(xmlChar));
7626 if (buf == NULL) {
7627 fprintf(stderr, "realloc of %d byte failed\n", size);
7628 return;
7629 }
7630 }
Daniel Veillardcf461992000-03-14 18:30:20 +00007631 COPY_BUF(rl,buf,len,r);
Daniel Veillardb05deb71999-08-10 19:04:08 +00007632 r = s;
Daniel Veillardcf461992000-03-14 18:30:20 +00007633 rl = sl;
Daniel Veillardb05deb71999-08-10 19:04:08 +00007634 s = cur;
Daniel Veillardcf461992000-03-14 18:30:20 +00007635 sl = l;
7636 NEXTL(l);
7637 cur = CUR_CHAR(l);
Daniel Veillardb05deb71999-08-10 19:04:08 +00007638 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00007639 buf[len] = 0;
Daniel Veillardb05deb71999-08-10 19:04:08 +00007640 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillardcf461992000-03-14 18:30:20 +00007641 if (cur != '>') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00007642 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00007643 ctxt->sax->error(ctxt->userData,
Daniel Veillard10a2c651999-12-12 13:03:50 +00007644 "CData section not finished\n%.50s\n", buf);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007645 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
Daniel Veillardb05deb71999-08-10 19:04:08 +00007646 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007647 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00007648 xmlFree(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007649 return;
7650 }
Daniel Veillardcf461992000-03-14 18:30:20 +00007651 NEXTL(l);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007652
7653 /*
Daniel Veillard10a2c651999-12-12 13:03:50 +00007654 * Ok the buffer is to be consumed as cdata.
Daniel Veillard260a68f1998-08-13 03:39:55 +00007655 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007656 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00007657 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillard10a2c651999-12-12 13:03:50 +00007658 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007659 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00007660 xmlFree(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007661}
7662
Daniel Veillard11e00581998-10-24 18:27:49 +00007663/**
7664 * xmlParseContent:
7665 * @ctxt: an XML parser context
7666 *
7667 * Parse a content:
Daniel Veillard260a68f1998-08-13 03:39:55 +00007668 *
7669 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
7670 */
7671
Daniel Veillard0ba4d531998-11-01 19:34:31 +00007672void
7673xmlParseContent(xmlParserCtxtPtr ctxt) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00007674 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00007675 while (((RAW != 0) || (ctxt->token != 0)) &&
7676 ((RAW != '<') || (NXT(1) != '/'))) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007677 const xmlChar *test = CUR_PTR;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00007678 int cons = ctxt->input->consumed;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007679 xmlChar tok = ctxt->token;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007680
7681 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00007682 * Handle possible processed charrefs.
7683 */
7684 if (ctxt->token != 0) {
7685 xmlParseCharData(ctxt, 0);
7686 }
7687 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00007688 * First case : a Processing Instruction.
7689 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007690 else if ((RAW == '<') && (NXT(1) == '?')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00007691 xmlParsePI(ctxt);
7692 }
Daniel Veillard517752b1999-04-05 12:20:10 +00007693
Daniel Veillard260a68f1998-08-13 03:39:55 +00007694 /*
7695 * Second case : a CDSection
7696 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007697 else if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00007698 (NXT(2) == '[') && (NXT(3) == 'C') &&
7699 (NXT(4) == 'D') && (NXT(5) == 'A') &&
7700 (NXT(6) == 'T') && (NXT(7) == 'A') &&
7701 (NXT(8) == '[')) {
7702 xmlParseCDSect(ctxt);
7703 }
Daniel Veillard517752b1999-04-05 12:20:10 +00007704
Daniel Veillard260a68f1998-08-13 03:39:55 +00007705 /*
7706 * Third case : a comment
7707 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007708 else if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00007709 (NXT(2) == '-') && (NXT(3) == '-')) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00007710 xmlParseComment(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00007711 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007712 }
Daniel Veillard517752b1999-04-05 12:20:10 +00007713
Daniel Veillard260a68f1998-08-13 03:39:55 +00007714 /*
7715 * Fourth case : a sub-element.
7716 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007717 else if (RAW == '<') {
Daniel Veillard517752b1999-04-05 12:20:10 +00007718 xmlParseElement(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007719 }
Daniel Veillard517752b1999-04-05 12:20:10 +00007720
Daniel Veillard260a68f1998-08-13 03:39:55 +00007721 /*
Daniel Veillardccb09631998-10-27 06:21:04 +00007722 * Fifth case : a reference. If if has not been resolved,
7723 * parsing returns it's Name, create the node
Daniel Veillard260a68f1998-08-13 03:39:55 +00007724 */
Daniel Veillardb05deb71999-08-10 19:04:08 +00007725
Daniel Veillardcf461992000-03-14 18:30:20 +00007726 else if (RAW == '&') {
Daniel Veillard011b63c1999-06-02 17:44:04 +00007727 xmlParseReference(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007728 }
Daniel Veillard517752b1999-04-05 12:20:10 +00007729
Daniel Veillard260a68f1998-08-13 03:39:55 +00007730 /*
7731 * Last case, text. Note that References are handled directly.
7732 */
7733 else {
7734 xmlParseCharData(ctxt, 0);
7735 }
7736
Daniel Veillarde2d034d1999-07-27 19:52:06 +00007737 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007738 /*
7739 * Pop-up of finished entities.
7740 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007741 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillardbc50b591999-03-01 12:28:53 +00007742 xmlPopInput(ctxt);
Daniel Veillard10a2c651999-12-12 13:03:50 +00007743 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007744
Daniel Veillardb96e6431999-08-29 21:02:19 +00007745 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
7746 (tok == ctxt->token)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00007747 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00007748 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007749 "detected an error in element content\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007750 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007751 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007752 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007753 break;
7754 }
7755 }
7756}
7757
Daniel Veillard11e00581998-10-24 18:27:49 +00007758/**
7759 * xmlParseElement:
7760 * @ctxt: an XML parser context
7761 *
7762 * parse an XML element, this is highly recursive
Daniel Veillard260a68f1998-08-13 03:39:55 +00007763 *
7764 * [39] element ::= EmptyElemTag | STag content ETag
7765 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00007766 * [ WFC: Element Type Match ]
7767 * The Name in an element's end-tag must match the element type in the
7768 * start-tag.
7769 *
7770 * [ VC: Element Valid ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00007771 * An element is valid if there is a declaration matching elementdecl
Daniel Veillardb05deb71999-08-10 19:04:08 +00007772 * where the Name matches the element type and one of the following holds:
7773 * - The declaration matches EMPTY and the element has no content.
7774 * - The declaration matches children and the sequence of child elements
7775 * belongs to the language generated by the regular expression in the
7776 * content model, with optional white space (characters matching the
7777 * nonterminal S) between each pair of child elements.
7778 * - The declaration matches Mixed and the content consists of character
7779 * data and child elements whose types match names in the content model.
7780 * - The declaration matches ANY, and the types of any child elements have
7781 * been declared.
Daniel Veillard260a68f1998-08-13 03:39:55 +00007782 */
7783
Daniel Veillard517752b1999-04-05 12:20:10 +00007784void
Daniel Veillard1e346af1999-02-22 10:33:01 +00007785xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007786 const xmlChar *openTag = CUR_PTR;
7787 xmlChar *name;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007788 xmlChar *oldname;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007789 xmlParserNodeInfo node_info;
Daniel Veillardc26087b1999-08-30 11:23:51 +00007790 xmlNodePtr ret;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007791
7792 /* Capture start position */
Daniel Veillardc26087b1999-08-30 11:23:51 +00007793 if (ctxt->record_info) {
7794 node_info.begin_pos = ctxt->input->consumed +
7795 (CUR_PTR - ctxt->input->base);
7796 node_info.begin_line = ctxt->input->line;
7797 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00007798
Daniel Veillardcf461992000-03-14 18:30:20 +00007799 if (ctxt->spaceNr == 0)
7800 spacePush(ctxt, -1);
7801 else
7802 spacePush(ctxt, *ctxt->space);
7803
Daniel Veillard14fff061999-06-22 21:49:07 +00007804 name = xmlParseStartTag(ctxt);
7805 if (name == NULL) {
Daniel Veillardcf461992000-03-14 18:30:20 +00007806 spacePop(ctxt);
Daniel Veillard14fff061999-06-22 21:49:07 +00007807 return;
7808 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007809 namePush(ctxt, name);
Daniel Veillardc26087b1999-08-30 11:23:51 +00007810 ret = ctxt->node;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007811
7812 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00007813 * [ VC: Root Element Type ]
7814 * The Name in the document type declaration must match the element
7815 * type of the root element.
7816 */
7817 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
Daniel Veillardcf461992000-03-14 18:30:20 +00007818 ctxt->node && (ctxt->node == ctxt->myDoc->children))
Daniel Veillardb05deb71999-08-10 19:04:08 +00007819 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7820
7821 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00007822 * Check for an Empty Element.
7823 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007824 if ((RAW == '/') && (NXT(1) == '>')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00007825 SKIP(2);
Daniel Veillardcf461992000-03-14 18:30:20 +00007826 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7827 (!ctxt->disableSAX))
Daniel Veillard14fff061999-06-22 21:49:07 +00007828 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007829 oldname = namePop(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00007830 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007831 if (oldname != NULL) {
7832#ifdef DEBUG_STACK
7833 fprintf(stderr,"Close: popped %s\n", oldname);
7834#endif
7835 xmlFree(oldname);
7836 }
Daniel Veillard517752b1999-04-05 12:20:10 +00007837 return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007838 }
Daniel Veillardcf461992000-03-14 18:30:20 +00007839 if (RAW == '>') {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00007840 NEXT;
7841 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00007842 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00007843 ctxt->sax->error(ctxt->userData,
7844 "Couldn't find end of Start Tag\n%.30s\n",
Daniel Veillard242590e1998-11-13 18:04:35 +00007845 openTag);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007846 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007847 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007848 ctxt->errNo = XML_ERR_GT_REQUIRED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007849
7850 /*
7851 * end of parsing of this node.
7852 */
7853 nodePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007854 oldname = namePop(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00007855 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007856 if (oldname != NULL) {
7857#ifdef DEBUG_STACK
7858 fprintf(stderr,"Close: popped %s\n", oldname);
7859#endif
7860 xmlFree(oldname);
7861 }
Daniel Veillardc26087b1999-08-30 11:23:51 +00007862
7863 /*
7864 * Capture end position and add node
7865 */
7866 if ( ret != NULL && ctxt->record_info ) {
7867 node_info.end_pos = ctxt->input->consumed +
7868 (CUR_PTR - ctxt->input->base);
7869 node_info.end_line = ctxt->input->line;
7870 node_info.node = ret;
7871 xmlParserAddNodeInfo(ctxt, &node_info);
7872 }
Daniel Veillard517752b1999-04-05 12:20:10 +00007873 return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007874 }
7875
7876 /*
7877 * Parse the content of the element:
7878 */
7879 xmlParseContent(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00007880 if (!IS_CHAR(RAW)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00007881 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00007882 ctxt->sax->error(ctxt->userData,
Daniel Veillard242590e1998-11-13 18:04:35 +00007883 "Premature end of data in tag %.30s\n", openTag);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007884 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007885 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007886 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007887
7888 /*
7889 * end of parsing of this node.
7890 */
7891 nodePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007892 oldname = namePop(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00007893 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007894 if (oldname != NULL) {
7895#ifdef DEBUG_STACK
7896 fprintf(stderr,"Close: popped %s\n", oldname);
7897#endif
7898 xmlFree(oldname);
7899 }
Daniel Veillard517752b1999-04-05 12:20:10 +00007900 return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007901 }
7902
7903 /*
7904 * parse the end of tag: '</' should be here.
7905 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007906 xmlParseEndTag(ctxt);
Daniel Veillardc26087b1999-08-30 11:23:51 +00007907
7908 /*
7909 * Capture end position and add node
7910 */
7911 if ( ret != NULL && ctxt->record_info ) {
7912 node_info.end_pos = ctxt->input->consumed +
7913 (CUR_PTR - ctxt->input->base);
7914 node_info.end_line = ctxt->input->line;
7915 node_info.node = ret;
7916 xmlParserAddNodeInfo(ctxt, &node_info);
7917 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00007918}
7919
Daniel Veillard11e00581998-10-24 18:27:49 +00007920/**
7921 * xmlParseVersionNum:
7922 * @ctxt: an XML parser context
7923 *
7924 * parse the XML version value.
Daniel Veillard260a68f1998-08-13 03:39:55 +00007925 *
7926 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
Daniel Veillard1e346af1999-02-22 10:33:01 +00007927 *
7928 * Returns the string giving the XML version number, or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00007929 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007930xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00007931xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00007932 xmlChar *buf = NULL;
7933 int len = 0;
7934 int size = 10;
7935 xmlChar cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007936
Daniel Veillard10a2c651999-12-12 13:03:50 +00007937 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7938 if (buf == NULL) {
7939 fprintf(stderr, "malloc of %d byte failed\n", size);
7940 return(NULL);
7941 }
7942 cur = CUR;
Daniel Veillardcf461992000-03-14 18:30:20 +00007943 while (((cur >= 'a') && (cur <= 'z')) ||
7944 ((cur >= 'A') && (cur <= 'Z')) ||
7945 ((cur >= '0') && (cur <= '9')) ||
7946 (cur == '_') || (cur == '.') ||
7947 (cur == ':') || (cur == '-')) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00007948 if (len + 1 >= size) {
7949 size *= 2;
7950 buf = xmlRealloc(buf, size * sizeof(xmlChar));
7951 if (buf == NULL) {
7952 fprintf(stderr, "realloc of %d byte failed\n", size);
7953 return(NULL);
7954 }
7955 }
7956 buf[len++] = cur;
7957 NEXT;
7958 cur=CUR;
7959 }
7960 buf[len] = 0;
7961 return(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007962}
7963
Daniel Veillard11e00581998-10-24 18:27:49 +00007964/**
7965 * xmlParseVersionInfo:
7966 * @ctxt: an XML parser context
7967 *
7968 * parse the XML version.
Daniel Veillard260a68f1998-08-13 03:39:55 +00007969 *
7970 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7971 *
7972 * [25] Eq ::= S? '=' S?
Daniel Veillard11e00581998-10-24 18:27:49 +00007973 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00007974 * Returns the version string, e.g. "1.0"
Daniel Veillard260a68f1998-08-13 03:39:55 +00007975 */
7976
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007977xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00007978xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007979 xmlChar *version = NULL;
7980 const xmlChar *q;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007981
Daniel Veillardcf461992000-03-14 18:30:20 +00007982 if ((RAW == 'v') && (NXT(1) == 'e') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00007983 (NXT(2) == 'r') && (NXT(3) == 's') &&
7984 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7985 (NXT(6) == 'n')) {
7986 SKIP(7);
7987 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00007988 if (RAW != '=') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00007989 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00007990 ctxt->sax->error(ctxt->userData,
7991 "xmlParseVersionInfo : expected '='\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007992 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007993 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007994 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007995 return(NULL);
7996 }
7997 NEXT;
7998 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00007999 if (RAW == '"') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00008000 NEXT;
8001 q = CUR_PTR;
8002 version = xmlParseVersionNum(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00008003 if (RAW != '"') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008004 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008005 ctxt->sax->error(ctxt->userData,
8006 "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008007 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008008 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008009 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008010 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00008011 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00008012 } else if (RAW == '\''){
Daniel Veillard260a68f1998-08-13 03:39:55 +00008013 NEXT;
8014 q = CUR_PTR;
8015 version = xmlParseVersionNum(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00008016 if (RAW != '\'') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008017 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008018 ctxt->sax->error(ctxt->userData,
8019 "String not closed\n%.50s\n", q);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008020 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008021 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008022 ctxt->disableSAX = 1;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008023 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00008024 NEXT;
8025 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008026 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008027 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008028 "xmlParseVersionInfo : expected ' or \"\n");
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008029 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008030 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008031 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008032 }
8033 }
8034 return(version);
8035}
8036
Daniel Veillard11e00581998-10-24 18:27:49 +00008037/**
8038 * xmlParseEncName:
8039 * @ctxt: an XML parser context
8040 *
8041 * parse the XML encoding name
Daniel Veillard260a68f1998-08-13 03:39:55 +00008042 *
8043 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
Daniel Veillard11e00581998-10-24 18:27:49 +00008044 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00008045 * Returns the encoding name value or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00008046 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008047xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00008048xmlParseEncName(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00008049 xmlChar *buf = NULL;
8050 int len = 0;
8051 int size = 10;
8052 xmlChar cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008053
Daniel Veillard10a2c651999-12-12 13:03:50 +00008054 cur = CUR;
8055 if (((cur >= 'a') && (cur <= 'z')) ||
8056 ((cur >= 'A') && (cur <= 'Z'))) {
8057 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
8058 if (buf == NULL) {
8059 fprintf(stderr, "malloc of %d byte failed\n", size);
8060 return(NULL);
8061 }
8062
8063 buf[len++] = cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008064 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00008065 cur = CUR;
Daniel Veillardcf461992000-03-14 18:30:20 +00008066 while (((cur >= 'a') && (cur <= 'z')) ||
8067 ((cur >= 'A') && (cur <= 'Z')) ||
8068 ((cur >= '0') && (cur <= '9')) ||
8069 (cur == '.') || (cur == '_') ||
8070 (cur == '-')) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00008071 if (len + 1 >= size) {
8072 size *= 2;
8073 buf = xmlRealloc(buf, size * sizeof(xmlChar));
8074 if (buf == NULL) {
8075 fprintf(stderr, "realloc of %d byte failed\n", size);
8076 return(NULL);
8077 }
8078 }
8079 buf[len++] = cur;
8080 NEXT;
8081 cur = CUR;
8082 if (cur == 0) {
8083 SHRINK;
8084 GROW;
8085 cur = CUR;
8086 }
8087 }
8088 buf[len] = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008089 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008090 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008091 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008092 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008093 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008094 ctxt->errNo = XML_ERR_ENCODING_NAME;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008095 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00008096 return(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008097}
8098
Daniel Veillard11e00581998-10-24 18:27:49 +00008099/**
8100 * xmlParseEncodingDecl:
8101 * @ctxt: an XML parser context
8102 *
8103 * parse the XML encoding declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00008104 *
8105 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
Daniel Veillard11e00581998-10-24 18:27:49 +00008106 *
8107 * TODO: this should setup the conversion filters.
8108 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00008109 * Returns the encoding value or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00008110 */
8111
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008112xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00008113xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008114 xmlChar *encoding = NULL;
8115 const xmlChar *q;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008116
8117 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00008118 if ((RAW == 'e') && (NXT(1) == 'n') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00008119 (NXT(2) == 'c') && (NXT(3) == 'o') &&
8120 (NXT(4) == 'd') && (NXT(5) == 'i') &&
8121 (NXT(6) == 'n') && (NXT(7) == 'g')) {
8122 SKIP(8);
8123 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00008124 if (RAW != '=') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008125 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008126 ctxt->sax->error(ctxt->userData,
8127 "xmlParseEncodingDecl : expected '='\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008128 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008129 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008130 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008131 return(NULL);
8132 }
8133 NEXT;
8134 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00008135 if (RAW == '"') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00008136 NEXT;
8137 q = CUR_PTR;
8138 encoding = xmlParseEncName(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00008139 if (RAW != '"') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008140 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008141 ctxt->sax->error(ctxt->userData,
8142 "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008143 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008144 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008145 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008146 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00008147 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00008148 } else if (RAW == '\''){
Daniel Veillard260a68f1998-08-13 03:39:55 +00008149 NEXT;
8150 q = CUR_PTR;
8151 encoding = xmlParseEncName(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00008152 if (RAW != '\'') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008153 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008154 ctxt->sax->error(ctxt->userData,
8155 "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008156 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008157 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008158 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008159 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00008160 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00008161 } else if (RAW == '"'){
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008162 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008163 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008164 "xmlParseEncodingDecl : expected ' or \"\n");
8165 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008166 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008167 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008168 }
8169 }
8170 return(encoding);
8171}
8172
Daniel Veillard11e00581998-10-24 18:27:49 +00008173/**
8174 * xmlParseSDDecl:
8175 * @ctxt: an XML parser context
8176 *
8177 * parse the XML standalone declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00008178 *
8179 * [32] SDDecl ::= S 'standalone' Eq
8180 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Daniel Veillard1e346af1999-02-22 10:33:01 +00008181 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00008182 * [ VC: Standalone Document Declaration ]
8183 * TODO The standalone document declaration must have the value "no"
8184 * if any external markup declarations contain declarations of:
8185 * - attributes with default values, if elements to which these
8186 * attributes apply appear in the document without specifications
8187 * of values for these attributes, or
8188 * - entities (other than amp, lt, gt, apos, quot), if references
8189 * to those entities appear in the document, or
8190 * - attributes with values subject to normalization, where the
8191 * attribute appears in the document with a value which will change
8192 * as a result of normalization, or
8193 * - element types with element content, if white space occurs directly
8194 * within any instance of those types.
8195 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00008196 * Returns 1 if standalone, 0 otherwise
Daniel Veillard260a68f1998-08-13 03:39:55 +00008197 */
8198
Daniel Veillard0ba4d531998-11-01 19:34:31 +00008199int
8200xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00008201 int standalone = -1;
8202
8203 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00008204 if ((RAW == 's') && (NXT(1) == 't') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00008205 (NXT(2) == 'a') && (NXT(3) == 'n') &&
8206 (NXT(4) == 'd') && (NXT(5) == 'a') &&
8207 (NXT(6) == 'l') && (NXT(7) == 'o') &&
8208 (NXT(8) == 'n') && (NXT(9) == 'e')) {
8209 SKIP(10);
Daniel Veillard011b63c1999-06-02 17:44:04 +00008210 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00008211 if (RAW != '=') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008212 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008213 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008214 "XML standalone declaration : expected '='\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008215 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008216 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008217 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008218 return(standalone);
8219 }
8220 NEXT;
8221 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00008222 if (RAW == '\''){
Daniel Veillard260a68f1998-08-13 03:39:55 +00008223 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00008224 if ((RAW == 'n') && (NXT(1) == 'o')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00008225 standalone = 0;
8226 SKIP(2);
Daniel Veillardcf461992000-03-14 18:30:20 +00008227 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00008228 (NXT(2) == 's')) {
8229 standalone = 1;
8230 SKIP(3);
8231 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008232 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008233 ctxt->sax->error(ctxt->userData,
8234 "standalone accepts only 'yes' or 'no'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008235 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008236 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008237 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008238 }
Daniel Veillardcf461992000-03-14 18:30:20 +00008239 if (RAW != '\'') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008240 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008241 ctxt->sax->error(ctxt->userData, "String not closed\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008242 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008243 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008244 ctxt->disableSAX = 1;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008245 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00008246 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00008247 } else if (RAW == '"'){
Daniel Veillard260a68f1998-08-13 03:39:55 +00008248 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00008249 if ((RAW == 'n') && (NXT(1) == 'o')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00008250 standalone = 0;
8251 SKIP(2);
Daniel Veillardcf461992000-03-14 18:30:20 +00008252 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00008253 (NXT(2) == 's')) {
8254 standalone = 1;
8255 SKIP(3);
8256 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008257 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008258 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008259 "standalone accepts only 'yes' or 'no'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008260 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008261 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008262 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008263 }
Daniel Veillardcf461992000-03-14 18:30:20 +00008264 if (RAW != '"') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008265 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008266 ctxt->sax->error(ctxt->userData, "String not closed\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008267 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008268 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008269 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008270 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00008271 NEXT;
8272 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008273 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008274 ctxt->sax->error(ctxt->userData,
8275 "Standalone value not found\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008276 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008277 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008278 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008279 }
8280 }
8281 return(standalone);
8282}
8283
Daniel Veillard11e00581998-10-24 18:27:49 +00008284/**
8285 * xmlParseXMLDecl:
8286 * @ctxt: an XML parser context
8287 *
8288 * parse an XML declaration header
Daniel Veillard260a68f1998-08-13 03:39:55 +00008289 *
8290 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8291 */
8292
Daniel Veillard0ba4d531998-11-01 19:34:31 +00008293void
8294xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008295 xmlChar *version;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008296
8297 /*
8298 * We know that '<?xml' is here.
8299 */
8300 SKIP(5);
8301
Daniel Veillardcf461992000-03-14 18:30:20 +00008302 if (!IS_BLANK(RAW)) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008304 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008305 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008306 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008307 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008308 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00008309 SKIP_BLANKS;
8310
8311 /*
8312 * We should have the VersionInfo here.
8313 */
8314 version = xmlParseVersionInfo(ctxt);
8315 if (version == NULL)
8316 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard517752b1999-04-05 12:20:10 +00008317 ctxt->version = xmlStrdup(version);
Daniel Veillard6454aec1999-09-02 22:04:43 +00008318 xmlFree(version);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008319
8320 /*
8321 * We may have the encoding declaration
8322 */
Daniel Veillardcf461992000-03-14 18:30:20 +00008323 if (!IS_BLANK(RAW)) {
8324 if ((RAW == '?') && (NXT(1) == '>')) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008325 SKIP(2);
8326 return;
8327 }
8328 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008329 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008330 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008331 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008332 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008333 }
Daniel Veillardcf461992000-03-14 18:30:20 +00008334 ctxt->input->encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008335
8336 /*
8337 * We may have the standalone status.
8338 */
Daniel Veillardcf461992000-03-14 18:30:20 +00008339 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
8340 if ((RAW == '?') && (NXT(1) == '>')) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008341 SKIP(2);
8342 return;
8343 }
8344 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008345 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008346 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008347 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008348 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008349 }
8350 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00008351 ctxt->input->standalone = xmlParseSDDecl(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008352
8353 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00008354 if ((RAW == '?') && (NXT(1) == '>')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00008355 SKIP(2);
Daniel Veillardcf461992000-03-14 18:30:20 +00008356 } else if (RAW == '>') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00008357 /* Deprecated old WD ... */
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008358 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008359 ctxt->sax->error(ctxt->userData,
8360 "XML declaration must end-up with '?>'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008361 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008362 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008363 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008364 NEXT;
8365 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008366 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008367 ctxt->sax->error(ctxt->userData,
8368 "parsing XML declaration: '?>' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008369 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008370 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008371 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008372 MOVETO_ENDTAG(CUR_PTR);
8373 NEXT;
8374 }
8375}
8376
Daniel Veillard11e00581998-10-24 18:27:49 +00008377/**
8378 * xmlParseMisc:
8379 * @ctxt: an XML parser context
8380 *
8381 * parse an XML Misc* optionnal field.
Daniel Veillard260a68f1998-08-13 03:39:55 +00008382 *
8383 * [27] Misc ::= Comment | PI | S
8384 */
8385
Daniel Veillard0ba4d531998-11-01 19:34:31 +00008386void
8387xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillardcf461992000-03-14 18:30:20 +00008388 while (((RAW == '<') && (NXT(1) == '?')) ||
8389 ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00008390 (NXT(2) == '-') && (NXT(3) == '-')) ||
8391 IS_BLANK(CUR)) {
Daniel Veillardcf461992000-03-14 18:30:20 +00008392 if ((RAW == '<') && (NXT(1) == '?')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00008393 xmlParsePI(ctxt);
8394 } else if (IS_BLANK(CUR)) {
8395 NEXT;
8396 } else
Daniel Veillardb96e6431999-08-29 21:02:19 +00008397 xmlParseComment(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008398 }
8399}
8400
Daniel Veillard11e00581998-10-24 18:27:49 +00008401/**
Daniel Veillardcf461992000-03-14 18:30:20 +00008402 * xmlParseDocument:
Daniel Veillard11e00581998-10-24 18:27:49 +00008403 * @ctxt: an XML parser context
8404 *
8405 * parse an XML document (and build a tree if using the standard SAX
8406 * interface).
Daniel Veillard260a68f1998-08-13 03:39:55 +00008407 *
8408 * [1] document ::= prolog element Misc*
8409 *
8410 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
Daniel Veillard11e00581998-10-24 18:27:49 +00008411 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00008412 * Returns 0, -1 in case of error. the parser context is augmented
Daniel Veillard11e00581998-10-24 18:27:49 +00008413 * as a result of the parsing.
Daniel Veillard260a68f1998-08-13 03:39:55 +00008414 */
8415
Daniel Veillard0ba4d531998-11-01 19:34:31 +00008416int
8417xmlParseDocument(xmlParserCtxtPtr ctxt) {
Daniel Veillardcf461992000-03-14 18:30:20 +00008418 xmlChar start[4];
8419 xmlCharEncoding enc;
8420
Daniel Veillard260a68f1998-08-13 03:39:55 +00008421 xmlDefaultSAXHandlerInit();
8422
Daniel Veillarde2d034d1999-07-27 19:52:06 +00008423 GROW;
8424
Daniel Veillard260a68f1998-08-13 03:39:55 +00008425 /*
8426 * SAX: beginning of the document processing.
8427 */
Daniel Veillard517752b1999-04-05 12:20:10 +00008428 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
Daniel Veillard27d88741999-05-29 11:51:49 +00008429 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008430
Daniel Veillardcf461992000-03-14 18:30:20 +00008431 /*
8432 * Get the 4 first bytes and decode the charset
8433 * if enc != XML_CHAR_ENCODING_NONE
8434 * plug some encoding conversion routines.
Daniel Veillard260a68f1998-08-13 03:39:55 +00008435 */
Daniel Veillardcf461992000-03-14 18:30:20 +00008436 start[0] = RAW;
8437 start[1] = NXT(1);
8438 start[2] = NXT(2);
8439 start[3] = NXT(3);
8440 enc = xmlDetectCharEncoding(start, 4);
8441 if (enc != XML_CHAR_ENCODING_NONE) {
8442 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008443 }
8444
Daniel Veillardcf461992000-03-14 18:30:20 +00008445
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008446 if (CUR == 0) {
8447 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008448 ctxt->sax->error(ctxt->userData, "Document is empty\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008449 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008450 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008451 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008452 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00008453
8454 /*
8455 * Check for the XMLDecl in the Prolog.
8456 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00008457 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00008458 if ((RAW == '<') && (NXT(1) == '?') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00008459 (NXT(2) == 'x') && (NXT(3) == 'm') &&
Daniel Veillard686d6b62000-01-03 11:08:02 +00008460 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00008461 xmlParseXMLDecl(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00008462 ctxt->standalone = ctxt->input->standalone;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008463 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00008464 if ((ctxt->encoding == NULL) && (ctxt->input->encoding != NULL))
8465 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8466
Daniel Veillard260a68f1998-08-13 03:39:55 +00008467 } else {
Daniel Veillard517752b1999-04-05 12:20:10 +00008468 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008469 }
Daniel Veillardcf461992000-03-14 18:30:20 +00008470 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
Daniel Veillard27d88741999-05-29 11:51:49 +00008471 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008472
8473 /*
8474 * The Misc part of the Prolog
8475 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00008476 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008477 xmlParseMisc(ctxt);
8478
8479 /*
8480 * Then possibly doc type declaration(s) and more Misc
8481 * (doctypedecl Misc*)?
8482 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00008483 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00008484 if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00008485 (NXT(2) == 'D') && (NXT(3) == 'O') &&
8486 (NXT(4) == 'C') && (NXT(5) == 'T') &&
8487 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
8488 (NXT(8) == 'E')) {
Daniel Veillardcf461992000-03-14 18:30:20 +00008489
8490 ctxt->inSubset = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008491 xmlParseDocTypeDecl(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00008492 if (RAW == '[') {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008493 ctxt->instate = XML_PARSER_DTD;
8494 xmlParseInternalSubset(ctxt);
8495 }
Daniel Veillardcf461992000-03-14 18:30:20 +00008496
8497 /*
8498 * Create and update the external subset.
8499 */
8500 ctxt->inSubset = 2;
8501 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8502 (!ctxt->disableSAX))
8503 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8504 ctxt->extSubSystem, ctxt->extSubURI);
8505 ctxt->inSubset = 0;
8506
8507
Daniel Veillardb05deb71999-08-10 19:04:08 +00008508 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008509 xmlParseMisc(ctxt);
8510 }
8511
8512 /*
8513 * Time to start parsing the tree itself
8514 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00008515 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00008516 if (RAW != '<') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008518 ctxt->sax->error(ctxt->userData,
Daniel Veillardcf461992000-03-14 18:30:20 +00008519 "Start tag expected, '<' not found\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008520 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008521 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008522 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008523 ctxt->instate = XML_PARSER_EOF;
8524 } else {
8525 ctxt->instate = XML_PARSER_CONTENT;
8526 xmlParseElement(ctxt);
8527 ctxt->instate = XML_PARSER_EPILOG;
8528
8529
8530 /*
8531 * The Misc part at the end
8532 */
8533 xmlParseMisc(ctxt);
8534
Daniel Veillardcf461992000-03-14 18:30:20 +00008535 if (RAW != 0) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008536 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8537 ctxt->sax->error(ctxt->userData,
8538 "Extra content at the end of the document\n");
8539 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008540 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008541 ctxt->errNo = XML_ERR_DOCUMENT_END;
8542 }
8543 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008544 }
8545
Daniel Veillard260a68f1998-08-13 03:39:55 +00008546 /*
8547 * SAX: end of the document processing.
8548 */
Daniel Veillardcf461992000-03-14 18:30:20 +00008549 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8550 (!ctxt->disableSAX))
Daniel Veillard27d88741999-05-29 11:51:49 +00008551 ctxt->sax->endDocument(ctxt->userData);
Daniel Veillardcf461992000-03-14 18:30:20 +00008552
8553 /*
8554 * Grab the encoding if it was added on-the-fly
8555 */
8556 if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
8557 (ctxt->myDoc->encoding == NULL)) {
8558 ctxt->myDoc->encoding = ctxt->encoding;
8559 ctxt->encoding = NULL;
8560 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008561 if (! ctxt->wellFormed) return(-1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008562 return(0);
8563}
8564
Daniel Veillardb05deb71999-08-10 19:04:08 +00008565/************************************************************************
8566 * *
Daniel Veillard7f858501999-11-17 17:32:38 +00008567 * Progressive parsing interfaces *
8568 * *
8569 ************************************************************************/
8570
8571/**
8572 * xmlParseLookupSequence:
8573 * @ctxt: an XML parser context
8574 * @first: the first char to lookup
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008575 * @next: the next char to lookup or zero
8576 * @third: the next char to lookup or zero
Daniel Veillard7f858501999-11-17 17:32:38 +00008577 *
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008578 * Try to find if a sequence (first, next, third) or just (first next) or
8579 * (first) is available in the input stream.
8580 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8581 * to avoid rescanning sequences of bytes, it DOES change the state of the
8582 * parser, do not use liberally.
Daniel Veillard7f858501999-11-17 17:32:38 +00008583 *
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008584 * Returns the index to the current parsing point if the full sequence
8585 * is available, -1 otherwise.
Daniel Veillard7f858501999-11-17 17:32:38 +00008586 */
8587int
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008588xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8589 xmlChar next, xmlChar third) {
8590 int base, len;
8591 xmlParserInputPtr in;
8592 const xmlChar *buf;
8593
8594 in = ctxt->input;
8595 if (in == NULL) return(-1);
8596 base = in->cur - in->base;
8597 if (base < 0) return(-1);
8598 if (ctxt->checkIndex > base)
8599 base = ctxt->checkIndex;
8600 if (in->buf == NULL) {
8601 buf = in->base;
8602 len = in->length;
8603 } else {
8604 buf = in->buf->buffer->content;
8605 len = in->buf->buffer->use;
8606 }
8607 /* take into account the sequence length */
8608 if (third) len -= 2;
8609 else if (next) len --;
8610 for (;base < len;base++) {
8611 if (buf[base] == first) {
8612 if (third != 0) {
8613 if ((buf[base + 1] != next) ||
8614 (buf[base + 2] != third)) continue;
8615 } else if (next != 0) {
8616 if (buf[base + 1] != next) continue;
8617 }
8618 ctxt->checkIndex = 0;
8619#ifdef DEBUG_PUSH
8620 if (next == 0)
8621 fprintf(stderr, "PP: lookup '%c' found at %d\n",
8622 first, base);
8623 else if (third == 0)
8624 fprintf(stderr, "PP: lookup '%c%c' found at %d\n",
8625 first, next, base);
8626 else
8627 fprintf(stderr, "PP: lookup '%c%c%c' found at %d\n",
8628 first, next, third, base);
8629#endif
8630 return(base - (in->cur - in->base));
8631 }
8632 }
8633 ctxt->checkIndex = base;
8634#ifdef DEBUG_PUSH
8635 if (next == 0)
8636 fprintf(stderr, "PP: lookup '%c' failed\n", first);
8637 else if (third == 0)
8638 fprintf(stderr, "PP: lookup '%c%c' failed\n", first, next);
8639 else
8640 fprintf(stderr, "PP: lookup '%c%c%c' failed\n", first, next, third);
8641#endif
8642 return(-1);
Daniel Veillard7f858501999-11-17 17:32:38 +00008643}
8644
8645/**
Daniel Veillard71b656e2000-01-05 14:46:17 +00008646 * xmlParseTryOrFinish:
Daniel Veillard7f858501999-11-17 17:32:38 +00008647 * @ctxt: an XML parser context
Daniel Veillard71b656e2000-01-05 14:46:17 +00008648 * @terminate: last chunk indicator
Daniel Veillard7f858501999-11-17 17:32:38 +00008649 *
8650 * Try to progress on parsing
8651 *
8652 * Returns zero if no parsing was possible
8653 */
8654int
Daniel Veillard71b656e2000-01-05 14:46:17 +00008655xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
Daniel Veillard7f858501999-11-17 17:32:38 +00008656 int ret = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008657 int avail;
8658 xmlChar cur, next;
8659
8660#ifdef DEBUG_PUSH
8661 switch (ctxt->instate) {
8662 case XML_PARSER_EOF:
8663 fprintf(stderr, "PP: try EOF\n"); break;
8664 case XML_PARSER_START:
8665 fprintf(stderr, "PP: try START\n"); break;
8666 case XML_PARSER_MISC:
8667 fprintf(stderr, "PP: try MISC\n");break;
8668 case XML_PARSER_COMMENT:
8669 fprintf(stderr, "PP: try COMMENT\n");break;
8670 case XML_PARSER_PROLOG:
8671 fprintf(stderr, "PP: try PROLOG\n");break;
8672 case XML_PARSER_START_TAG:
8673 fprintf(stderr, "PP: try START_TAG\n");break;
8674 case XML_PARSER_CONTENT:
8675 fprintf(stderr, "PP: try CONTENT\n");break;
8676 case XML_PARSER_CDATA_SECTION:
8677 fprintf(stderr, "PP: try CDATA_SECTION\n");break;
8678 case XML_PARSER_END_TAG:
8679 fprintf(stderr, "PP: try END_TAG\n");break;
8680 case XML_PARSER_ENTITY_DECL:
8681 fprintf(stderr, "PP: try ENTITY_DECL\n");break;
8682 case XML_PARSER_ENTITY_VALUE:
8683 fprintf(stderr, "PP: try ENTITY_VALUE\n");break;
8684 case XML_PARSER_ATTRIBUTE_VALUE:
8685 fprintf(stderr, "PP: try ATTRIBUTE_VALUE\n");break;
8686 case XML_PARSER_DTD:
8687 fprintf(stderr, "PP: try DTD\n");break;
8688 case XML_PARSER_EPILOG:
8689 fprintf(stderr, "PP: try EPILOG\n");break;
8690 case XML_PARSER_PI:
8691 fprintf(stderr, "PP: try PI\n");break;
8692 }
8693#endif
Daniel Veillard7f858501999-11-17 17:32:38 +00008694
8695 while (1) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008696 /*
8697 * Pop-up of finished entities.
8698 */
Daniel Veillardcf461992000-03-14 18:30:20 +00008699 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008700 xmlPopInput(ctxt);
8701
Daniel Veillardcf461992000-03-14 18:30:20 +00008702 if (ctxt->input ==NULL) break;
8703 if (ctxt->input->buf == NULL)
8704 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008705 else
Daniel Veillardcf461992000-03-14 18:30:20 +00008706 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008707 if (avail < 1)
8708 goto done;
Daniel Veillard7f858501999-11-17 17:32:38 +00008709 switch (ctxt->instate) {
8710 case XML_PARSER_EOF:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008711 /*
8712 * Document parsing is done !
8713 */
8714 goto done;
8715 case XML_PARSER_START:
8716 /*
8717 * Very first chars read from the document flow.
8718 */
Daniel Veillardcf461992000-03-14 18:30:20 +00008719 cur = ctxt->input->cur[0];
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008720 if (IS_BLANK(cur)) {
8721 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8722 ctxt->sax->setDocumentLocator(ctxt->userData,
8723 &xmlDefaultSAXLocator);
8724 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8725 ctxt->sax->error(ctxt->userData,
8726 "Extra spaces at the beginning of the document are not allowed\n");
8727 ctxt->errNo = XML_ERR_DOCUMENT_START;
8728 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008729 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008730 SKIP_BLANKS;
8731 ret++;
Daniel Veillardcf461992000-03-14 18:30:20 +00008732 if (ctxt->input->buf == NULL)
8733 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008734 else
Daniel Veillardcf461992000-03-14 18:30:20 +00008735 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008736 }
8737 if (avail < 2)
8738 goto done;
8739
Daniel Veillardcf461992000-03-14 18:30:20 +00008740 cur = ctxt->input->cur[0];
8741 next = ctxt->input->cur[1];
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008742 if (cur == 0) {
8743 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8744 ctxt->sax->setDocumentLocator(ctxt->userData,
8745 &xmlDefaultSAXLocator);
8746 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8747 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8748 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8749 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008750 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008751 ctxt->instate = XML_PARSER_EOF;
8752#ifdef DEBUG_PUSH
8753 fprintf(stderr, "PP: entering EOF\n");
8754#endif
8755 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8756 ctxt->sax->endDocument(ctxt->userData);
8757 goto done;
8758 }
8759 if ((cur == '<') && (next == '?')) {
8760 /* PI or XML decl */
8761 if (avail < 5) return(ret);
Daniel Veillard71b656e2000-01-05 14:46:17 +00008762 if ((!terminate) &&
8763 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008764 return(ret);
8765 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8766 ctxt->sax->setDocumentLocator(ctxt->userData,
8767 &xmlDefaultSAXLocator);
Daniel Veillardcf461992000-03-14 18:30:20 +00008768 if ((ctxt->input->cur[2] == 'x') &&
8769 (ctxt->input->cur[3] == 'm') &&
8770 (ctxt->input->cur[4] == 'l') &&
8771 (IS_BLANK(ctxt->input->cur[5]))) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008772 ret += 5;
8773#ifdef DEBUG_PUSH
8774 fprintf(stderr, "PP: Parsing XML Decl\n");
8775#endif
8776 xmlParseXMLDecl(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00008777 ctxt->standalone = ctxt->input->standalone;
8778 if ((ctxt->encoding == NULL) &&
8779 (ctxt->input->encoding != NULL))
8780 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8781 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8782 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008783 ctxt->sax->startDocument(ctxt->userData);
8784 ctxt->instate = XML_PARSER_MISC;
8785#ifdef DEBUG_PUSH
8786 fprintf(stderr, "PP: entering MISC\n");
8787#endif
8788 } else {
8789 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillardcf461992000-03-14 18:30:20 +00008790 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8791 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008792 ctxt->sax->startDocument(ctxt->userData);
8793 ctxt->instate = XML_PARSER_MISC;
8794#ifdef DEBUG_PUSH
8795 fprintf(stderr, "PP: entering MISC\n");
8796#endif
8797 }
8798 } else {
8799 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8800 ctxt->sax->setDocumentLocator(ctxt->userData,
8801 &xmlDefaultSAXLocator);
8802 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillardcf461992000-03-14 18:30:20 +00008803 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8804 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008805 ctxt->sax->startDocument(ctxt->userData);
8806 ctxt->instate = XML_PARSER_MISC;
8807#ifdef DEBUG_PUSH
8808 fprintf(stderr, "PP: entering MISC\n");
8809#endif
8810 }
8811 break;
8812 case XML_PARSER_MISC:
8813 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00008814 if (ctxt->input->buf == NULL)
8815 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008816 else
Daniel Veillardcf461992000-03-14 18:30:20 +00008817 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008818 if (avail < 2)
8819 goto done;
Daniel Veillardcf461992000-03-14 18:30:20 +00008820 cur = ctxt->input->cur[0];
8821 next = ctxt->input->cur[1];
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008822 if ((cur == '<') && (next == '?')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00008823 if ((!terminate) &&
8824 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008825 goto done;
8826#ifdef DEBUG_PUSH
8827 fprintf(stderr, "PP: Parsing PI\n");
8828#endif
8829 xmlParsePI(ctxt);
8830 } else if ((cur == '<') && (next == '!') &&
Daniel Veillardcf461992000-03-14 18:30:20 +00008831 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00008832 if ((!terminate) &&
8833 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008834 goto done;
8835#ifdef DEBUG_PUSH
8836 fprintf(stderr, "PP: Parsing Comment\n");
8837#endif
8838 xmlParseComment(ctxt);
8839 ctxt->instate = XML_PARSER_MISC;
8840 } else if ((cur == '<') && (next == '!') &&
Daniel Veillardcf461992000-03-14 18:30:20 +00008841 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8842 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8843 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8844 (ctxt->input->cur[8] == 'E')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00008845 if ((!terminate) &&
8846 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008847 goto done;
8848#ifdef DEBUG_PUSH
8849 fprintf(stderr, "PP: Parsing internal subset\n");
8850#endif
Daniel Veillardcf461992000-03-14 18:30:20 +00008851 ctxt->inSubset = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008852 xmlParseDocTypeDecl(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00008853 if (RAW == '[') {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008854 ctxt->instate = XML_PARSER_DTD;
8855#ifdef DEBUG_PUSH
8856 fprintf(stderr, "PP: entering DTD\n");
8857#endif
8858 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00008859 /*
8860 * Create and update the external subset.
8861 */
8862 ctxt->inSubset = 2;
8863 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8864 (ctxt->sax->externalSubset != NULL))
8865 ctxt->sax->externalSubset(ctxt->userData,
8866 ctxt->intSubName, ctxt->extSubSystem,
8867 ctxt->extSubURI);
8868 ctxt->inSubset = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008869 ctxt->instate = XML_PARSER_PROLOG;
8870#ifdef DEBUG_PUSH
8871 fprintf(stderr, "PP: entering PROLOG\n");
8872#endif
8873 }
8874 } else if ((cur == '<') && (next == '!') &&
8875 (avail < 9)) {
8876 goto done;
8877 } else {
8878 ctxt->instate = XML_PARSER_START_TAG;
8879#ifdef DEBUG_PUSH
8880 fprintf(stderr, "PP: entering START_TAG\n");
8881#endif
8882 }
8883 break;
Daniel Veillard7f858501999-11-17 17:32:38 +00008884 case XML_PARSER_PROLOG:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008885 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00008886 if (ctxt->input->buf == NULL)
8887 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008888 else
Daniel Veillardcf461992000-03-14 18:30:20 +00008889 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008890 if (avail < 2)
8891 goto done;
Daniel Veillardcf461992000-03-14 18:30:20 +00008892 cur = ctxt->input->cur[0];
8893 next = ctxt->input->cur[1];
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008894 if ((cur == '<') && (next == '?')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00008895 if ((!terminate) &&
8896 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008897 goto done;
8898#ifdef DEBUG_PUSH
8899 fprintf(stderr, "PP: Parsing PI\n");
8900#endif
8901 xmlParsePI(ctxt);
8902 } else if ((cur == '<') && (next == '!') &&
Daniel Veillardcf461992000-03-14 18:30:20 +00008903 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00008904 if ((!terminate) &&
8905 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008906 goto done;
8907#ifdef DEBUG_PUSH
8908 fprintf(stderr, "PP: Parsing Comment\n");
8909#endif
8910 xmlParseComment(ctxt);
8911 ctxt->instate = XML_PARSER_PROLOG;
8912 } else if ((cur == '<') && (next == '!') &&
8913 (avail < 4)) {
8914 goto done;
8915 } else {
8916 ctxt->instate = XML_PARSER_START_TAG;
8917#ifdef DEBUG_PUSH
8918 fprintf(stderr, "PP: entering START_TAG\n");
8919#endif
8920 }
8921 break;
Daniel Veillard7f858501999-11-17 17:32:38 +00008922 case XML_PARSER_EPILOG:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008923 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00008924 if (ctxt->input->buf == NULL)
8925 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008926 else
Daniel Veillardcf461992000-03-14 18:30:20 +00008927 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008928 if (avail < 2)
8929 goto done;
Daniel Veillardcf461992000-03-14 18:30:20 +00008930 cur = ctxt->input->cur[0];
8931 next = ctxt->input->cur[1];
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008932 if ((cur == '<') && (next == '?')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00008933 if ((!terminate) &&
8934 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008935 goto done;
8936#ifdef DEBUG_PUSH
8937 fprintf(stderr, "PP: Parsing PI\n");
8938#endif
8939 xmlParsePI(ctxt);
8940 ctxt->instate = XML_PARSER_EPILOG;
8941 } else if ((cur == '<') && (next == '!') &&
Daniel Veillardcf461992000-03-14 18:30:20 +00008942 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00008943 if ((!terminate) &&
8944 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008945 goto done;
8946#ifdef DEBUG_PUSH
8947 fprintf(stderr, "PP: Parsing Comment\n");
8948#endif
8949 xmlParseComment(ctxt);
8950 ctxt->instate = XML_PARSER_EPILOG;
8951 } else if ((cur == '<') && (next == '!') &&
8952 (avail < 4)) {
8953 goto done;
8954 } else {
8955 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8956 ctxt->sax->error(ctxt->userData,
8957 "Extra content at the end of the document\n");
8958 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008959 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008960 ctxt->errNo = XML_ERR_DOCUMENT_END;
8961 ctxt->instate = XML_PARSER_EOF;
8962#ifdef DEBUG_PUSH
8963 fprintf(stderr, "PP: entering EOF\n");
8964#endif
Daniel Veillardcf461992000-03-14 18:30:20 +00008965 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8966 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008967 ctxt->sax->endDocument(ctxt->userData);
8968 goto done;
8969 }
8970 break;
8971 case XML_PARSER_START_TAG: {
8972 xmlChar *name, *oldname;
8973
Daniel Veillardcf461992000-03-14 18:30:20 +00008974 if ((avail < 2) && (ctxt->inputNr == 1))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008975 goto done;
Daniel Veillardcf461992000-03-14 18:30:20 +00008976 cur = ctxt->input->cur[0];
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008977 if (cur != '<') {
8978 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8979 ctxt->sax->error(ctxt->userData,
8980 "Start tag expect, '<' not found\n");
8981 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8982 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008983 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008984 ctxt->instate = XML_PARSER_EOF;
8985#ifdef DEBUG_PUSH
8986 fprintf(stderr, "PP: entering EOF\n");
8987#endif
Daniel Veillardcf461992000-03-14 18:30:20 +00008988 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8989 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008990 ctxt->sax->endDocument(ctxt->userData);
8991 goto done;
8992 }
Daniel Veillard71b656e2000-01-05 14:46:17 +00008993 if ((!terminate) &&
8994 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008995 goto done;
Daniel Veillardcf461992000-03-14 18:30:20 +00008996 if (ctxt->spaceNr == 0)
8997 spacePush(ctxt, -1);
8998 else
8999 spacePush(ctxt, *ctxt->space);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009000 name = xmlParseStartTag(ctxt);
9001 if (name == NULL) {
Daniel Veillardcf461992000-03-14 18:30:20 +00009002 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009003 ctxt->instate = XML_PARSER_EOF;
9004#ifdef DEBUG_PUSH
9005 fprintf(stderr, "PP: entering EOF\n");
9006#endif
Daniel Veillardcf461992000-03-14 18:30:20 +00009007 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9008 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009009 ctxt->sax->endDocument(ctxt->userData);
9010 goto done;
9011 }
9012 namePush(ctxt, xmlStrdup(name));
9013
9014 /*
9015 * [ VC: Root Element Type ]
9016 * The Name in the document type declaration must match
9017 * the element type of the root element.
9018 */
9019 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
Daniel Veillardcf461992000-03-14 18:30:20 +00009020 ctxt->node && (ctxt->node == ctxt->myDoc->children))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009021 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9022
9023 /*
9024 * Check for an Empty Element.
9025 */
Daniel Veillardcf461992000-03-14 18:30:20 +00009026 if ((RAW == '/') && (NXT(1) == '>')) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009027 SKIP(2);
Daniel Veillardcf461992000-03-14 18:30:20 +00009028 if ((ctxt->sax != NULL) &&
9029 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009030 ctxt->sax->endElement(ctxt->userData, name);
9031 xmlFree(name);
9032 oldname = namePop(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00009033 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009034 if (oldname != NULL) {
9035#ifdef DEBUG_STACK
9036 fprintf(stderr,"Close: popped %s\n", oldname);
9037#endif
9038 xmlFree(oldname);
9039 }
9040 if (ctxt->name == NULL) {
9041 ctxt->instate = XML_PARSER_EPILOG;
9042#ifdef DEBUG_PUSH
9043 fprintf(stderr, "PP: entering EPILOG\n");
9044#endif
9045 } else {
9046 ctxt->instate = XML_PARSER_CONTENT;
9047#ifdef DEBUG_PUSH
9048 fprintf(stderr, "PP: entering CONTENT\n");
9049#endif
9050 }
9051 break;
9052 }
Daniel Veillardcf461992000-03-14 18:30:20 +00009053 if (RAW == '>') {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009054 NEXT;
9055 } else {
9056 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9057 ctxt->sax->error(ctxt->userData,
9058 "Couldn't find end of Start Tag %s\n",
9059 name);
9060 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00009061 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009062 ctxt->errNo = XML_ERR_GT_REQUIRED;
9063
9064 /*
9065 * end of parsing of this node.
9066 */
9067 nodePop(ctxt);
9068 oldname = namePop(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00009069 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009070 if (oldname != NULL) {
9071#ifdef DEBUG_STACK
9072 fprintf(stderr,"Close: popped %s\n", oldname);
9073#endif
9074 xmlFree(oldname);
9075 }
9076 }
9077 xmlFree(name);
9078 ctxt->instate = XML_PARSER_CONTENT;
9079#ifdef DEBUG_PUSH
9080 fprintf(stderr, "PP: entering CONTENT\n");
9081#endif
9082 break;
9083 }
9084 case XML_PARSER_CONTENT:
9085 /*
9086 * Handle preparsed entities and charRef
9087 */
9088 if (ctxt->token != 0) {
9089 xmlChar cur[2] = { 0 , 0 } ;
9090
9091 cur[0] = (xmlChar) ctxt->token;
Daniel Veillardcf461992000-03-14 18:30:20 +00009092 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9093 (ctxt->sax->characters != NULL))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009094 ctxt->sax->characters(ctxt->userData, cur, 1);
9095 ctxt->token = 0;
9096 }
Daniel Veillardcf461992000-03-14 18:30:20 +00009097 if ((avail < 2) && (ctxt->inputNr == 1))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009098 goto done;
Daniel Veillardcf461992000-03-14 18:30:20 +00009099 cur = ctxt->input->cur[0];
9100 next = ctxt->input->cur[1];
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009101 if ((cur == '<') && (next == '?')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00009102 if ((!terminate) &&
9103 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009104 goto done;
9105#ifdef DEBUG_PUSH
9106 fprintf(stderr, "PP: Parsing PI\n");
9107#endif
9108 xmlParsePI(ctxt);
9109 } else if ((cur == '<') && (next == '!') &&
Daniel Veillardcf461992000-03-14 18:30:20 +00009110 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00009111 if ((!terminate) &&
9112 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009113 goto done;
9114#ifdef DEBUG_PUSH
9115 fprintf(stderr, "PP: Parsing Comment\n");
9116#endif
9117 xmlParseComment(ctxt);
9118 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillardcf461992000-03-14 18:30:20 +00009119 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9120 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
9121 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
9122 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
9123 (ctxt->input->cur[8] == '[')) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009124 SKIP(9);
9125 ctxt->instate = XML_PARSER_CDATA_SECTION;
9126#ifdef DEBUG_PUSH
9127 fprintf(stderr, "PP: entering CDATA_SECTION\n");
9128#endif
9129 break;
9130 } else if ((cur == '<') && (next == '!') &&
9131 (avail < 9)) {
9132 goto done;
9133 } else if ((cur == '<') && (next == '/')) {
9134 ctxt->instate = XML_PARSER_END_TAG;
9135#ifdef DEBUG_PUSH
9136 fprintf(stderr, "PP: entering END_TAG\n");
9137#endif
9138 break;
9139 } else if (cur == '<') {
9140 ctxt->instate = XML_PARSER_START_TAG;
9141#ifdef DEBUG_PUSH
9142 fprintf(stderr, "PP: entering START_TAG\n");
9143#endif
9144 break;
9145 } else if (cur == '&') {
Daniel Veillard71b656e2000-01-05 14:46:17 +00009146 if ((!terminate) &&
9147 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009148 goto done;
9149#ifdef DEBUG_PUSH
9150 fprintf(stderr, "PP: Parsing Reference\n");
9151#endif
9152 /* TODO: check generation of subtrees if noent !!! */
9153 xmlParseReference(ctxt);
9154 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00009155 /* TODO Avoid the extra copy, handle directly !!! */
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009156 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00009157 * Goal of the following test is:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009158 * - minimize calls to the SAX 'character' callback
9159 * when they are mergeable
9160 * - handle an problem for isBlank when we only parse
9161 * a sequence of blank chars and the next one is
9162 * not available to check against '<' presence.
9163 * - tries to homogenize the differences in SAX
9164 * callbacks beween the push and pull versions
9165 * of the parser.
9166 */
9167 if ((ctxt->inputNr == 1) &&
9168 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00009169 if ((!terminate) &&
9170 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009171 goto done;
9172 }
9173 ctxt->checkIndex = 0;
9174#ifdef DEBUG_PUSH
9175 fprintf(stderr, "PP: Parsing char data\n");
9176#endif
9177 xmlParseCharData(ctxt, 0);
9178 }
9179 /*
9180 * Pop-up of finished entities.
9181 */
Daniel Veillardcf461992000-03-14 18:30:20 +00009182 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009183 xmlPopInput(ctxt);
9184 break;
9185 case XML_PARSER_CDATA_SECTION: {
9186 /*
9187 * The Push mode need to have the SAX callback for
9188 * cdataBlock merge back contiguous callbacks.
9189 */
9190 int base;
9191
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009192 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9193 if (base < 0) {
9194 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillardcf461992000-03-14 18:30:20 +00009195 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009196 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardcf461992000-03-14 18:30:20 +00009197 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009198 XML_PARSER_BIG_BUFFER_SIZE);
9199 }
9200 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
9201 ctxt->checkIndex = 0;
9202 }
9203 goto done;
9204 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00009205 if ((ctxt->sax != NULL) && (base > 0) &&
9206 (!ctxt->disableSAX)) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009207 if (ctxt->sax->cdataBlock != NULL)
9208 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillardcf461992000-03-14 18:30:20 +00009209 ctxt->input->cur, base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009210 }
9211 SKIP(base + 3);
9212 ctxt->checkIndex = 0;
9213 ctxt->instate = XML_PARSER_CONTENT;
9214#ifdef DEBUG_PUSH
9215 fprintf(stderr, "PP: entering CONTENT\n");
9216#endif
9217 }
9218 break;
9219 }
Daniel Veillard5e5c6231999-12-29 12:49:06 +00009220 case XML_PARSER_END_TAG:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009221 if (avail < 2)
9222 goto done;
Daniel Veillard71b656e2000-01-05 14:46:17 +00009223 if ((!terminate) &&
9224 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009225 goto done;
9226 xmlParseEndTag(ctxt);
9227 if (ctxt->name == NULL) {
9228 ctxt->instate = XML_PARSER_EPILOG;
9229#ifdef DEBUG_PUSH
9230 fprintf(stderr, "PP: entering EPILOG\n");
9231#endif
9232 } else {
9233 ctxt->instate = XML_PARSER_CONTENT;
9234#ifdef DEBUG_PUSH
9235 fprintf(stderr, "PP: entering CONTENT\n");
9236#endif
9237 }
9238 break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009239 case XML_PARSER_DTD: {
9240 /*
9241 * Sorry but progressive parsing of the internal subset
9242 * is not expected to be supported. We first check that
9243 * the full content of the internal subset is available and
9244 * the parsing is launched only at that point.
9245 * Internal subset ends up with "']' S? '>'" in an unescaped
9246 * section and not in a ']]>' sequence which are conditional
9247 * sections (whoever argued to keep that crap in XML deserve
9248 * a place in hell !).
9249 */
9250 int base, i;
9251 xmlChar *buf;
9252 xmlChar quote = 0;
9253
Daniel Veillardcf461992000-03-14 18:30:20 +00009254 base = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009255 if (base < 0) return(0);
9256 if (ctxt->checkIndex > base)
9257 base = ctxt->checkIndex;
Daniel Veillardcf461992000-03-14 18:30:20 +00009258 buf = ctxt->input->buf->buffer->content;
9259 for (;base < ctxt->input->buf->buffer->use;base++) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009260 if (quote != 0) {
9261 if (buf[base] == quote)
9262 quote = 0;
9263 continue;
9264 }
9265 if (buf[base] == '"') {
9266 quote = '"';
9267 continue;
9268 }
9269 if (buf[base] == '\'') {
9270 quote = '\'';
9271 continue;
9272 }
9273 if (buf[base] == ']') {
Daniel Veillardcf461992000-03-14 18:30:20 +00009274 if (base +1 >= ctxt->input->buf->buffer->use)
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009275 break;
9276 if (buf[base + 1] == ']') {
9277 /* conditional crap, skip both ']' ! */
9278 base++;
9279 continue;
9280 }
Daniel Veillardcf461992000-03-14 18:30:20 +00009281 for (i = 0;base + i < ctxt->input->buf->buffer->use;i++) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009282 if (buf[base + i] == '>')
9283 goto found_end_int_subset;
9284 }
9285 break;
9286 }
9287 }
9288 /*
9289 * We didn't found the end of the Internal subset
9290 */
9291 if (quote == 0)
9292 ctxt->checkIndex = base;
9293#ifdef DEBUG_PUSH
9294 if (next == 0)
9295 fprintf(stderr, "PP: lookup of int subset end filed\n");
9296#endif
9297 goto done;
9298
9299found_end_int_subset:
9300 xmlParseInternalSubset(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00009301 ctxt->inSubset = 2;
9302 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9303 (ctxt->sax->externalSubset != NULL))
9304 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9305 ctxt->extSubSystem, ctxt->extSubURI);
9306 ctxt->inSubset = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009307 ctxt->instate = XML_PARSER_PROLOG;
9308 ctxt->checkIndex = 0;
9309#ifdef DEBUG_PUSH
9310 fprintf(stderr, "PP: entering PROLOG\n");
9311#endif
9312 break;
9313 }
Daniel Veillard7f858501999-11-17 17:32:38 +00009314 case XML_PARSER_COMMENT:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009315 fprintf(stderr, "PP: internal error, state == COMMENT\n");
9316 ctxt->instate = XML_PARSER_CONTENT;
9317#ifdef DEBUG_PUSH
9318 fprintf(stderr, "PP: entering CONTENT\n");
9319#endif
9320 break;
9321 case XML_PARSER_PI:
9322 fprintf(stderr, "PP: internal error, state == PI\n");
9323 ctxt->instate = XML_PARSER_CONTENT;
9324#ifdef DEBUG_PUSH
9325 fprintf(stderr, "PP: entering CONTENT\n");
9326#endif
9327 break;
9328 case XML_PARSER_ENTITY_DECL:
9329 fprintf(stderr, "PP: internal error, state == ENTITY_DECL\n");
9330 ctxt->instate = XML_PARSER_DTD;
9331#ifdef DEBUG_PUSH
9332 fprintf(stderr, "PP: entering DTD\n");
9333#endif
9334 break;
9335 case XML_PARSER_ENTITY_VALUE:
9336 fprintf(stderr, "PP: internal error, state == ENTITY_VALUE\n");
9337 ctxt->instate = XML_PARSER_CONTENT;
9338#ifdef DEBUG_PUSH
9339 fprintf(stderr, "PP: entering DTD\n");
9340#endif
9341 break;
9342 case XML_PARSER_ATTRIBUTE_VALUE:
9343 fprintf(stderr, "PP: internal error, state == ATTRIBUTE_VALUE\n");
9344 ctxt->instate = XML_PARSER_START_TAG;
9345#ifdef DEBUG_PUSH
9346 fprintf(stderr, "PP: entering START_TAG\n");
9347#endif
9348 break;
Daniel Veillardcf461992000-03-14 18:30:20 +00009349 case XML_PARSER_SYSTEM_LITERAL:
9350 fprintf(stderr, "PP: internal error, state == SYSTEM_LITERAL\n");
9351 ctxt->instate = XML_PARSER_START_TAG;
9352#ifdef DEBUG_PUSH
9353 fprintf(stderr, "PP: entering START_TAG\n");
9354#endif
9355 break;
Daniel Veillard7f858501999-11-17 17:32:38 +00009356 }
9357 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009358done:
9359#ifdef DEBUG_PUSH
9360 fprintf(stderr, "PP: done %d\n", ret);
9361#endif
Daniel Veillard7f858501999-11-17 17:32:38 +00009362 return(ret);
9363}
9364
9365/**
Daniel Veillard71b656e2000-01-05 14:46:17 +00009366 * xmlParseTry:
9367 * @ctxt: an XML parser context
9368 *
9369 * Try to progress on parsing
9370 *
9371 * Returns zero if no parsing was possible
9372 */
9373int
9374xmlParseTry(xmlParserCtxtPtr ctxt) {
9375 return(xmlParseTryOrFinish(ctxt, 0));
9376}
9377
9378/**
Daniel Veillard7f858501999-11-17 17:32:38 +00009379 * xmlParseChunk:
9380 * @ctxt: an XML parser context
9381 * @chunk: an char array
9382 * @size: the size in byte of the chunk
9383 * @terminate: last chunk indicator
9384 *
9385 * Parse a Chunk of memory
9386 *
9387 * Returns zero if no error, the xmlParserErrors otherwise.
9388 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009389int
Daniel Veillard7f858501999-11-17 17:32:38 +00009390xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9391 int terminate) {
Daniel Veillarda819dac1999-11-24 18:04:22 +00009392 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009393 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9394 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9395 int cur = ctxt->input->cur - ctxt->input->base;
9396
Daniel Veillarda819dac1999-11-24 18:04:22 +00009397 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009398 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9399 ctxt->input->cur = ctxt->input->base + cur;
9400#ifdef DEBUG_PUSH
9401 fprintf(stderr, "PP: pushed %d\n", size);
9402#endif
9403
Daniel Veillardd0f7f742000-02-02 17:42:48 +00009404 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
9405 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009406 } else if (ctxt->instate != XML_PARSER_EOF)
Daniel Veillard71b656e2000-01-05 14:46:17 +00009407 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009408 if (terminate) {
Daniel Veillardcf461992000-03-14 18:30:20 +00009409 /*
9410 * Grab the encoding if it was added on-the-fly
9411 */
9412 if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
9413 (ctxt->myDoc->encoding == NULL)) {
9414 ctxt->myDoc->encoding = ctxt->encoding;
9415 ctxt->encoding = NULL;
9416 }
9417
9418 /*
9419 * Check for termination
9420 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009421 if ((ctxt->instate != XML_PARSER_EOF) &&
9422 (ctxt->instate != XML_PARSER_EPILOG)) {
9423 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9424 ctxt->sax->error(ctxt->userData,
9425 "Extra content at the end of the document\n");
9426 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00009427 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009428 ctxt->errNo = XML_ERR_DOCUMENT_END;
9429 }
9430 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillardcf461992000-03-14 18:30:20 +00009431 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9432 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009433 ctxt->sax->endDocument(ctxt->userData);
9434 }
9435 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard7f858501999-11-17 17:32:38 +00009436 }
9437 return((xmlParserErrors) ctxt->errNo);
9438}
9439
9440/************************************************************************
9441 * *
Daniel Veillardb05deb71999-08-10 19:04:08 +00009442 * I/O front end functions to the parser *
9443 * *
9444 ************************************************************************/
9445
Daniel Veillard11e00581998-10-24 18:27:49 +00009446/**
Daniel Veillardcf461992000-03-14 18:30:20 +00009447 * xmlCreatePushParserCtxt:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009448 * @sax: a SAX handler
9449 * @user_data: The user data returned on SAX callbacks
9450 * @chunk: a pointer to an array of chars
9451 * @size: number of chars in the array
9452 * @filename: an optional file name or URI
9453 *
9454 * Create a parser context for using the XML parser in push mode
9455 * To allow content encoding detection, @size should be >= 4
9456 * The value of @filename is used for fetching external entities
9457 * and error/warning reports.
9458 *
9459 * Returns the new parser context or NULL
9460 */
9461xmlParserCtxtPtr
9462xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9463 const char *chunk, int size, const char *filename) {
9464 xmlParserCtxtPtr ctxt;
9465 xmlParserInputPtr inputStream;
9466 xmlParserInputBufferPtr buf;
9467 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9468
9469 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00009470 * plug some encoding conversion routines
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009471 */
9472 if ((chunk != NULL) && (size >= 4))
Daniel Veillardcf461992000-03-14 18:30:20 +00009473 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009474
9475 buf = xmlAllocParserInputBuffer(enc);
9476 if (buf == NULL) return(NULL);
9477
9478 ctxt = xmlNewParserCtxt();
9479 if (ctxt == NULL) {
9480 xmlFree(buf);
9481 return(NULL);
9482 }
9483 if (sax != NULL) {
9484 if (ctxt->sax != &xmlDefaultSAXHandler)
9485 xmlFree(ctxt->sax);
9486 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9487 if (ctxt->sax == NULL) {
9488 xmlFree(buf);
9489 xmlFree(ctxt);
9490 return(NULL);
9491 }
9492 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9493 if (user_data != NULL)
9494 ctxt->userData = user_data;
9495 }
9496 if (filename == NULL) {
9497 ctxt->directory = NULL;
9498 } else {
9499 ctxt->directory = xmlParserGetDirectory(filename);
9500 }
9501
9502 inputStream = xmlNewInputStream(ctxt);
9503 if (inputStream == NULL) {
9504 xmlFreeParserCtxt(ctxt);
9505 return(NULL);
9506 }
9507
9508 if (filename == NULL)
9509 inputStream->filename = NULL;
9510 else
9511 inputStream->filename = xmlMemStrdup(filename);
9512 inputStream->buf = buf;
9513 inputStream->base = inputStream->buf->buffer->content;
9514 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillardcf461992000-03-14 18:30:20 +00009515 if (enc != XML_CHAR_ENCODING_NONE) {
9516 xmlSwitchEncoding(ctxt, enc);
9517 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009518
9519 inputPush(ctxt, inputStream);
9520
9521 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9522 (ctxt->input->buf != NULL)) {
9523 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9524#ifdef DEBUG_PUSH
9525 fprintf(stderr, "PP: pushed %d\n", size);
9526#endif
9527 }
9528
9529 return(ctxt);
9530}
9531
9532/**
Daniel Veillardcf461992000-03-14 18:30:20 +00009533 * xmlCreateDocParserCtxt:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00009534 * @cur: a pointer to an array of xmlChar
Daniel Veillardd692aa41999-02-28 21:54:31 +00009535 *
9536 * Create a parser context for an XML in-memory document.
9537 *
9538 * Returns the new parser context or NULL
9539 */
9540xmlParserCtxtPtr
Daniel Veillarddd6b3671999-09-23 22:19:22 +00009541xmlCreateDocParserCtxt(xmlChar *cur) {
Daniel Veillardd692aa41999-02-28 21:54:31 +00009542 xmlParserCtxtPtr ctxt;
9543 xmlParserInputPtr input;
9544
Daniel Veillardb05deb71999-08-10 19:04:08 +00009545 ctxt = xmlNewParserCtxt();
Daniel Veillardd692aa41999-02-28 21:54:31 +00009546 if (ctxt == NULL) {
Daniel Veillardd692aa41999-02-28 21:54:31 +00009547 return(NULL);
9548 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00009549 input = xmlNewInputStream(ctxt);
Daniel Veillardd692aa41999-02-28 21:54:31 +00009550 if (input == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00009551 xmlFreeParserCtxt(ctxt);
Daniel Veillardd692aa41999-02-28 21:54:31 +00009552 return(NULL);
9553 }
9554
Daniel Veillardd692aa41999-02-28 21:54:31 +00009555 input->base = cur;
9556 input->cur = cur;
Daniel Veillardd692aa41999-02-28 21:54:31 +00009557
9558 inputPush(ctxt, input);
9559 return(ctxt);
9560}
9561
9562/**
Daniel Veillardcf461992000-03-14 18:30:20 +00009563 * xmlSAXParseDoc:
Daniel Veillard42dc9b31998-11-09 01:17:21 +00009564 * @sax: the SAX handler block
Daniel Veillarddd6b3671999-09-23 22:19:22 +00009565 * @cur: a pointer to an array of xmlChar
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009566 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9567 * documents
Daniel Veillard11e00581998-10-24 18:27:49 +00009568 *
9569 * parse an XML in-memory document and build a tree.
Daniel Veillard42dc9b31998-11-09 01:17:21 +00009570 * It use the given SAX function block to handle the parsing callback.
9571 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillard11e00581998-10-24 18:27:49 +00009572 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00009573 * Returns the resulting document tree
Daniel Veillard260a68f1998-08-13 03:39:55 +00009574 */
9575
Daniel Veillard1e346af1999-02-22 10:33:01 +00009576xmlDocPtr
Daniel Veillarddd6b3671999-09-23 22:19:22 +00009577xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00009578 xmlDocPtr ret;
9579 xmlParserCtxtPtr ctxt;
Daniel Veillard260a68f1998-08-13 03:39:55 +00009580
9581 if (cur == NULL) return(NULL);
9582
Daniel Veillardd692aa41999-02-28 21:54:31 +00009583
9584 ctxt = xmlCreateDocParserCtxt(cur);
9585 if (ctxt == NULL) return(NULL);
Daniel Veillard27d88741999-05-29 11:51:49 +00009586 if (sax != NULL) {
9587 ctxt->sax = sax;
9588 ctxt->userData = NULL;
9589 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00009590
9591 xmlParseDocument(ctxt);
Daniel Veillard517752b1999-04-05 12:20:10 +00009592 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009593 else {
9594 ret = NULL;
Daniel Veillard517752b1999-04-05 12:20:10 +00009595 xmlFreeDoc(ctxt->myDoc);
9596 ctxt->myDoc = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009597 }
Daniel Veillard97fea181999-06-26 23:07:37 +00009598 if (sax != NULL)
9599 ctxt->sax = NULL;
Daniel Veillardd692aa41999-02-28 21:54:31 +00009600 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00009601
9602 return(ret);
9603}
9604
Daniel Veillard11e00581998-10-24 18:27:49 +00009605/**
Daniel Veillardcf461992000-03-14 18:30:20 +00009606 * xmlParseDoc:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00009607 * @cur: a pointer to an array of xmlChar
Daniel Veillard42dc9b31998-11-09 01:17:21 +00009608 *
9609 * parse an XML in-memory document and build a tree.
9610 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00009611 * Returns the resulting document tree
Daniel Veillard42dc9b31998-11-09 01:17:21 +00009612 */
9613
Daniel Veillard1e346af1999-02-22 10:33:01 +00009614xmlDocPtr
Daniel Veillarddd6b3671999-09-23 22:19:22 +00009615xmlParseDoc(xmlChar *cur) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009616 return(xmlSAXParseDoc(NULL, cur, 0));
9617}
9618
9619/**
Daniel Veillardcf461992000-03-14 18:30:20 +00009620 * xmlSAXParseDTD:
Daniel Veillard011b63c1999-06-02 17:44:04 +00009621 * @sax: the SAX handler block
9622 * @ExternalID: a NAME* containing the External ID of the DTD
9623 * @SystemID: a NAME* containing the URL to the DTD
9624 *
9625 * Load and parse an external subset.
9626 *
9627 * Returns the resulting xmlDtdPtr or NULL in case of error.
9628 */
9629
9630xmlDtdPtr
Daniel Veillarddd6b3671999-09-23 22:19:22 +00009631xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9632 const xmlChar *SystemID) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00009633 xmlDtdPtr ret = NULL;
9634 xmlParserCtxtPtr ctxt;
Daniel Veillard14fff061999-06-22 21:49:07 +00009635 xmlParserInputPtr input = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +00009636 xmlCharEncoding enc;
9637
9638 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9639
Daniel Veillardb05deb71999-08-10 19:04:08 +00009640 ctxt = xmlNewParserCtxt();
Daniel Veillard011b63c1999-06-02 17:44:04 +00009641 if (ctxt == NULL) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00009642 return(NULL);
9643 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00009644
9645 /*
9646 * Set-up the SAX context
9647 */
9648 if (ctxt == NULL) return(NULL);
9649 if (sax != NULL) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00009650 if (ctxt->sax != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00009651 xmlFree(ctxt->sax);
Daniel Veillard011b63c1999-06-02 17:44:04 +00009652 ctxt->sax = sax;
9653 ctxt->userData = NULL;
9654 }
9655
9656 /*
9657 * Ask the Entity resolver to load the damn thing
9658 */
9659
9660 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9661 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9662 if (input == NULL) {
Daniel Veillard97fea181999-06-26 23:07:37 +00009663 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +00009664 xmlFreeParserCtxt(ctxt);
9665 return(NULL);
9666 }
9667
9668 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00009669 * plug some encoding conversion routines here.
Daniel Veillard011b63c1999-06-02 17:44:04 +00009670 */
9671 xmlPushInput(ctxt, input);
Daniel Veillardcf461992000-03-14 18:30:20 +00009672 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
Daniel Veillard011b63c1999-06-02 17:44:04 +00009673 xmlSwitchEncoding(ctxt, enc);
9674
Daniel Veillardb05deb71999-08-10 19:04:08 +00009675 if (input->filename == NULL)
Daniel Veillardcf461992000-03-14 18:30:20 +00009676 input->filename = (char *) xmlStrdup(SystemID);
Daniel Veillard011b63c1999-06-02 17:44:04 +00009677 input->line = 1;
9678 input->col = 1;
9679 input->base = ctxt->input->cur;
9680 input->cur = ctxt->input->cur;
9681 input->free = NULL;
9682
9683 /*
9684 * let's parse that entity knowing it's an external subset.
9685 */
9686 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9687
9688 if (ctxt->myDoc != NULL) {
9689 if (ctxt->wellFormed) {
9690 ret = ctxt->myDoc->intSubset;
9691 ctxt->myDoc->intSubset = NULL;
9692 } else {
9693 ret = NULL;
9694 }
9695 xmlFreeDoc(ctxt->myDoc);
9696 ctxt->myDoc = NULL;
9697 }
Daniel Veillard97fea181999-06-26 23:07:37 +00009698 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +00009699 xmlFreeParserCtxt(ctxt);
9700
9701 return(ret);
9702}
9703
9704/**
Daniel Veillardcf461992000-03-14 18:30:20 +00009705 * xmlParseDTD:
Daniel Veillard011b63c1999-06-02 17:44:04 +00009706 * @ExternalID: a NAME* containing the External ID of the DTD
9707 * @SystemID: a NAME* containing the URL to the DTD
9708 *
9709 * Load and parse an external subset.
9710 *
9711 * Returns the resulting xmlDtdPtr or NULL in case of error.
9712 */
9713
9714xmlDtdPtr
Daniel Veillarddd6b3671999-09-23 22:19:22 +00009715xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00009716 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9717}
9718
9719/**
Daniel Veillardcf461992000-03-14 18:30:20 +00009720 * xmlSAXParseBalancedChunk:
Daniel Veillard0142b842000-01-14 14:45:24 +00009721 * @ctx: an XML parser context (possibly NULL)
9722 * @sax: the SAX handler bloc (possibly NULL)
9723 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9724 * @input: a parser input stream
9725 * @enc: the encoding
9726 *
9727 * Parse a well-balanced chunk of an XML document
9728 * The user has to provide SAX callback block whose routines will be
9729 * called by the parser
9730 * The allowed sequence for the Well Balanced Chunk is the one defined by
9731 * the content production in the XML grammar:
9732 *
9733 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9734 *
Daniel Veillardcf461992000-03-14 18:30:20 +00009735 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
Daniel Veillard0142b842000-01-14 14:45:24 +00009736 * the error code otherwise
9737 */
9738
9739int
9740xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
9741 void *user_data, xmlParserInputPtr input,
9742 xmlCharEncoding enc) {
9743 xmlParserCtxtPtr ctxt;
9744 int ret;
9745
9746 if (input == NULL) return(-1);
9747
9748 if (ctx != NULL)
9749 ctxt = ctx;
9750 else {
9751 ctxt = xmlNewParserCtxt();
9752 if (ctxt == NULL)
9753 return(-1);
9754 if (sax == NULL)
9755 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9756 }
9757
9758 /*
9759 * Set-up the SAX context
9760 */
9761 if (sax != NULL) {
9762 if (ctxt->sax != NULL)
9763 xmlFree(ctxt->sax);
9764 ctxt->sax = sax;
9765 ctxt->userData = user_data;
9766 }
9767
9768 /*
9769 * plug some encoding conversion routines here.
9770 */
9771 xmlPushInput(ctxt, input);
9772 if (enc != XML_CHAR_ENCODING_NONE)
9773 xmlSwitchEncoding(ctxt, enc);
9774
9775 /*
9776 * let's parse that entity knowing it's an external subset.
9777 */
9778 xmlParseContent(ctxt);
9779 ret = ctxt->errNo;
9780
9781 if (ctx == NULL) {
9782 if (sax != NULL)
9783 ctxt->sax = NULL;
9784 else
9785 xmlFreeDoc(ctxt->myDoc);
9786 xmlFreeParserCtxt(ctxt);
9787 }
9788 return(ret);
9789}
9790
9791/**
Daniel Veillardcf461992000-03-14 18:30:20 +00009792 * xmlParseExternalEntity:
Daniel Veillard0142b842000-01-14 14:45:24 +00009793 * @doc: the document the chunk pertains to
Daniel Veillardcf461992000-03-14 18:30:20 +00009794 * @sax: the SAX handler bloc (possibly NULL)
9795 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9796 * @depth: Used for loop detection, use 0
9797 * @URL: the URL for the entity to load
9798 * @ID: the System ID for the entity to load
9799 * @list: the return value for the set of parsed nodes
Daniel Veillard0142b842000-01-14 14:45:24 +00009800 *
Daniel Veillardcf461992000-03-14 18:30:20 +00009801 * Parse an external general entity
9802 * An external general parsed entity is well-formed if it matches the
9803 * production labeled extParsedEnt.
9804 *
9805 * [78] extParsedEnt ::= TextDecl? content
9806 *
9807 * Returns 0 if the entity is well formed, -1 in case of args problem and
9808 * the parser error code otherwise
Daniel Veillard0142b842000-01-14 14:45:24 +00009809 */
9810
Daniel Veillardcf461992000-03-14 18:30:20 +00009811int
9812xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
9813 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
9814 xmlParserCtxtPtr ctxt;
9815 xmlDocPtr newDoc;
9816 xmlSAXHandlerPtr oldsax = NULL;
9817 int ret = 0;
9818
9819 if (depth > 40) {
9820 return(XML_ERR_ENTITY_LOOP);
9821 }
9822
9823
9824
9825 if (list != NULL)
9826 *list = NULL;
9827 if ((URL == NULL) && (ID == NULL))
9828 return(-1);
9829
9830
9831 ctxt = xmlCreateEntityParserCtxt(URL, ID, doc->URL);
9832 if (ctxt == NULL) return(-1);
9833 ctxt->userData = ctxt;
9834 if (sax != NULL) {
9835 oldsax = ctxt->sax;
9836 ctxt->sax = sax;
9837 if (user_data != NULL)
9838 ctxt->userData = user_data;
9839 }
9840 newDoc = xmlNewDoc(BAD_CAST "1.0");
9841 if (newDoc == NULL) {
9842 xmlFreeParserCtxt(ctxt);
9843 return(-1);
9844 }
9845 if (doc != NULL) {
9846 newDoc->intSubset = doc->intSubset;
9847 newDoc->extSubset = doc->extSubset;
9848 }
9849 if (doc->URL != NULL) {
9850 newDoc->URL = xmlStrdup(doc->URL);
9851 }
9852 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9853 if (newDoc->children == NULL) {
9854 if (sax != NULL)
9855 ctxt->sax = oldsax;
9856 xmlFreeParserCtxt(ctxt);
9857 newDoc->intSubset = NULL;
9858 newDoc->extSubset = NULL;
9859 xmlFreeDoc(newDoc);
9860 return(-1);
9861 }
9862 nodePush(ctxt, newDoc->children);
9863 if (doc == NULL) {
9864 ctxt->myDoc = newDoc;
9865 } else {
9866 ctxt->myDoc = doc;
9867 newDoc->children->doc = doc;
9868 }
9869
9870 /*
9871 * Parse a possible text declaration first
9872 */
9873 GROW;
9874 if ((RAW == '<') && (NXT(1) == '?') &&
9875 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9876 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9877 xmlParseTextDecl(ctxt);
9878 }
9879
9880 /*
9881 * Doing validity checking on chunk doesn't make sense
9882 */
9883 ctxt->instate = XML_PARSER_CONTENT;
9884 ctxt->validate = 0;
9885 ctxt->depth = depth;
9886
9887 xmlParseContent(ctxt);
9888
9889 if ((RAW == '<') && (NXT(1) == '/')) {
9890 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9891 ctxt->sax->error(ctxt->userData,
9892 "chunk is not well balanced\n");
9893 ctxt->wellFormed = 0;
9894 ctxt->disableSAX = 1;
9895 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9896 } else if (RAW != 0) {
9897 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9898 ctxt->sax->error(ctxt->userData,
9899 "extra content at the end of well balanced chunk\n");
9900 ctxt->wellFormed = 0;
9901 ctxt->disableSAX = 1;
9902 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9903 }
9904 if (ctxt->node != newDoc->children) {
9905 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9906 ctxt->sax->error(ctxt->userData,
9907 "chunk is not well balanced\n");
9908 ctxt->wellFormed = 0;
9909 ctxt->disableSAX = 1;
9910 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9911 }
9912
9913 if (!ctxt->wellFormed) {
9914 if (ctxt->errNo == 0)
9915 ret = 1;
9916 else
9917 ret = ctxt->errNo;
9918 } else {
9919 if (list != NULL) {
9920 xmlNodePtr cur;
9921
9922 /*
9923 * Return the newly created nodeset after unlinking it from
9924 * they pseudo parent.
9925 */
9926 cur = newDoc->children->children;
9927 *list = cur;
9928 while (cur != NULL) {
9929 cur->parent = NULL;
9930 cur = cur->next;
9931 }
9932 newDoc->children->children = NULL;
9933 }
9934 ret = 0;
9935 }
9936 if (sax != NULL)
9937 ctxt->sax = oldsax;
9938 xmlFreeParserCtxt(ctxt);
9939 newDoc->intSubset = NULL;
9940 newDoc->extSubset = NULL;
9941 xmlFreeDoc(newDoc);
9942
9943 return(ret);
Daniel Veillard0142b842000-01-14 14:45:24 +00009944}
9945
9946/**
Daniel Veillardcf461992000-03-14 18:30:20 +00009947 * xmlParseBalancedChunk:
9948 * @doc: the document the chunk pertains to
9949 * @sax: the SAX handler bloc (possibly NULL)
9950 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9951 * @depth: Used for loop detection, use 0
9952 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9953 * @list: the return value for the set of parsed nodes
9954 *
9955 * Parse a well-balanced chunk of an XML document
9956 * called by the parser
9957 * The allowed sequence for the Well Balanced Chunk is the one defined by
9958 * the content production in the XML grammar:
9959 *
9960 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9961 *
9962 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9963 * the parser error code otherwise
9964 */
9965
9966int
9967xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9968 void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
9969 xmlParserCtxtPtr ctxt;
9970 xmlDocPtr newDoc;
9971 xmlSAXHandlerPtr oldsax = NULL;
9972 int size;
9973 int ret = 0;
9974
9975 if (depth > 40) {
9976 return(XML_ERR_ENTITY_LOOP);
9977 }
9978
9979
9980 if (list != NULL)
9981 *list = NULL;
9982 if (string == NULL)
9983 return(-1);
9984
9985 size = xmlStrlen(string);
9986
9987 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9988 if (ctxt == NULL) return(-1);
9989 ctxt->userData = ctxt;
9990 if (sax != NULL) {
9991 oldsax = ctxt->sax;
9992 ctxt->sax = sax;
9993 if (user_data != NULL)
9994 ctxt->userData = user_data;
9995 }
9996 newDoc = xmlNewDoc(BAD_CAST "1.0");
9997 if (newDoc == NULL) {
9998 xmlFreeParserCtxt(ctxt);
9999 return(-1);
10000 }
10001 if (doc != NULL) {
10002 newDoc->intSubset = doc->intSubset;
10003 newDoc->extSubset = doc->extSubset;
10004 }
10005 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10006 if (newDoc->children == NULL) {
10007 if (sax != NULL)
10008 ctxt->sax = oldsax;
10009 xmlFreeParserCtxt(ctxt);
10010 newDoc->intSubset = NULL;
10011 newDoc->extSubset = NULL;
10012 xmlFreeDoc(newDoc);
10013 return(-1);
10014 }
10015 nodePush(ctxt, newDoc->children);
10016 if (doc == NULL) {
10017 ctxt->myDoc = newDoc;
10018 } else {
10019 ctxt->myDoc = doc;
10020 newDoc->children->doc = doc;
10021 }
10022 ctxt->instate = XML_PARSER_CONTENT;
10023 ctxt->depth = depth;
10024
10025 /*
10026 * Doing validity checking on chunk doesn't make sense
10027 */
10028 ctxt->validate = 0;
10029
10030 xmlParseContent(ctxt);
10031
10032 if ((RAW == '<') && (NXT(1) == '/')) {
10033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10034 ctxt->sax->error(ctxt->userData,
10035 "chunk is not well balanced\n");
10036 ctxt->wellFormed = 0;
10037 ctxt->disableSAX = 1;
10038 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10039 } else if (RAW != 0) {
10040 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10041 ctxt->sax->error(ctxt->userData,
10042 "extra content at the end of well balanced chunk\n");
10043 ctxt->wellFormed = 0;
10044 ctxt->disableSAX = 1;
10045 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10046 }
10047 if (ctxt->node != newDoc->children) {
10048 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10049 ctxt->sax->error(ctxt->userData,
10050 "chunk is not well balanced\n");
10051 ctxt->wellFormed = 0;
10052 ctxt->disableSAX = 1;
10053 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10054 }
10055
10056 if (!ctxt->wellFormed) {
10057 if (ctxt->errNo == 0)
10058 ret = 1;
10059 else
10060 ret = ctxt->errNo;
10061 } else {
10062 if (list != NULL) {
10063 xmlNodePtr cur;
10064
10065 /*
10066 * Return the newly created nodeset after unlinking it from
10067 * they pseudo parent.
10068 */
10069 cur = newDoc->children->children;
10070 *list = cur;
10071 while (cur != NULL) {
10072 cur->parent = NULL;
10073 cur = cur->next;
10074 }
10075 newDoc->children->children = NULL;
10076 }
10077 ret = 0;
10078 }
10079 if (sax != NULL)
10080 ctxt->sax = oldsax;
10081 xmlFreeParserCtxt(ctxt);
10082 newDoc->intSubset = NULL;
10083 newDoc->extSubset = NULL;
10084 xmlFreeDoc(newDoc);
10085
10086 return(ret);
10087}
10088
10089/**
10090 * xmlParseBalancedChunkFile:
Daniel Veillard0142b842000-01-14 14:45:24 +000010091 * @doc: the document the chunk pertains to
10092 *
10093 * Parse a well-balanced chunk of an XML document contained in a file
10094 *
10095 * Returns the resulting list of nodes resulting from the parsing,
10096 * they are not added to @node
10097 */
10098
10099xmlNodePtr
10100xmlParseBalancedChunkFile(xmlDocPtr doc, xmlNodePtr node) {
Daniel Veillardcf461992000-03-14 18:30:20 +000010101 /* TODO !!! */
10102 return(NULL);
Daniel Veillard0142b842000-01-14 14:45:24 +000010103}
10104
10105/**
Daniel Veillardcf461992000-03-14 18:30:20 +000010106 * xmlRecoverDoc:
Daniel Veillarddd6b3671999-09-23 22:19:22 +000010107 * @cur: a pointer to an array of xmlChar
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000010108 *
10109 * parse an XML in-memory document and build a tree.
10110 * In the case the document is not Well Formed, a tree is built anyway
10111 *
Daniel Veillard1e346af1999-02-22 10:33:01 +000010112 * Returns the resulting document tree
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000010113 */
10114
Daniel Veillard1e346af1999-02-22 10:33:01 +000010115xmlDocPtr
Daniel Veillarddd6b3671999-09-23 22:19:22 +000010116xmlRecoverDoc(xmlChar *cur) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000010117 return(xmlSAXParseDoc(NULL, cur, 1));
Daniel Veillard42dc9b31998-11-09 01:17:21 +000010118}
10119
10120/**
Daniel Veillardcf461992000-03-14 18:30:20 +000010121 * xmlCreateEntityParserCtxt:
10122 * @URL: the entity URL
10123 * @ID: the entity PUBLIC ID
10124 * @base: a posible base for the target URI
10125 *
10126 * Create a parser context for an external entity
10127 * Automatic support for ZLIB/Compress compressed document is provided
10128 * by default if found at compile-time.
10129 *
10130 * Returns the new parser context or NULL
10131 */
10132xmlParserCtxtPtr
10133xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10134 const xmlChar *base) {
10135 xmlParserCtxtPtr ctxt;
10136 xmlParserInputPtr inputStream;
10137 char *directory = NULL;
10138
10139 ctxt = xmlNewParserCtxt();
10140 if (ctxt == NULL) {
10141 return(NULL);
10142 }
10143
10144 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10145 if (inputStream == NULL) {
10146 xmlFreeParserCtxt(ctxt);
10147 return(NULL);
10148 }
10149
10150 inputPush(ctxt, inputStream);
10151
10152 if ((ctxt->directory == NULL) && (directory == NULL))
10153 directory = xmlParserGetDirectory((char *)URL);
10154 if ((ctxt->directory == NULL) && (directory != NULL))
10155 ctxt->directory = directory;
10156
10157 return(ctxt);
10158}
10159
10160/**
10161 * xmlCreateFileParserCtxt:
Daniel Veillard11e00581998-10-24 18:27:49 +000010162 * @filename: the filename
10163 *
Daniel Veillardd692aa41999-02-28 21:54:31 +000010164 * Create a parser context for a file content.
10165 * Automatic support for ZLIB/Compress compressed document is provided
10166 * by default if found at compile-time.
Daniel Veillard11e00581998-10-24 18:27:49 +000010167 *
Daniel Veillardd692aa41999-02-28 21:54:31 +000010168 * Returns the new parser context or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +000010169 */
Daniel Veillardd692aa41999-02-28 21:54:31 +000010170xmlParserCtxtPtr
10171xmlCreateFileParserCtxt(const char *filename)
10172{
10173 xmlParserCtxtPtr ctxt;
Daniel Veillard260a68f1998-08-13 03:39:55 +000010174 xmlParserInputPtr inputStream;
Daniel Veillarde2d034d1999-07-27 19:52:06 +000010175 xmlParserInputBufferPtr buf;
Daniel Veillardb05deb71999-08-10 19:04:08 +000010176 char *directory = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +000010177
Daniel Veillarde2d034d1999-07-27 19:52:06 +000010178 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
10179 if (buf == NULL) return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +000010180
Daniel Veillardb05deb71999-08-10 19:04:08 +000010181 ctxt = xmlNewParserCtxt();
Daniel Veillard260a68f1998-08-13 03:39:55 +000010182 if (ctxt == NULL) {
Daniel Veillard260a68f1998-08-13 03:39:55 +000010183 return(NULL);
10184 }
Daniel Veillardb05deb71999-08-10 19:04:08 +000010185
10186 inputStream = xmlNewInputStream(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +000010187 if (inputStream == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +000010188 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +000010189 return(NULL);
10190 }
10191
Daniel Veillard6454aec1999-09-02 22:04:43 +000010192 inputStream->filename = xmlMemStrdup(filename);
Daniel Veillarde2d034d1999-07-27 19:52:06 +000010193 inputStream->buf = buf;
Daniel Veillarde2d034d1999-07-27 19:52:06 +000010194 inputStream->base = inputStream->buf->buffer->content;
10195 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard260a68f1998-08-13 03:39:55 +000010196
10197 inputPush(ctxt, inputStream);
Daniel Veillardb05deb71999-08-10 19:04:08 +000010198 if ((ctxt->directory == NULL) && (directory == NULL))
10199 directory = xmlParserGetDirectory(filename);
10200 if ((ctxt->directory == NULL) && (directory != NULL))
10201 ctxt->directory = directory;
10202
Daniel Veillardd692aa41999-02-28 21:54:31 +000010203 return(ctxt);
10204}
10205
10206/**
Daniel Veillardcf461992000-03-14 18:30:20 +000010207 * xmlSAXParseFile:
Daniel Veillardd692aa41999-02-28 21:54:31 +000010208 * @sax: the SAX handler block
10209 * @filename: the filename
10210 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10211 * documents
10212 *
10213 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10214 * compressed document is provided by default if found at compile-time.
10215 * It use the given SAX function block to handle the parsing callback.
10216 * If sax is NULL, fallback to the default DOM tree building routines.
10217 *
10218 * Returns the resulting document tree
10219 */
10220
Daniel Veillard011b63c1999-06-02 17:44:04 +000010221xmlDocPtr
10222xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
Daniel Veillardd692aa41999-02-28 21:54:31 +000010223 int recovery) {
10224 xmlDocPtr ret;
10225 xmlParserCtxtPtr ctxt;
Daniel Veillardb05deb71999-08-10 19:04:08 +000010226 char *directory = NULL;
Daniel Veillardd692aa41999-02-28 21:54:31 +000010227
10228 ctxt = xmlCreateFileParserCtxt(filename);
10229 if (ctxt == NULL) return(NULL);
Daniel Veillard27d88741999-05-29 11:51:49 +000010230 if (sax != NULL) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +000010231 if (ctxt->sax != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +000010232 xmlFree(ctxt->sax);
Daniel Veillard27d88741999-05-29 11:51:49 +000010233 ctxt->sax = sax;
10234 ctxt->userData = NULL;
10235 }
Daniel Veillard260a68f1998-08-13 03:39:55 +000010236
Daniel Veillardb05deb71999-08-10 19:04:08 +000010237 if ((ctxt->directory == NULL) && (directory == NULL))
10238 directory = xmlParserGetDirectory(filename);
10239 if ((ctxt->directory == NULL) && (directory != NULL))
Daniel Veillardcf461992000-03-14 18:30:20 +000010240 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
Daniel Veillardb05deb71999-08-10 19:04:08 +000010241
Daniel Veillard260a68f1998-08-13 03:39:55 +000010242 xmlParseDocument(ctxt);
10243
Daniel Veillard517752b1999-04-05 12:20:10 +000010244 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000010245 else {
10246 ret = NULL;
Daniel Veillard517752b1999-04-05 12:20:10 +000010247 xmlFreeDoc(ctxt->myDoc);
10248 ctxt->myDoc = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000010249 }
Daniel Veillard97fea181999-06-26 23:07:37 +000010250 if (sax != NULL)
10251 ctxt->sax = NULL;
Daniel Veillardd692aa41999-02-28 21:54:31 +000010252 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +000010253
10254 return(ret);
10255}
10256
Daniel Veillard42dc9b31998-11-09 01:17:21 +000010257/**
Daniel Veillardcf461992000-03-14 18:30:20 +000010258 * xmlParseFile:
Daniel Veillard42dc9b31998-11-09 01:17:21 +000010259 * @filename: the filename
10260 *
10261 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10262 * compressed document is provided by default if found at compile-time.
10263 *
Daniel Veillard1e346af1999-02-22 10:33:01 +000010264 * Returns the resulting document tree
Daniel Veillard42dc9b31998-11-09 01:17:21 +000010265 */
10266
Daniel Veillard011b63c1999-06-02 17:44:04 +000010267xmlDocPtr
10268xmlParseFile(const char *filename) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000010269 return(xmlSAXParseFile(NULL, filename, 0));
10270}
10271
10272/**
Daniel Veillardcf461992000-03-14 18:30:20 +000010273 * xmlRecoverFile:
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000010274 * @filename: the filename
10275 *
10276 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10277 * compressed document is provided by default if found at compile-time.
10278 * In the case the document is not Well Formed, a tree is built anyway
10279 *
Daniel Veillard1e346af1999-02-22 10:33:01 +000010280 * Returns the resulting document tree
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000010281 */
10282
Daniel Veillard011b63c1999-06-02 17:44:04 +000010283xmlDocPtr
10284xmlRecoverFile(const char *filename) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000010285 return(xmlSAXParseFile(NULL, filename, 1));
Daniel Veillard42dc9b31998-11-09 01:17:21 +000010286}
Daniel Veillard260a68f1998-08-13 03:39:55 +000010287
Daniel Veillard11e00581998-10-24 18:27:49 +000010288/**
Daniel Veillardcf461992000-03-14 18:30:20 +000010289 * xmlCreateMemoryParserCtxt:
10290 * @buffer: a pointer to a zero terminated char array
Daniel Veillardb566ce12000-03-04 11:39:42 +000010291 * @size: the size of the array (without the trailing 0)
Daniel Veillard11e00581998-10-24 18:27:49 +000010292 *
Daniel Veillardd692aa41999-02-28 21:54:31 +000010293 * Create a parser context for an XML in-memory document.
Daniel Veillard11e00581998-10-24 18:27:49 +000010294 *
Daniel Veillardd692aa41999-02-28 21:54:31 +000010295 * Returns the new parser context or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +000010296 */
Daniel Veillardd692aa41999-02-28 21:54:31 +000010297xmlParserCtxtPtr
10298xmlCreateMemoryParserCtxt(char *buffer, int size) {
Daniel Veillard260a68f1998-08-13 03:39:55 +000010299 xmlParserCtxtPtr ctxt;
10300 xmlParserInputPtr input;
10301
Daniel Veillardcf461992000-03-14 18:30:20 +000010302 if (buffer[size] != 0)
Daniel Veillardb566ce12000-03-04 11:39:42 +000010303 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +000010304
Daniel Veillardb05deb71999-08-10 19:04:08 +000010305 ctxt = xmlNewParserCtxt();
Daniel Veillardcf461992000-03-14 18:30:20 +000010306 if (ctxt == NULL)
Daniel Veillard260a68f1998-08-13 03:39:55 +000010307 return(NULL);
Daniel Veillardb05deb71999-08-10 19:04:08 +000010308
10309 input = xmlNewInputStream(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +000010310 if (input == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +000010311 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +000010312 return(NULL);
10313 }
10314
10315 input->filename = NULL;
10316 input->line = 1;
10317 input->col = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +000010318 input->buf = NULL;
Daniel Veillarde2d034d1999-07-27 19:52:06 +000010319 input->consumed = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +000010320
Daniel Veillardb96e6431999-08-29 21:02:19 +000010321 input->base = BAD_CAST buffer;
10322 input->cur = BAD_CAST buffer;
Daniel Veillardd692aa41999-02-28 21:54:31 +000010323 input->free = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +000010324
10325 inputPush(ctxt, input);
Daniel Veillardd692aa41999-02-28 21:54:31 +000010326 return(ctxt);
10327}
10328
10329/**
Daniel Veillardcf461992000-03-14 18:30:20 +000010330 * xmlSAXParseMemory:
Daniel Veillardd692aa41999-02-28 21:54:31 +000010331 * @sax: the SAX handler block
10332 * @buffer: an pointer to a char array
Daniel Veillard51e3b151999-11-12 17:02:31 +000010333 * @size: the size of the array
10334 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
Daniel Veillardd692aa41999-02-28 21:54:31 +000010335 * documents
10336 *
10337 * parse an XML in-memory block and use the given SAX function block
10338 * to handle the parsing callback. If sax is NULL, fallback to the default
10339 * DOM tree building routines.
10340 *
Daniel Veillardd692aa41999-02-28 21:54:31 +000010341 * Returns the resulting document tree
10342 */
10343xmlDocPtr
10344xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
10345 xmlDocPtr ret;
10346 xmlParserCtxtPtr ctxt;
10347
10348 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10349 if (ctxt == NULL) return(NULL);
Daniel Veillard27d88741999-05-29 11:51:49 +000010350 if (sax != NULL) {
10351 ctxt->sax = sax;
10352 ctxt->userData = NULL;
10353 }
Daniel Veillard260a68f1998-08-13 03:39:55 +000010354
10355 xmlParseDocument(ctxt);
10356
Daniel Veillard517752b1999-04-05 12:20:10 +000010357 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000010358 else {
10359 ret = NULL;
Daniel Veillard517752b1999-04-05 12:20:10 +000010360 xmlFreeDoc(ctxt->myDoc);
10361 ctxt->myDoc = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000010362 }
Daniel Veillard97fea181999-06-26 23:07:37 +000010363 if (sax != NULL)
10364 ctxt->sax = NULL;
Daniel Veillardd692aa41999-02-28 21:54:31 +000010365 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +000010366
10367 return(ret);
10368}
10369
Daniel Veillard42dc9b31998-11-09 01:17:21 +000010370/**
Daniel Veillardcf461992000-03-14 18:30:20 +000010371 * xmlParseMemory:
Daniel Veillard1e346af1999-02-22 10:33:01 +000010372 * @buffer: an pointer to a char array
Daniel Veillard42dc9b31998-11-09 01:17:21 +000010373 * @size: the size of the array
10374 *
10375 * parse an XML in-memory block and build a tree.
10376 *
Daniel Veillard1e346af1999-02-22 10:33:01 +000010377 * Returns the resulting document tree
Daniel Veillard42dc9b31998-11-09 01:17:21 +000010378 */
10379
10380xmlDocPtr xmlParseMemory(char *buffer, int size) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000010381 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10382}
10383
10384/**
Daniel Veillardcf461992000-03-14 18:30:20 +000010385 * xmlRecoverMemory:
Daniel Veillard1e346af1999-02-22 10:33:01 +000010386 * @buffer: an pointer to a char array
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000010387 * @size: the size of the array
10388 *
10389 * parse an XML in-memory block and build a tree.
10390 * In the case the document is not Well Formed, a tree is built anyway
10391 *
Daniel Veillard1e346af1999-02-22 10:33:01 +000010392 * Returns the resulting document tree
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000010393 */
10394
10395xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
10396 return(xmlSAXParseMemory(NULL, buffer, size, 1));
Daniel Veillard42dc9b31998-11-09 01:17:21 +000010397}
Daniel Veillard260a68f1998-08-13 03:39:55 +000010398
Daniel Veillard260a68f1998-08-13 03:39:55 +000010399
Daniel Veillard11e00581998-10-24 18:27:49 +000010400/**
10401 * xmlSetupParserForBuffer:
10402 * @ctxt: an XML parser context
Daniel Veillarddd6b3671999-09-23 22:19:22 +000010403 * @buffer: a xmlChar * buffer
Daniel Veillard11e00581998-10-24 18:27:49 +000010404 * @filename: a file name
10405 *
Daniel Veillard260a68f1998-08-13 03:39:55 +000010406 * Setup the parser context to parse a new buffer; Clears any prior
10407 * contents from the parser context. The buffer parameter must not be
10408 * NULL, but the filename parameter can be
10409 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +000010410void
Daniel Veillarddd6b3671999-09-23 22:19:22 +000010411xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
Daniel Veillard260a68f1998-08-13 03:39:55 +000010412 const char* filename)
10413{
Daniel Veillardb05deb71999-08-10 19:04:08 +000010414 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +000010415
Daniel Veillardb05deb71999-08-10 19:04:08 +000010416 input = xmlNewInputStream(ctxt);
10417 if (input == NULL) {
10418 perror("malloc");
Daniel Veillard6454aec1999-09-02 22:04:43 +000010419 xmlFree(ctxt);
Daniel Veillard0142b842000-01-14 14:45:24 +000010420 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +000010421 }
10422
10423 xmlClearParserCtxt(ctxt);
10424 if (filename != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +000010425 input->filename = xmlMemStrdup(filename);
Daniel Veillardb05deb71999-08-10 19:04:08 +000010426 input->base = buffer;
10427 input->cur = buffer;
10428 inputPush(ctxt, input);
Daniel Veillard260a68f1998-08-13 03:39:55 +000010429}
10430
Daniel Veillard7a66ee61999-09-26 11:31:02 +000010431/**
10432 * xmlSAXUserParseFile:
10433 * @sax: a SAX handler
10434 * @user_data: The user data returned on SAX callbacks
10435 * @filename: a file name
10436 *
10437 * parse an XML file and call the given SAX handler routines.
10438 * Automatic support for ZLIB/Compress compressed document is provided
10439 *
10440 * Returns 0 in case of success or a error number otherwise
10441 */
Daniel Veillard11a48ec1999-11-23 10:40:46 +000010442int
10443xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10444 const char *filename) {
Daniel Veillard7a66ee61999-09-26 11:31:02 +000010445 int ret = 0;
10446 xmlParserCtxtPtr ctxt;
10447
10448 ctxt = xmlCreateFileParserCtxt(filename);
10449 if (ctxt == NULL) return -1;
Daniel Veillard294cbca1999-12-03 13:19:09 +000010450 if (ctxt->sax != &xmlDefaultSAXHandler)
10451 xmlFree(ctxt->sax);
Daniel Veillard7a66ee61999-09-26 11:31:02 +000010452 ctxt->sax = sax;
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010453 if (user_data != NULL)
10454 ctxt->userData = user_data;
Daniel Veillard7a66ee61999-09-26 11:31:02 +000010455
10456 xmlParseDocument(ctxt);
10457
10458 if (ctxt->wellFormed)
10459 ret = 0;
10460 else {
10461 if (ctxt->errNo != 0)
10462 ret = ctxt->errNo;
10463 else
10464 ret = -1;
10465 }
10466 if (sax != NULL)
10467 ctxt->sax = NULL;
10468 xmlFreeParserCtxt(ctxt);
10469
10470 return ret;
10471}
10472
10473/**
10474 * xmlSAXUserParseMemory:
10475 * @sax: a SAX handler
10476 * @user_data: The user data returned on SAX callbacks
10477 * @buffer: an in-memory XML document input
Daniel Veillard51e3b151999-11-12 17:02:31 +000010478 * @size: the length of the XML document in bytes
Daniel Veillard7a66ee61999-09-26 11:31:02 +000010479 *
10480 * A better SAX parsing routine.
10481 * parse an XML in-memory buffer and call the given SAX handler routines.
10482 *
10483 * Returns 0 in case of success or a error number otherwise
10484 */
10485int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
10486 char *buffer, int size) {
10487 int ret = 0;
10488 xmlParserCtxtPtr ctxt;
10489
10490 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10491 if (ctxt == NULL) return -1;
10492 ctxt->sax = sax;
10493 ctxt->userData = user_data;
10494
10495 xmlParseDocument(ctxt);
10496
10497 if (ctxt->wellFormed)
10498 ret = 0;
10499 else {
10500 if (ctxt->errNo != 0)
10501 ret = ctxt->errNo;
10502 else
10503 ret = -1;
10504 }
10505 if (sax != NULL)
10506 ctxt->sax = NULL;
10507 xmlFreeParserCtxt(ctxt);
10508
10509 return ret;
10510}
10511
Daniel Veillard260a68f1998-08-13 03:39:55 +000010512
Daniel Veillardb05deb71999-08-10 19:04:08 +000010513/************************************************************************
10514 * *
Daniel Veillard51e3b151999-11-12 17:02:31 +000010515 * Miscellaneous *
Daniel Veillardb05deb71999-08-10 19:04:08 +000010516 * *
10517 ************************************************************************/
10518
/**
 * xmlCleanupParser:
 *
 * Cleanup function for the XML parser. It tries to reclaim all
 * parsing related global memory allocated for the parser processing.
 * It doesn't deallocate any document related memory. Calling this
 * function should not prevent reusing the parser.
 *
 * The work is delegated to the cleanup routines of the character
 * encoding handlers and of the predefined entities, in that order.
 */

void
xmlCleanupParser(void)
{
    xmlCleanupCharEncodingHandlers();   /* encoding handlers first */
    xmlCleanupPredefinedEntities();     /* then the predefined entities */
}
Daniel Veillardb05deb71999-08-10 19:04:08 +000010533
Daniel Veillard11e00581998-10-24 18:27:49 +000010534/**
10535 * xmlParserFindNodeInfo:
10536 * @ctxt: an XML parser context
10537 * @node: an XML node within the tree
10538 *
10539 * Find the parser node info struct for a given node
10540 *
Daniel Veillard1e346af1999-02-22 10:33:01 +000010541 * Returns an xmlParserNodeInfo block pointer or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +000010542 */
10543const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
10544 const xmlNode* node)
10545{
10546 unsigned long pos;
10547
10548 /* Find position where node should be at */
10549 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
10550 if ( ctx->node_seq.buffer[pos].node == node )
10551 return &ctx->node_seq.buffer[pos];
10552 else
10553 return NULL;
10554}
10555
10556
Daniel Veillard11e00581998-10-24 18:27:49 +000010557/**
Daniel Veillardcf461992000-03-14 18:30:20 +000010558 * xmlInitNodeInfoSeq:
Daniel Veillard11e00581998-10-24 18:27:49 +000010559 * @seq: a node info sequence pointer
10560 *
10561 * -- Initialize (set to initial state) node info sequence
Daniel Veillard260a68f1998-08-13 03:39:55 +000010562 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +000010563void
10564xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
Daniel Veillard260a68f1998-08-13 03:39:55 +000010565{
10566 seq->length = 0;
10567 seq->maximum = 0;
10568 seq->buffer = NULL;
10569}
10570
Daniel Veillard11e00581998-10-24 18:27:49 +000010571/**
Daniel Veillardcf461992000-03-14 18:30:20 +000010572 * xmlClearNodeInfoSeq:
Daniel Veillard11e00581998-10-24 18:27:49 +000010573 * @seq: a node info sequence pointer
10574 *
10575 * -- Clear (release memory and reinitialize) node
Daniel Veillard260a68f1998-08-13 03:39:55 +000010576 * info sequence
10577 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +000010578void
10579xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
Daniel Veillard260a68f1998-08-13 03:39:55 +000010580{
10581 if ( seq->buffer != NULL )
Daniel Veillard6454aec1999-09-02 22:04:43 +000010582 xmlFree(seq->buffer);
Daniel Veillard260a68f1998-08-13 03:39:55 +000010583 xmlInitNodeInfoSeq(seq);
10584}
10585
10586
Daniel Veillard11e00581998-10-24 18:27:49 +000010587/**
10588 * xmlParserFindNodeInfoIndex:
10589 * @seq: a node info sequence pointer
10590 * @node: an XML node pointer
10591 *
10592 *
Daniel Veillard260a68f1998-08-13 03:39:55 +000010593 * xmlParserFindNodeInfoIndex : Find the index that the info record for
10594 * the given node is or should be at in a sorted sequence
Daniel Veillard1164e751999-02-16 16:29:17 +000010595 *
Daniel Veillard1e346af1999-02-22 10:33:01 +000010596 * Returns a long indicating the position of the record
Daniel Veillard260a68f1998-08-13 03:39:55 +000010597 */
10598unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
10599 const xmlNode* node)
10600{
10601 unsigned long upper, lower, middle;
10602 int found = 0;
10603
10604 /* Do a binary search for the key */
10605 lower = 1;
10606 upper = seq->length;
10607 middle = 0;
10608 while ( lower <= upper && !found) {
10609 middle = lower + (upper - lower) / 2;
10610 if ( node == seq->buffer[middle - 1].node )
10611 found = 1;
10612 else if ( node < seq->buffer[middle - 1].node )
10613 upper = middle - 1;
10614 else
10615 lower = middle + 1;
10616 }
10617
10618 /* Return position */
10619 if ( middle == 0 || seq->buffer[middle - 1].node < node )
10620 return middle;
10621 else
10622 return middle - 1;
10623}
10624
10625
Daniel Veillard11e00581998-10-24 18:27:49 +000010626/**
10627 * xmlParserAddNodeInfo:
10628 * @ctxt: an XML parser context
Daniel Veillard1e346af1999-02-22 10:33:01 +000010629 * @info: a node info sequence pointer
Daniel Veillard11e00581998-10-24 18:27:49 +000010630 *
10631 * Insert node info record into the sorted sequence
Daniel Veillard260a68f1998-08-13 03:39:55 +000010632 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +000010633void
Daniel Veillarde3bffb91998-11-08 14:40:56 +000010634xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
Daniel Veillard1e346af1999-02-22 10:33:01 +000010635 const xmlParserNodeInfo* info)
Daniel Veillard260a68f1998-08-13 03:39:55 +000010636{
10637 unsigned long pos;
10638 static unsigned int block_size = 5;
10639
10640 /* Find pos and check to see if node is already in the sequence */
Daniel Veillarde3bffb91998-11-08 14:40:56 +000010641 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
10642 if ( pos < ctxt->node_seq.length
10643 && ctxt->node_seq.buffer[pos].node == info->node ) {
10644 ctxt->node_seq.buffer[pos] = *info;
Daniel Veillard260a68f1998-08-13 03:39:55 +000010645 }
10646
10647 /* Otherwise, we need to add new node to buffer */
10648 else {
10649 /* Expand buffer by 5 if needed */
Daniel Veillarde3bffb91998-11-08 14:40:56 +000010650 if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
Daniel Veillard260a68f1998-08-13 03:39:55 +000010651 xmlParserNodeInfo* tmp_buffer;
Daniel Veillarde3bffb91998-11-08 14:40:56 +000010652 unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
10653 *(ctxt->node_seq.maximum + block_size));
Daniel Veillard260a68f1998-08-13 03:39:55 +000010654
Daniel Veillarde3bffb91998-11-08 14:40:56 +000010655 if ( ctxt->node_seq.buffer == NULL )
Daniel Veillard6454aec1999-09-02 22:04:43 +000010656 tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
Daniel Veillard260a68f1998-08-13 03:39:55 +000010657 else
Daniel Veillard6454aec1999-09-02 22:04:43 +000010658 tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
Daniel Veillard260a68f1998-08-13 03:39:55 +000010659
10660 if ( tmp_buffer == NULL ) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +000010661 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +000010662 ctxt->sax->error(ctxt->userData, "Out of memory\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +000010663 ctxt->errNo = XML_ERR_NO_MEMORY;
Daniel Veillard260a68f1998-08-13 03:39:55 +000010664 return;
10665 }
Daniel Veillarde3bffb91998-11-08 14:40:56 +000010666 ctxt->node_seq.buffer = tmp_buffer;
10667 ctxt->node_seq.maximum += block_size;
Daniel Veillard260a68f1998-08-13 03:39:55 +000010668 }
10669
10670 /* If position is not at end, move elements out of the way */
Daniel Veillarde3bffb91998-11-08 14:40:56 +000010671 if ( pos != ctxt->node_seq.length ) {
Daniel Veillard260a68f1998-08-13 03:39:55 +000010672 unsigned long i;
10673
Daniel Veillarde3bffb91998-11-08 14:40:56 +000010674 for ( i = ctxt->node_seq.length; i > pos; i-- )
10675 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
Daniel Veillard260a68f1998-08-13 03:39:55 +000010676 }
10677
10678 /* Copy element and increase length */
Daniel Veillarde3bffb91998-11-08 14:40:56 +000010679 ctxt->node_seq.buffer[pos] = *info;
10680 ctxt->node_seq.length++;
Daniel Veillard260a68f1998-08-13 03:39:55 +000010681 }
10682}
Daniel Veillard011b63c1999-06-02 17:44:04 +000010683
10684
Daniel Veillardb05deb71999-08-10 19:04:08 +000010685/**
Daniel Veillardcf461992000-03-14 18:30:20 +000010686 * xmlSubstituteEntitiesDefault:
Daniel Veillardb05deb71999-08-10 19:04:08 +000010687 * @val: int 0 or 1
10688 *
10689 * Set and return the previous value for default entity support.
10690 * Initially the parser always keep entity references instead of substituting
10691 * entity values in the output. This function has to be used to change the
10692 * default parser behaviour
10693 * SAX::subtituteEntities() has to be used for changing that on a file by
10694 * file basis.
10695 *
10696 * Returns the last value for 0 for no substitution, 1 for substitution.
10697 */
10698
10699int
10700xmlSubstituteEntitiesDefault(int val) {
10701 int old = xmlSubstituteEntitiesDefaultValue;
10702
10703 xmlSubstituteEntitiesDefaultValue = val;
10704 return(old);
10705}
10706
Daniel Veillardfb76c402000-03-04 11:39:42 +000010707/**
10708 * xmlKeepBlanksDefault:
10709 * @val: int 0 or 1
10710 *
10711 * Set and return the previous value for default blanks text nodes support.
10712 * The 1.x version of the parser used an heuristic to try to detect
10713 * ignorable white spaces. As a result the SAX callback was generating
10714 * ignorableWhitespace() callbacks instead of characters() one, and when
10715 * using the DOM output text nodes containing those blanks were not generated.
10716 * The 2.x and later version will switch to the XML standard way and
10717 * ignorableWhitespace() are only generated when running the parser in
10718 * validating mode and when the current element doesn't allow CDATA or
10719 * mixed content.
10720 * This function is provided as a way to force the standard behaviour
10721 * on 1.X libs and to switch back to the old mode for compatibility when
10722 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
10723 * by using xmlIsBlankNode() commodity function to detect the "empty"
10724 * nodes generated.
10725 * This value also affect autogeneration of indentation when saving code
10726 * if blanks sections are kept, indentation is not generated.
10727 *
10728 * Returns the last value for 0 for no substitution, 1 for substitution.
10729 */
10730
10731int
10732xmlKeepBlanksDefault(int val) {
10733 int old = xmlKeepBlanksDefaultValue;
10734
10735 xmlKeepBlanksDefaultValue = val;
10736 xmlIndentTreeOutput = !val;
10737 return(old);
10738}
10739