blob: 83a850af540954ae7e3e9f43ee3b734d727a0f18 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Bjorn Reese70a9da52001-04-21 16:57:29 +000033#include "libxml.h"
34
Daniel Veillard3c5ed912002-01-08 10:36:16 +000035#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000036#define XML_DIR_SEP '\\'
37#else
Owen Taylor3473f882001-02-23 17:55:21 +000038#define XML_DIR_SEP '/'
39#endif
40
Owen Taylor3473f882001-02-23 17:55:21 +000041#include <stdlib.h>
42#include <string.h>
43#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000044#include <libxml/threads.h>
45#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046#include <libxml/tree.h>
47#include <libxml/parser.h>
48#include <libxml/parserInternals.h>
49#include <libxml/valid.h>
50#include <libxml/entities.h>
51#include <libxml/xmlerror.h>
52#include <libxml/encoding.h>
53#include <libxml/xmlIO.h>
54#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000055#ifdef LIBXML_CATALOG_ENABLED
56#include <libxml/catalog.h>
57#endif
Owen Taylor3473f882001-02-23 17:55:21 +000058
59#ifdef HAVE_CTYPE_H
60#include <ctype.h>
61#endif
62#ifdef HAVE_STDLIB_H
63#include <stdlib.h>
64#endif
65#ifdef HAVE_SYS_STAT_H
66#include <sys/stat.h>
67#endif
68#ifdef HAVE_FCNTL_H
69#include <fcntl.h>
70#endif
71#ifdef HAVE_UNISTD_H
72#include <unistd.h>
73#endif
74#ifdef HAVE_ZLIB_H
75#include <zlib.h>
76#endif
77
78
Daniel Veillard21a0f912001-02-25 19:54:14 +000079#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000080#define XML_PARSER_BUFFER_SIZE 100
81
/*
 * List of the "xml"-prefixed processing instruction targets allowed by
 * the W3C specifications. The table is terminated by a NULL entry and is
 * never modified, so both the strings and the array itself are const.
 */

static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
90
91/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +000092xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
93 const xmlChar **str);
94
Daniel Veillard257d9102001-05-08 10:41:44 +000095static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000096xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
97 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000098 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000099 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000100
Daniel Veillard8107a222002-01-13 14:10:10 +0000101static void
102xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
103 xmlNodePtr lastNode);
104
Owen Taylor3473f882001-02-23 17:55:21 +0000105/************************************************************************
106 * *
107 * Parser stacks related functions and macros *
108 * *
109 ************************************************************************/
110
111xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
112 const xmlChar ** str);
113
114/*
115 * Generic function for accessing stacks in the Parser Context
116 */
117
118#define PUSH_AND_POP(scope, type, name) \
119scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
120 if (ctxt->name##Nr >= ctxt->name##Max) { \
121 ctxt->name##Max *= 2; \
122 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
123 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
124 if (ctxt->name##Tab == NULL) { \
125 xmlGenericError(xmlGenericErrorContext, \
126 "realloc failed !\n"); \
127 return(0); \
128 } \
129 } \
130 ctxt->name##Tab[ctxt->name##Nr] = value; \
131 ctxt->name = value; \
132 return(ctxt->name##Nr++); \
133} \
134scope type name##Pop(xmlParserCtxtPtr ctxt) { \
135 type ret; \
136 if (ctxt->name##Nr <= 0) return(0); \
137 ctxt->name##Nr--; \
138 if (ctxt->name##Nr > 0) \
139 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
140 else \
141 ctxt->name = NULL; \
142 ret = ctxt->name##Tab[ctxt->name##Nr]; \
143 ctxt->name##Tab[ctxt->name##Nr] = 0; \
144 return(ret); \
145} \
146
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000147/**
148 * inputPop:
149 * @ctxt: an XML parser context
150 *
151 * Pops the top parser input from the input stack
152 *
153 * Returns the input just removed
154 */
155/**
156 * inputPush:
157 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000158 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000159 *
160 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000161 *
162 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000163 */
164/**
165 * namePop:
166 * @ctxt: an XML parser context
167 *
168 * Pops the top element name from the name stack
169 *
170 * Returns the name just removed
171 */
172/**
173 * namePush:
174 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000175 * @value: the element name
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000176 *
177 * Pushes a new element name on top of the name stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000178 *
179 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000180 */
181/**
182 * nodePop:
183 * @ctxt: an XML parser context
184 *
185 * Pops the top element node from the node stack
186 *
187 * Returns the node just removed
188 */
189/**
190 * nodePush:
191 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000192 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000193 *
194 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000195 *
196 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000197 */
Owen Taylor3473f882001-02-23 17:55:21 +0000198/*
199 * Those macros actually generate the functions
200 */
201PUSH_AND_POP(extern, xmlParserInputPtr, input)
202PUSH_AND_POP(extern, xmlNodePtr, node)
203PUSH_AND_POP(extern, xmlChar*, name)
204
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000205static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000206 if (ctxt->spaceNr >= ctxt->spaceMax) {
207 ctxt->spaceMax *= 2;
208 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
209 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
210 if (ctxt->spaceTab == NULL) {
211 xmlGenericError(xmlGenericErrorContext,
212 "realloc failed !\n");
213 return(0);
214 }
215 }
216 ctxt->spaceTab[ctxt->spaceNr] = val;
217 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
218 return(ctxt->spaceNr++);
219}
220
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000221static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000222 int ret;
223 if (ctxt->spaceNr <= 0) return(0);
224 ctxt->spaceNr--;
225 if (ctxt->spaceNr > 0)
226 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
227 else
228 ctxt->space = NULL;
229 ret = ctxt->spaceTab[ctxt->spaceNr];
230 ctxt->spaceTab[ctxt->spaceNr] = -1;
231 return(ret);
232}
233
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named "ctxt" (the parser
 * context) to be in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     the pending ctxt->token if there is one, otherwise the current
 *           xmlChar of the input, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but only looks at the input buffer: it yields -1
 *           when a token is pending instead of returning that token.
 *   NXT(n)  the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare on ASCII based substrings.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character (calls xmlNextChar()).
 *   NEXT1   Skip exactly one xmlChar, growing the input when its end is hit.
 *   NEXTL(l) Skip l xmlChar in the input buffer, maintaining line/col
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * SHRINK, GROW, NEXT1 and COPY_BUF expand to a bare "if" statement or a
 * bare block rather than to a do { } while (0): they are statement-like
 * macros which must not be used as the unbraced body of an if/else. Their
 * shape is kept as is because existing call sites rely on it (some invoke
 * GROW without a trailing semicolon).
 *
 * Macro arguments are parenthesized in the expansions so that passing an
 * expression instead of a plain identifier cannot change the meaning.
 */

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);					\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  }

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {	\
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {								\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +0000320
321/**
322 * xmlSkipBlankChars:
323 * @ctxt: the XML parser context
324 *
325 * skip all blanks character found at that point in the input streams.
326 * It pops up finished entities in the process if allowable at that point.
327 *
328 * Returns the number of space chars skipped
329 */
330
331int
332xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000333 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000334
Daniel Veillard02141ea2001-04-30 11:46:40 +0000335 if (ctxt->token != 0) {
336 if (!IS_BLANK(ctxt->token))
337 return(0);
338 ctxt->token = 0;
339 res++;
340 }
Owen Taylor3473f882001-02-23 17:55:21 +0000341 /*
342 * It's Okay to use CUR/NEXT here since all the blanks are on
343 * the ASCII range.
344 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000345 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
346 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000347 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000348 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000349 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000350 cur = ctxt->input->cur;
351 while (IS_BLANK(*cur)) {
352 if (*cur == '\n') {
353 ctxt->input->line++; ctxt->input->col = 1;
354 }
355 cur++;
356 res++;
357 if (*cur == 0) {
358 ctxt->input->cur = cur;
359 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
360 cur = ctxt->input->cur;
361 }
362 }
363 ctxt->input->cur = cur;
364 } else {
365 int cur;
366 do {
367 cur = CUR;
368 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
369 NEXT;
370 cur = CUR;
371 res++;
372 }
373 while ((cur == 0) && (ctxt->inputNr > 1) &&
374 (ctxt->instate != XML_PARSER_COMMENT)) {
375 xmlPopInput(ctxt);
376 cur = CUR;
377 }
378 /*
379 * Need to handle support of entities branching here
380 */
381 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
382 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
383 }
Owen Taylor3473f882001-02-23 17:55:21 +0000384 return(res);
385}
386
387/************************************************************************
388 * *
389 * Commodity functions to handle entities *
390 * *
391 ************************************************************************/
392
393/**
394 * xmlPopInput:
395 * @ctxt: an XML parser context
396 *
397 * xmlPopInput: the current input pointed by ctxt->input came to an end
398 * pop it and return the next char.
399 *
400 * Returns the current xmlChar in the parser context
401 */
402xmlChar
403xmlPopInput(xmlParserCtxtPtr ctxt) {
404 if (ctxt->inputNr == 1) return(0); /* End of main Input */
405 if (xmlParserDebugEntities)
406 xmlGenericError(xmlGenericErrorContext,
407 "Popping input %d\n", ctxt->inputNr);
408 xmlFreeInputStream(inputPop(ctxt));
409 if ((*ctxt->input->cur == 0) &&
410 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
411 return(xmlPopInput(ctxt));
412 return(CUR);
413}
414
415/**
416 * xmlPushInput:
417 * @ctxt: an XML parser context
418 * @input: an XML parser input fragment (entity, XML fragment ...).
419 *
420 * xmlPushInput: switch to a new input stream which is stacked on top
421 * of the previous one(s).
422 */
423void
424xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
425 if (input == NULL) return;
426
427 if (xmlParserDebugEntities) {
428 if ((ctxt->input != NULL) && (ctxt->input->filename))
429 xmlGenericError(xmlGenericErrorContext,
430 "%s(%d): ", ctxt->input->filename,
431 ctxt->input->line);
432 xmlGenericError(xmlGenericErrorContext,
433 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
434 }
435 inputPush(ctxt, input);
436 GROW;
437}
438
439/**
440 * xmlParseCharRef:
441 * @ctxt: an XML parser context
442 *
443 * parse Reference declarations
444 *
445 * [66] CharRef ::= '&#' [0-9]+ ';' |
446 * '&#x' [0-9a-fA-F]+ ';'
447 *
448 * [ WFC: Legal Character ]
449 * Characters referred to using character references must match the
450 * production for Char.
451 *
452 * Returns the value parsed (as an int), 0 in case of error
453 */
454int
455xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000456 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000457 int count = 0;
458
459 if (ctxt->token != 0) {
460 val = ctxt->token;
461 ctxt->token = 0;
462 return(val);
463 }
464 /*
465 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
466 */
467 if ((RAW == '&') && (NXT(1) == '#') &&
468 (NXT(2) == 'x')) {
469 SKIP(3);
470 GROW;
471 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000472 if (count++ > 20) {
473 count = 0;
474 GROW;
475 }
476 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000477 val = val * 16 + (CUR - '0');
478 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
479 val = val * 16 + (CUR - 'a') + 10;
480 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
481 val = val * 16 + (CUR - 'A') + 10;
482 else {
483 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
484 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
485 ctxt->sax->error(ctxt->userData,
486 "xmlParseCharRef: invalid hexadecimal value\n");
487 ctxt->wellFormed = 0;
488 ctxt->disableSAX = 1;
489 val = 0;
490 break;
491 }
492 NEXT;
493 count++;
494 }
495 if (RAW == ';') {
496 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
497 ctxt->nbChars ++;
498 ctxt->input->cur++;
499 }
500 } else if ((RAW == '&') && (NXT(1) == '#')) {
501 SKIP(2);
502 GROW;
503 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000504 if (count++ > 20) {
505 count = 0;
506 GROW;
507 }
508 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000509 val = val * 10 + (CUR - '0');
510 else {
511 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
512 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
513 ctxt->sax->error(ctxt->userData,
514 "xmlParseCharRef: invalid decimal value\n");
515 ctxt->wellFormed = 0;
516 ctxt->disableSAX = 1;
517 val = 0;
518 break;
519 }
520 NEXT;
521 count++;
522 }
523 if (RAW == ';') {
524 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
525 ctxt->nbChars ++;
526 ctxt->input->cur++;
527 }
528 } else {
529 ctxt->errNo = XML_ERR_INVALID_CHARREF;
530 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
531 ctxt->sax->error(ctxt->userData,
532 "xmlParseCharRef: invalid value\n");
533 ctxt->wellFormed = 0;
534 ctxt->disableSAX = 1;
535 }
536
537 /*
538 * [ WFC: Legal Character ]
539 * Characters referred to using character references must match the
540 * production for Char.
541 */
542 if (IS_CHAR(val)) {
543 return(val);
544 } else {
545 ctxt->errNo = XML_ERR_INVALID_CHAR;
546 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000547 ctxt->sax->error(ctxt->userData,
548 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000549 val);
550 ctxt->wellFormed = 0;
551 ctxt->disableSAX = 1;
552 }
553 return(0);
554}
555
556/**
557 * xmlParseStringCharRef:
558 * @ctxt: an XML parser context
559 * @str: a pointer to an index in the string
560 *
561 * parse Reference declarations, variant parsing from a string rather
562 * than an an input flow.
563 *
564 * [66] CharRef ::= '&#' [0-9]+ ';' |
565 * '&#x' [0-9a-fA-F]+ ';'
566 *
567 * [ WFC: Legal Character ]
568 * Characters referred to using character references must match the
569 * production for Char.
570 *
571 * Returns the value parsed (as an int), 0 in case of error, str will be
572 * updated to the current value of the index
573 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000574static int
Owen Taylor3473f882001-02-23 17:55:21 +0000575xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
576 const xmlChar *ptr;
577 xmlChar cur;
578 int val = 0;
579
580 if ((str == NULL) || (*str == NULL)) return(0);
581 ptr = *str;
582 cur = *ptr;
583 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
584 ptr += 3;
585 cur = *ptr;
586 while (cur != ';') { /* Non input consuming loop */
587 if ((cur >= '0') && (cur <= '9'))
588 val = val * 16 + (cur - '0');
589 else if ((cur >= 'a') && (cur <= 'f'))
590 val = val * 16 + (cur - 'a') + 10;
591 else if ((cur >= 'A') && (cur <= 'F'))
592 val = val * 16 + (cur - 'A') + 10;
593 else {
594 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
595 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
596 ctxt->sax->error(ctxt->userData,
597 "xmlParseStringCharRef: invalid hexadecimal value\n");
598 ctxt->wellFormed = 0;
599 ctxt->disableSAX = 1;
600 val = 0;
601 break;
602 }
603 ptr++;
604 cur = *ptr;
605 }
606 if (cur == ';')
607 ptr++;
608 } else if ((cur == '&') && (ptr[1] == '#')){
609 ptr += 2;
610 cur = *ptr;
611 while (cur != ';') { /* Non input consuming loops */
612 if ((cur >= '0') && (cur <= '9'))
613 val = val * 10 + (cur - '0');
614 else {
615 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
617 ctxt->sax->error(ctxt->userData,
618 "xmlParseStringCharRef: invalid decimal value\n");
619 ctxt->wellFormed = 0;
620 ctxt->disableSAX = 1;
621 val = 0;
622 break;
623 }
624 ptr++;
625 cur = *ptr;
626 }
627 if (cur == ';')
628 ptr++;
629 } else {
630 ctxt->errNo = XML_ERR_INVALID_CHARREF;
631 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
632 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000633 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000634 ctxt->wellFormed = 0;
635 ctxt->disableSAX = 1;
636 return(0);
637 }
638 *str = ptr;
639
640 /*
641 * [ WFC: Legal Character ]
642 * Characters referred to using character references must match the
643 * production for Char.
644 */
645 if (IS_CHAR(val)) {
646 return(val);
647 } else {
648 ctxt->errNo = XML_ERR_INVALID_CHAR;
649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
650 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000651 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000652 ctxt->wellFormed = 0;
653 ctxt->disableSAX = 1;
654 }
655 return(0);
656}
657
658/**
659 * xmlParserHandlePEReference:
660 * @ctxt: the parser context
661 *
662 * [69] PEReference ::= '%' Name ';'
663 *
664 * [ WFC: No Recursion ]
665 * A parsed entity must not contain a recursive
666 * reference to itself, either directly or indirectly.
667 *
668 * [ WFC: Entity Declared ]
669 * In a document without any DTD, a document with only an internal DTD
670 * subset which contains no parameter entity references, or a document
671 * with "standalone='yes'", ... ... The declaration of a parameter
672 * entity must precede any reference to it...
673 *
674 * [ VC: Entity Declared ]
675 * In a document with an external subset or external parameter entities
676 * with "standalone='no'", ... ... The declaration of a parameter entity
677 * must precede any reference to it...
678 *
679 * [ WFC: In DTD ]
680 * Parameter-entity references may only appear in the DTD.
681 * NOTE: misleading but this is handled.
682 *
683 * A PEReference may have been detected in the current input stream
684 * the handling is done accordingly to
685 * http://www.w3.org/TR/REC-xml#entproc
686 * i.e.
687 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000688 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000689 */
690void
691xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
692 xmlChar *name;
693 xmlEntityPtr entity = NULL;
694 xmlParserInputPtr input;
695
696 if (ctxt->token != 0) {
697 return;
698 }
699 if (RAW != '%') return;
700 switch(ctxt->instate) {
701 case XML_PARSER_CDATA_SECTION:
702 return;
703 case XML_PARSER_COMMENT:
704 return;
705 case XML_PARSER_START_TAG:
706 return;
707 case XML_PARSER_END_TAG:
708 return;
709 case XML_PARSER_EOF:
710 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
711 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
712 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
713 ctxt->wellFormed = 0;
714 ctxt->disableSAX = 1;
715 return;
716 case XML_PARSER_PROLOG:
717 case XML_PARSER_START:
718 case XML_PARSER_MISC:
719 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
720 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
721 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
722 ctxt->wellFormed = 0;
723 ctxt->disableSAX = 1;
724 return;
725 case XML_PARSER_ENTITY_DECL:
726 case XML_PARSER_CONTENT:
727 case XML_PARSER_ATTRIBUTE_VALUE:
728 case XML_PARSER_PI:
729 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000730 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000731 /* we just ignore it there */
732 return;
733 case XML_PARSER_EPILOG:
734 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
735 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
736 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
737 ctxt->wellFormed = 0;
738 ctxt->disableSAX = 1;
739 return;
740 case XML_PARSER_ENTITY_VALUE:
741 /*
742 * NOTE: in the case of entity values, we don't do the
743 * substitution here since we need the literal
744 * entity value to be able to save the internal
745 * subset of the document.
746 * This will be handled by xmlStringDecodeEntities
747 */
748 return;
749 case XML_PARSER_DTD:
750 /*
751 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
752 * In the internal DTD subset, parameter-entity references
753 * can occur only where markup declarations can occur, not
754 * within markup declarations.
755 * In that case this is handled in xmlParseMarkupDecl
756 */
757 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
758 return;
759 break;
760 case XML_PARSER_IGNORE:
761 return;
762 }
763
764 NEXT;
765 name = xmlParseName(ctxt);
766 if (xmlParserDebugEntities)
767 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000768 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000769 if (name == NULL) {
770 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
771 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000772 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000773 ctxt->wellFormed = 0;
774 ctxt->disableSAX = 1;
775 } else {
776 if (RAW == ';') {
777 NEXT;
778 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
779 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
780 if (entity == NULL) {
781
782 /*
783 * [ WFC: Entity Declared ]
784 * In a document without any DTD, a document with only an
785 * internal DTD subset which contains no parameter entity
786 * references, or a document with "standalone='yes'", ...
787 * ... The declaration of a parameter entity must precede
788 * any reference to it...
789 */
790 if ((ctxt->standalone == 1) ||
791 ((ctxt->hasExternalSubset == 0) &&
792 (ctxt->hasPErefs == 0))) {
793 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
794 ctxt->sax->error(ctxt->userData,
795 "PEReference: %%%s; not found\n", name);
796 ctxt->wellFormed = 0;
797 ctxt->disableSAX = 1;
798 } else {
799 /*
800 * [ VC: Entity Declared ]
801 * In a document with an external subset or external
802 * parameter entities with "standalone='no'", ...
803 * ... The declaration of a parameter entity must precede
804 * any reference to it...
805 */
806 if ((!ctxt->disableSAX) &&
807 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
808 ctxt->vctxt.error(ctxt->vctxt.userData,
809 "PEReference: %%%s; not found\n", name);
810 } else if ((!ctxt->disableSAX) &&
811 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
812 ctxt->sax->warning(ctxt->userData,
813 "PEReference: %%%s; not found\n", name);
814 ctxt->valid = 0;
815 }
816 } else {
817 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
818 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000819 xmlChar start[4];
820 xmlCharEncoding enc;
821
Owen Taylor3473f882001-02-23 17:55:21 +0000822 /*
823 * handle the extra spaces added before and after
824 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000825 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000826 */
827 input = xmlNewEntityInputStream(ctxt, entity);
828 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000829
830 /*
831 * Get the 4 first bytes and decode the charset
832 * if enc != XML_CHAR_ENCODING_NONE
833 * plug some encoding conversion routines.
834 */
835 GROW
836 start[0] = RAW;
837 start[1] = NXT(1);
838 start[2] = NXT(2);
839 start[3] = NXT(3);
840 enc = xmlDetectCharEncoding(start, 4);
841 if (enc != XML_CHAR_ENCODING_NONE) {
842 xmlSwitchEncoding(ctxt, enc);
843 }
844
Owen Taylor3473f882001-02-23 17:55:21 +0000845 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
846 (RAW == '<') && (NXT(1) == '?') &&
847 (NXT(2) == 'x') && (NXT(3) == 'm') &&
848 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
849 xmlParseTextDecl(ctxt);
850 }
851 if (ctxt->token == 0)
852 ctxt->token = ' ';
853 } else {
854 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
855 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000856 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000857 name);
858 ctxt->wellFormed = 0;
859 ctxt->disableSAX = 1;
860 }
861 }
862 } else {
863 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
864 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
865 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000866 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000867 ctxt->wellFormed = 0;
868 ctxt->disableSAX = 1;
869 }
870 xmlFree(name);
871 }
872}
873
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer) doubles the size of the xmlChar array "buffer", whose
 * allocated size is tracked in the companion variable "buffer_size".
 * It must be used inside a function returning a pointer: on allocation
 * failure the enclosing function returns NULL.
 *
 * The reallocation result goes through a temporary, and the size is only
 * updated on success. On failure the previous allocation is released
 * before returning so that it is not leaked.
 * NOTE(review): this assumes "buffer" is owned by the enclosing function
 * and not referenced elsewhere when the macro is used - confirm for every
 * call site.
 */
#define growBuffer(buffer) {						\
    xmlChar *growBuffer_tmp;						\
    int growBuffer_size = buffer##_size * 2;				\
    growBuffer_tmp = (xmlChar *)					\
        xmlRealloc(buffer, growBuffer_size * sizeof(xmlChar));		\
    if (growBuffer_tmp == NULL) {					\
        perror("realloc failed");					\
        xmlFree(buffer);						\
        return(NULL);							\
    }									\
    buffer = growBuffer_tmp;						\
    buffer##_size = growBuffer_size;					\
}
886
887/**
888 * xmlStringDecodeEntities:
889 * @ctxt: the parser context
890 * @str: the input string
891 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
892 * @end: an end marker xmlChar, 0 if none
893 * @end2: an end marker xmlChar, 0 if none
894 * @end3: an end marker xmlChar, 0 if none
895 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000896 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +0000897 *
898 * [67] Reference ::= EntityRef | CharRef
899 *
900 * [69] PEReference ::= '%' Name ';'
901 *
902 * Returns A newly allocated string with the substitution done. The caller
903 * must deallocate it !
904 */
905xmlChar *
906xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
907 xmlChar end, xmlChar end2, xmlChar end3) {
908 xmlChar *buffer = NULL;
909 int buffer_size = 0;
910
911 xmlChar *current = NULL;
912 xmlEntityPtr ent;
913 int c,l;
914 int nbchars = 0;
915
916 if (str == NULL)
917 return(NULL);
918
919 if (ctxt->depth > 40) {
920 ctxt->errNo = XML_ERR_ENTITY_LOOP;
921 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
922 ctxt->sax->error(ctxt->userData,
923 "Detected entity reference loop\n");
924 ctxt->wellFormed = 0;
925 ctxt->disableSAX = 1;
926 return(NULL);
927 }
928
929 /*
930 * allocate a translation buffer.
931 */
932 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
933 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
934 if (buffer == NULL) {
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000935 perror("xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +0000936 return(NULL);
937 }
938
939 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000940 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +0000941 * we are operating on already parsed values.
942 */
943 c = CUR_SCHAR(str, l);
944 while ((c != 0) && (c != end) && /* non input consuming loop */
945 (c != end2) && (c != end3)) {
946
947 if (c == 0) break;
948 if ((c == '&') && (str[1] == '#')) {
949 int val = xmlParseStringCharRef(ctxt, &str);
950 if (val != 0) {
951 COPY_BUF(0,buffer,nbchars,val);
952 }
953 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
954 if (xmlParserDebugEntities)
955 xmlGenericError(xmlGenericErrorContext,
956 "String decoding Entity Reference: %.30s\n",
957 str);
958 ent = xmlParseStringEntityRef(ctxt, &str);
959 if ((ent != NULL) &&
960 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
961 if (ent->content != NULL) {
962 COPY_BUF(0,buffer,nbchars,ent->content[0]);
963 } else {
964 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
965 ctxt->sax->error(ctxt->userData,
966 "internal error entity has no content\n");
967 }
968 } else if ((ent != NULL) && (ent->content != NULL)) {
969 xmlChar *rep;
970
971 ctxt->depth++;
972 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
973 0, 0, 0);
974 ctxt->depth--;
975 if (rep != NULL) {
976 current = rep;
977 while (*current != 0) { /* non input consuming loop */
978 buffer[nbchars++] = *current++;
979 if (nbchars >
980 buffer_size - XML_PARSER_BUFFER_SIZE) {
981 growBuffer(buffer);
982 }
983 }
984 xmlFree(rep);
985 }
986 } else if (ent != NULL) {
987 int i = xmlStrlen(ent->name);
988 const xmlChar *cur = ent->name;
989
990 buffer[nbchars++] = '&';
991 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
992 growBuffer(buffer);
993 }
994 for (;i > 0;i--)
995 buffer[nbchars++] = *cur++;
996 buffer[nbchars++] = ';';
997 }
998 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
999 if (xmlParserDebugEntities)
1000 xmlGenericError(xmlGenericErrorContext,
1001 "String decoding PE Reference: %.30s\n", str);
1002 ent = xmlParseStringPEReference(ctxt, &str);
1003 if (ent != NULL) {
1004 xmlChar *rep;
1005
1006 ctxt->depth++;
1007 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1008 0, 0, 0);
1009 ctxt->depth--;
1010 if (rep != NULL) {
1011 current = rep;
1012 while (*current != 0) { /* non input consuming loop */
1013 buffer[nbchars++] = *current++;
1014 if (nbchars >
1015 buffer_size - XML_PARSER_BUFFER_SIZE) {
1016 growBuffer(buffer);
1017 }
1018 }
1019 xmlFree(rep);
1020 }
1021 }
1022 } else {
1023 COPY_BUF(l,buffer,nbchars,c);
1024 str += l;
1025 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1026 growBuffer(buffer);
1027 }
1028 }
1029 c = CUR_SCHAR(str, l);
1030 }
1031 buffer[nbchars++] = 0;
1032 return(buffer);
1033}
1034
1035
1036/************************************************************************
1037 * *
1038 * Commodity functions to handle xmlChars *
1039 * *
1040 ************************************************************************/
1041
1042/**
1043 * xmlStrndup:
1044 * @cur: the input xmlChar *
1045 * @len: the len of @cur
1046 *
1047 * a strndup for array of xmlChar's
1048 *
1049 * Returns a new xmlChar * or NULL
1050 */
1051xmlChar *
1052xmlStrndup(const xmlChar *cur, int len) {
1053 xmlChar *ret;
1054
1055 if ((cur == NULL) || (len < 0)) return(NULL);
1056 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1057 if (ret == NULL) {
1058 xmlGenericError(xmlGenericErrorContext,
1059 "malloc of %ld byte failed\n",
1060 (len + 1) * (long)sizeof(xmlChar));
1061 return(NULL);
1062 }
1063 memcpy(ret, cur, len * sizeof(xmlChar));
1064 ret[len] = 0;
1065 return(ret);
1066}
1067
1068/**
1069 * xmlStrdup:
1070 * @cur: the input xmlChar *
1071 *
1072 * a strdup for array of xmlChar's. Since they are supposed to be
1073 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1074 * a termination mark of '0'.
1075 *
1076 * Returns a new xmlChar * or NULL
1077 */
1078xmlChar *
1079xmlStrdup(const xmlChar *cur) {
1080 const xmlChar *p = cur;
1081
1082 if (cur == NULL) return(NULL);
1083 while (*p != 0) p++; /* non input consuming */
1084 return(xmlStrndup(cur, p - cur));
1085}
1086
1087/**
1088 * xmlCharStrndup:
1089 * @cur: the input char *
1090 * @len: the len of @cur
1091 *
1092 * a strndup for char's to xmlChar's
1093 *
1094 * Returns a new xmlChar * or NULL
1095 */
1096
1097xmlChar *
1098xmlCharStrndup(const char *cur, int len) {
1099 int i;
1100 xmlChar *ret;
1101
1102 if ((cur == NULL) || (len < 0)) return(NULL);
1103 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1104 if (ret == NULL) {
1105 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1106 (len + 1) * (long)sizeof(xmlChar));
1107 return(NULL);
1108 }
1109 for (i = 0;i < len;i++)
1110 ret[i] = (xmlChar) cur[i];
1111 ret[len] = 0;
1112 return(ret);
1113}
1114
1115/**
1116 * xmlCharStrdup:
1117 * @cur: the input char *
1118 * @len: the len of @cur
1119 *
1120 * a strdup for char's to xmlChar's
1121 *
1122 * Returns a new xmlChar * or NULL
1123 */
1124
1125xmlChar *
1126xmlCharStrdup(const char *cur) {
1127 const char *p = cur;
1128
1129 if (cur == NULL) return(NULL);
1130 while (*p != '\0') p++; /* non input consuming */
1131 return(xmlCharStrndup(cur, p - cur));
1132}
1133
1134/**
1135 * xmlStrcmp:
1136 * @str1: the first xmlChar *
1137 * @str2: the second xmlChar *
1138 *
1139 * a strcmp for xmlChar's
1140 *
1141 * Returns the integer result of the comparison
1142 */
1143
1144int
1145xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1146 register int tmp;
1147
1148 if (str1 == str2) return(0);
1149 if (str1 == NULL) return(-1);
1150 if (str2 == NULL) return(1);
1151 do {
1152 tmp = *str1++ - *str2;
1153 if (tmp != 0) return(tmp);
1154 } while (*str2++ != 0);
1155 return 0;
1156}
1157
1158/**
1159 * xmlStrEqual:
1160 * @str1: the first xmlChar *
1161 * @str2: the second xmlChar *
1162 *
1163 * Check if both string are equal of have same content
1164 * Should be a bit more readable and faster than xmlStrEqual()
1165 *
1166 * Returns 1 if they are equal, 0 if they are different
1167 */
1168
1169int
1170xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1171 if (str1 == str2) return(1);
1172 if (str1 == NULL) return(0);
1173 if (str2 == NULL) return(0);
1174 do {
1175 if (*str1++ != *str2) return(0);
1176 } while (*str2++);
1177 return(1);
1178}
1179
1180/**
1181 * xmlStrncmp:
1182 * @str1: the first xmlChar *
1183 * @str2: the second xmlChar *
1184 * @len: the max comparison length
1185 *
1186 * a strncmp for xmlChar's
1187 *
1188 * Returns the integer result of the comparison
1189 */
1190
1191int
1192xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1193 register int tmp;
1194
1195 if (len <= 0) return(0);
1196 if (str1 == str2) return(0);
1197 if (str1 == NULL) return(-1);
1198 if (str2 == NULL) return(1);
1199 do {
1200 tmp = *str1++ - *str2;
1201 if (tmp != 0 || --len == 0) return(tmp);
1202 } while (*str2++ != 0);
1203 return 0;
1204}
1205
Daniel Veillardb44025c2001-10-11 22:55:55 +00001206static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001207 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1208 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1209 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1210 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1211 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1212 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1213 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1214 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1215 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1216 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1217 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1218 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1219 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1220 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1221 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1222 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1223 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1224 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1225 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1226 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1227 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1228 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1229 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1230 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1231 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1232 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1233 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1234 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1235 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1236 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1237 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1238 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1239};
1240
1241/**
1242 * xmlStrcasecmp:
1243 * @str1: the first xmlChar *
1244 * @str2: the second xmlChar *
1245 *
1246 * a strcasecmp for xmlChar's
1247 *
1248 * Returns the integer result of the comparison
1249 */
1250
1251int
1252xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1253 register int tmp;
1254
1255 if (str1 == str2) return(0);
1256 if (str1 == NULL) return(-1);
1257 if (str2 == NULL) return(1);
1258 do {
1259 tmp = casemap[*str1++] - casemap[*str2];
1260 if (tmp != 0) return(tmp);
1261 } while (*str2++ != 0);
1262 return 0;
1263}
1264
1265/**
1266 * xmlStrncasecmp:
1267 * @str1: the first xmlChar *
1268 * @str2: the second xmlChar *
1269 * @len: the max comparison length
1270 *
1271 * a strncasecmp for xmlChar's
1272 *
1273 * Returns the integer result of the comparison
1274 */
1275
1276int
1277xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1278 register int tmp;
1279
1280 if (len <= 0) return(0);
1281 if (str1 == str2) return(0);
1282 if (str1 == NULL) return(-1);
1283 if (str2 == NULL) return(1);
1284 do {
1285 tmp = casemap[*str1++] - casemap[*str2];
1286 if (tmp != 0 || --len == 0) return(tmp);
1287 } while (*str2++ != 0);
1288 return 0;
1289}
1290
1291/**
1292 * xmlStrchr:
1293 * @str: the xmlChar * array
1294 * @val: the xmlChar to search
1295 *
1296 * a strchr for xmlChar's
1297 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001298 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001299 */
1300
1301const xmlChar *
1302xmlStrchr(const xmlChar *str, xmlChar val) {
1303 if (str == NULL) return(NULL);
1304 while (*str != 0) { /* non input consuming */
1305 if (*str == val) return((xmlChar *) str);
1306 str++;
1307 }
1308 return(NULL);
1309}
1310
1311/**
1312 * xmlStrstr:
1313 * @str: the xmlChar * array (haystack)
1314 * @val: the xmlChar to search (needle)
1315 *
1316 * a strstr for xmlChar's
1317 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001318 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001319 */
1320
1321const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001322xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001323 int n;
1324
1325 if (str == NULL) return(NULL);
1326 if (val == NULL) return(NULL);
1327 n = xmlStrlen(val);
1328
1329 if (n == 0) return(str);
1330 while (*str != 0) { /* non input consuming */
1331 if (*str == *val) {
1332 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1333 }
1334 str++;
1335 }
1336 return(NULL);
1337}
1338
1339/**
1340 * xmlStrcasestr:
1341 * @str: the xmlChar * array (haystack)
1342 * @val: the xmlChar to search (needle)
1343 *
1344 * a case-ignoring strstr for xmlChar's
1345 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001346 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001347 */
1348
1349const xmlChar *
1350xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1351 int n;
1352
1353 if (str == NULL) return(NULL);
1354 if (val == NULL) return(NULL);
1355 n = xmlStrlen(val);
1356
1357 if (n == 0) return(str);
1358 while (*str != 0) { /* non input consuming */
1359 if (casemap[*str] == casemap[*val])
1360 if (!xmlStrncasecmp(str, val, n)) return(str);
1361 str++;
1362 }
1363 return(NULL);
1364}
1365
1366/**
1367 * xmlStrsub:
1368 * @str: the xmlChar * array (haystack)
1369 * @start: the index of the first char (zero based)
1370 * @len: the length of the substring
1371 *
1372 * Extract a substring of a given string
1373 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001374 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001375 */
1376
1377xmlChar *
1378xmlStrsub(const xmlChar *str, int start, int len) {
1379 int i;
1380
1381 if (str == NULL) return(NULL);
1382 if (start < 0) return(NULL);
1383 if (len < 0) return(NULL);
1384
1385 for (i = 0;i < start;i++) {
1386 if (*str == 0) return(NULL);
1387 str++;
1388 }
1389 if (*str == 0) return(NULL);
1390 return(xmlStrndup(str, len));
1391}
1392
1393/**
1394 * xmlStrlen:
1395 * @str: the xmlChar * array
1396 *
1397 * length of a xmlChar's string
1398 *
1399 * Returns the number of xmlChar contained in the ARRAY.
1400 */
1401
1402int
1403xmlStrlen(const xmlChar *str) {
1404 int len = 0;
1405
1406 if (str == NULL) return(0);
1407 while (*str != 0) { /* non input consuming */
1408 str++;
1409 len++;
1410 }
1411 return(len);
1412}
1413
1414/**
1415 * xmlStrncat:
1416 * @cur: the original xmlChar * array
1417 * @add: the xmlChar * array added
1418 * @len: the length of @add
1419 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001420 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001421 * first bytes of @add.
1422 *
1423 * Returns a new xmlChar *, the original @cur is reallocated if needed
1424 * and should not be freed
1425 */
1426
1427xmlChar *
1428xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1429 int size;
1430 xmlChar *ret;
1431
1432 if ((add == NULL) || (len == 0))
1433 return(cur);
1434 if (cur == NULL)
1435 return(xmlStrndup(add, len));
1436
1437 size = xmlStrlen(cur);
1438 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1439 if (ret == NULL) {
1440 xmlGenericError(xmlGenericErrorContext,
1441 "xmlStrncat: realloc of %ld byte failed\n",
1442 (size + len + 1) * (long)sizeof(xmlChar));
1443 return(cur);
1444 }
1445 memcpy(&ret[size], add, len * sizeof(xmlChar));
1446 ret[size + len] = 0;
1447 return(ret);
1448}
1449
1450/**
1451 * xmlStrcat:
1452 * @cur: the original xmlChar * array
1453 * @add: the xmlChar * array added
1454 *
1455 * a strcat for array of xmlChar's. Since they are supposed to be
1456 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1457 * a termination mark of '0'.
1458 *
1459 * Returns a new xmlChar * containing the concatenated string.
1460 */
1461xmlChar *
1462xmlStrcat(xmlChar *cur, const xmlChar *add) {
1463 const xmlChar *p = add;
1464
1465 if (add == NULL) return(cur);
1466 if (cur == NULL)
1467 return(xmlStrdup(add));
1468
1469 while (*p != 0) p++; /* non input consuming */
1470 return(xmlStrncat(cur, add, p - add));
1471}
1472
1473/************************************************************************
1474 * *
1475 * Commodity functions, cleanup needed ? *
1476 * *
1477 ************************************************************************/
1478
1479/**
1480 * areBlanks:
1481 * @ctxt: an XML parser context
1482 * @str: a xmlChar *
1483 * @len: the size of @str
1484 *
1485 * Is this a sequence of blank chars that one can ignore ?
1486 *
1487 * Returns 1 if ignorable 0 otherwise.
1488 */
1489
1490static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1491 int i, ret;
1492 xmlNodePtr lastChild;
1493
Daniel Veillard05c13a22001-09-09 08:38:09 +00001494 /*
1495 * Don't spend time trying to differentiate them, the same callback is
1496 * used !
1497 */
1498 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001499 return(0);
1500
Owen Taylor3473f882001-02-23 17:55:21 +00001501 /*
1502 * Check for xml:space value.
1503 */
1504 if (*(ctxt->space) == 1)
1505 return(0);
1506
1507 /*
1508 * Check that the string is made of blanks
1509 */
1510 for (i = 0;i < len;i++)
1511 if (!(IS_BLANK(str[i]))) return(0);
1512
1513 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001514 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001515 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001516 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001517 if (ctxt->myDoc != NULL) {
1518 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1519 if (ret == 0) return(1);
1520 if (ret == 1) return(0);
1521 }
1522
1523 /*
1524 * Otherwise, heuristic :-\
1525 */
Owen Taylor3473f882001-02-23 17:55:21 +00001526 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001527 if ((ctxt->node->children == NULL) &&
1528 (RAW == '<') && (NXT(1) == '/')) return(0);
1529
1530 lastChild = xmlGetLastChild(ctxt->node);
1531 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001532 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1533 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001534 } else if (xmlNodeIsText(lastChild))
1535 return(0);
1536 else if ((ctxt->node->children != NULL) &&
1537 (xmlNodeIsText(ctxt->node->children)))
1538 return(0);
1539 return(1);
1540}
1541
Owen Taylor3473f882001-02-23 17:55:21 +00001542/************************************************************************
1543 * *
1544 * Extra stuff for namespace support *
1545 * Relates to http://www.w3.org/TR/WD-xml-names *
1546 * *
1547 ************************************************************************/
1548
1549/**
1550 * xmlSplitQName:
1551 * @ctxt: an XML parser context
1552 * @name: an XML parser context
1553 * @prefix: a xmlChar **
1554 *
1555 * parse an UTF8 encoded XML qualified name string
1556 *
1557 * [NS 5] QName ::= (Prefix ':')? LocalPart
1558 *
1559 * [NS 6] Prefix ::= NCName
1560 *
1561 * [NS 7] LocalPart ::= NCName
1562 *
1563 * Returns the local part, and prefix is updated
1564 * to get the Prefix if any.
1565 */
1566
1567xmlChar *
1568xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1569 xmlChar buf[XML_MAX_NAMELEN + 5];
1570 xmlChar *buffer = NULL;
1571 int len = 0;
1572 int max = XML_MAX_NAMELEN;
1573 xmlChar *ret = NULL;
1574 const xmlChar *cur = name;
1575 int c;
1576
1577 *prefix = NULL;
1578
1579#ifndef XML_XML_NAMESPACE
1580 /* xml: prefix is not really a namespace */
1581 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1582 (cur[2] == 'l') && (cur[3] == ':'))
1583 return(xmlStrdup(name));
1584#endif
1585
1586 /* nasty but valid */
1587 if (cur[0] == ':')
1588 return(xmlStrdup(name));
1589
1590 c = *cur++;
1591 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1592 buf[len++] = c;
1593 c = *cur++;
1594 }
1595 if (len >= max) {
1596 /*
1597 * Okay someone managed to make a huge name, so he's ready to pay
1598 * for the processing speed.
1599 */
1600 max = len * 2;
1601
1602 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1603 if (buffer == NULL) {
1604 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1605 ctxt->sax->error(ctxt->userData,
1606 "xmlSplitQName: out of memory\n");
1607 return(NULL);
1608 }
1609 memcpy(buffer, buf, len);
1610 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1611 if (len + 10 > max) {
1612 max *= 2;
1613 buffer = (xmlChar *) xmlRealloc(buffer,
1614 max * sizeof(xmlChar));
1615 if (buffer == NULL) {
1616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1617 ctxt->sax->error(ctxt->userData,
1618 "xmlSplitQName: out of memory\n");
1619 return(NULL);
1620 }
1621 }
1622 buffer[len++] = c;
1623 c = *cur++;
1624 }
1625 buffer[len] = 0;
1626 }
1627
1628 if (buffer == NULL)
1629 ret = xmlStrndup(buf, len);
1630 else {
1631 ret = buffer;
1632 buffer = NULL;
1633 max = XML_MAX_NAMELEN;
1634 }
1635
1636
1637 if (c == ':') {
1638 c = *cur++;
1639 if (c == 0) return(ret);
1640 *prefix = ret;
1641 len = 0;
1642
1643 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1644 buf[len++] = c;
1645 c = *cur++;
1646 }
1647 if (len >= max) {
1648 /*
1649 * Okay someone managed to make a huge name, so he's ready to pay
1650 * for the processing speed.
1651 */
1652 max = len * 2;
1653
1654 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1655 if (buffer == NULL) {
1656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1657 ctxt->sax->error(ctxt->userData,
1658 "xmlSplitQName: out of memory\n");
1659 return(NULL);
1660 }
1661 memcpy(buffer, buf, len);
1662 while (c != 0) { /* tested bigname2.xml */
1663 if (len + 10 > max) {
1664 max *= 2;
1665 buffer = (xmlChar *) xmlRealloc(buffer,
1666 max * sizeof(xmlChar));
1667 if (buffer == NULL) {
1668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1669 ctxt->sax->error(ctxt->userData,
1670 "xmlSplitQName: out of memory\n");
1671 return(NULL);
1672 }
1673 }
1674 buffer[len++] = c;
1675 c = *cur++;
1676 }
1677 buffer[len] = 0;
1678 }
1679
1680 if (buffer == NULL)
1681 ret = xmlStrndup(buf, len);
1682 else {
1683 ret = buffer;
1684 }
1685 }
1686
1687 return(ret);
1688}
1689
1690/************************************************************************
1691 * *
1692 * The parser itself *
1693 * Relates to http://www.w3.org/TR/REC-xml *
1694 * *
1695 ************************************************************************/
1696
Daniel Veillard76d66f42001-05-16 21:05:17 +00001697static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001698/**
1699 * xmlParseName:
1700 * @ctxt: an XML parser context
1701 *
1702 * parse an XML name.
1703 *
1704 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1705 * CombiningChar | Extender
1706 *
1707 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1708 *
1709 * [6] Names ::= Name (S Name)*
1710 *
1711 * Returns the Name parsed or NULL
1712 */
1713
1714xmlChar *
1715xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001716 const xmlChar *in;
1717 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001718 int count = 0;
1719
1720 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001721
1722 /*
1723 * Accelerator for simple ASCII names
1724 */
1725 in = ctxt->input->cur;
1726 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1727 ((*in >= 0x41) && (*in <= 0x5A)) ||
1728 (*in == '_') || (*in == ':')) {
1729 in++;
1730 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1731 ((*in >= 0x41) && (*in <= 0x5A)) ||
1732 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001733 (*in == '_') || (*in == '-') ||
1734 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001735 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001736 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001737 count = in - ctxt->input->cur;
1738 ret = xmlStrndup(ctxt->input->cur, count);
1739 ctxt->input->cur = in;
1740 return(ret);
1741 }
1742 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001743 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001744}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001745
Daniel Veillard76d66f42001-05-16 21:05:17 +00001746static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001747xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1748 xmlChar buf[XML_MAX_NAMELEN + 5];
1749 int len = 0, l;
1750 int c;
1751 int count = 0;
1752
1753 /*
1754 * Handler for more complex cases
1755 */
1756 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001757 c = CUR_CHAR(l);
1758 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1759 (!IS_LETTER(c) && (c != '_') &&
1760 (c != ':'))) {
1761 return(NULL);
1762 }
1763
1764 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1765 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1766 (c == '.') || (c == '-') ||
1767 (c == '_') || (c == ':') ||
1768 (IS_COMBINING(c)) ||
1769 (IS_EXTENDER(c)))) {
1770 if (count++ > 100) {
1771 count = 0;
1772 GROW;
1773 }
1774 COPY_BUF(l,buf,len,c);
1775 NEXTL(l);
1776 c = CUR_CHAR(l);
1777 if (len >= XML_MAX_NAMELEN) {
1778 /*
1779 * Okay someone managed to make a huge name, so he's ready to pay
1780 * for the processing speed.
1781 */
1782 xmlChar *buffer;
1783 int max = len * 2;
1784
1785 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1786 if (buffer == NULL) {
1787 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1788 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001789 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001790 return(NULL);
1791 }
1792 memcpy(buffer, buf, len);
1793 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1794 (c == '.') || (c == '-') ||
1795 (c == '_') || (c == ':') ||
1796 (IS_COMBINING(c)) ||
1797 (IS_EXTENDER(c))) {
1798 if (count++ > 100) {
1799 count = 0;
1800 GROW;
1801 }
1802 if (len + 10 > max) {
1803 max *= 2;
1804 buffer = (xmlChar *) xmlRealloc(buffer,
1805 max * sizeof(xmlChar));
1806 if (buffer == NULL) {
1807 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1808 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001809 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001810 return(NULL);
1811 }
1812 }
1813 COPY_BUF(l,buffer,len,c);
1814 NEXTL(l);
1815 c = CUR_CHAR(l);
1816 }
1817 buffer[len] = 0;
1818 return(buffer);
1819 }
1820 }
1821 return(xmlStrndup(buf, len));
1822}
1823
1824/**
1825 * xmlParseStringName:
1826 * @ctxt: an XML parser context
1827 * @str: a pointer to the string pointer (IN/OUT)
1828 *
1829 * parse an XML name.
1830 *
1831 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1832 * CombiningChar | Extender
1833 *
1834 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1835 *
1836 * [6] Names ::= Name (S Name)*
1837 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001838 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00001839 * is updated to the current location in the string.
1840 */
1841
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001842static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001843xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1844 xmlChar buf[XML_MAX_NAMELEN + 5];
1845 const xmlChar *cur = *str;
1846 int len = 0, l;
1847 int c;
1848
1849 c = CUR_SCHAR(cur, l);
1850 if (!IS_LETTER(c) && (c != '_') &&
1851 (c != ':')) {
1852 return(NULL);
1853 }
1854
1855 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1856 (c == '.') || (c == '-') ||
1857 (c == '_') || (c == ':') ||
1858 (IS_COMBINING(c)) ||
1859 (IS_EXTENDER(c))) {
1860 COPY_BUF(l,buf,len,c);
1861 cur += l;
1862 c = CUR_SCHAR(cur, l);
1863 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1864 /*
1865 * Okay someone managed to make a huge name, so he's ready to pay
1866 * for the processing speed.
1867 */
1868 xmlChar *buffer;
1869 int max = len * 2;
1870
1871 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1872 if (buffer == NULL) {
1873 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1874 ctxt->sax->error(ctxt->userData,
1875 "xmlParseStringName: out of memory\n");
1876 return(NULL);
1877 }
1878 memcpy(buffer, buf, len);
1879 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1880 (c == '.') || (c == '-') ||
1881 (c == '_') || (c == ':') ||
1882 (IS_COMBINING(c)) ||
1883 (IS_EXTENDER(c))) {
1884 if (len + 10 > max) {
1885 max *= 2;
1886 buffer = (xmlChar *) xmlRealloc(buffer,
1887 max * sizeof(xmlChar));
1888 if (buffer == NULL) {
1889 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1890 ctxt->sax->error(ctxt->userData,
1891 "xmlParseStringName: out of memory\n");
1892 return(NULL);
1893 }
1894 }
1895 COPY_BUF(l,buffer,len,c);
1896 cur += l;
1897 c = CUR_SCHAR(cur, l);
1898 }
1899 buffer[len] = 0;
1900 *str = cur;
1901 return(buffer);
1902 }
1903 }
1904 *str = cur;
1905 return(xmlStrndup(buf, len));
1906}
1907
1908/**
1909 * xmlParseNmtoken:
1910 * @ctxt: an XML parser context
1911 *
1912 * parse an XML Nmtoken.
1913 *
1914 * [7] Nmtoken ::= (NameChar)+
1915 *
1916 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1917 *
1918 * Returns the Nmtoken parsed or NULL
1919 */
1920
1921xmlChar *
1922xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1923 xmlChar buf[XML_MAX_NAMELEN + 5];
1924 int len = 0, l;
1925 int c;
1926 int count = 0;
1927
1928 GROW;
1929 c = CUR_CHAR(l);
1930
1931 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1932 (c == '.') || (c == '-') ||
1933 (c == '_') || (c == ':') ||
1934 (IS_COMBINING(c)) ||
1935 (IS_EXTENDER(c))) {
1936 if (count++ > 100) {
1937 count = 0;
1938 GROW;
1939 }
1940 COPY_BUF(l,buf,len,c);
1941 NEXTL(l);
1942 c = CUR_CHAR(l);
1943 if (len >= XML_MAX_NAMELEN) {
1944 /*
1945 * Okay someone managed to make a huge token, so he's ready to pay
1946 * for the processing speed.
1947 */
1948 xmlChar *buffer;
1949 int max = len * 2;
1950
1951 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1952 if (buffer == NULL) {
1953 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1954 ctxt->sax->error(ctxt->userData,
1955 "xmlParseNmtoken: out of memory\n");
1956 return(NULL);
1957 }
1958 memcpy(buffer, buf, len);
1959 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1960 (c == '.') || (c == '-') ||
1961 (c == '_') || (c == ':') ||
1962 (IS_COMBINING(c)) ||
1963 (IS_EXTENDER(c))) {
1964 if (count++ > 100) {
1965 count = 0;
1966 GROW;
1967 }
1968 if (len + 10 > max) {
1969 max *= 2;
1970 buffer = (xmlChar *) xmlRealloc(buffer,
1971 max * sizeof(xmlChar));
1972 if (buffer == NULL) {
1973 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1974 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001975 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001976 return(NULL);
1977 }
1978 }
1979 COPY_BUF(l,buffer,len,c);
1980 NEXTL(l);
1981 c = CUR_CHAR(l);
1982 }
1983 buffer[len] = 0;
1984 return(buffer);
1985 }
1986 }
1987 if (len == 0)
1988 return(NULL);
1989 return(xmlStrndup(buf, len));
1990}
1991
1992/**
1993 * xmlParseEntityValue:
1994 * @ctxt: an XML parser context
1995 * @orig: if non-NULL store a copy of the original entity value
1996 *
1997 * parse a value for ENTITY declarations
1998 *
1999 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2000 * "'" ([^%&'] | PEReference | Reference)* "'"
2001 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002002 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002003 */
2004
2005xmlChar *
2006xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2007 xmlChar *buf = NULL;
2008 int len = 0;
2009 int size = XML_PARSER_BUFFER_SIZE;
2010 int c, l;
2011 xmlChar stop;
2012 xmlChar *ret = NULL;
2013 const xmlChar *cur = NULL;
2014 xmlParserInputPtr input;
2015
2016 if (RAW == '"') stop = '"';
2017 else if (RAW == '\'') stop = '\'';
2018 else {
2019 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2020 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2021 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2022 ctxt->wellFormed = 0;
2023 ctxt->disableSAX = 1;
2024 return(NULL);
2025 }
2026 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2027 if (buf == NULL) {
2028 xmlGenericError(xmlGenericErrorContext,
2029 "malloc of %d byte failed\n", size);
2030 return(NULL);
2031 }
2032
2033 /*
2034 * The content of the entity definition is copied in a buffer.
2035 */
2036
2037 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2038 input = ctxt->input;
2039 GROW;
2040 NEXT;
2041 c = CUR_CHAR(l);
2042 /*
2043 * NOTE: 4.4.5 Included in Literal
2044 * When a parameter entity reference appears in a literal entity
2045 * value, ... a single or double quote character in the replacement
2046 * text is always treated as a normal data character and will not
2047 * terminate the literal.
2048 * In practice it means we stop the loop only when back at parsing
2049 * the initial entity and the quote is found
2050 */
2051 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2052 (ctxt->input != input))) {
2053 if (len + 5 >= size) {
2054 size *= 2;
2055 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2056 if (buf == NULL) {
2057 xmlGenericError(xmlGenericErrorContext,
2058 "realloc of %d byte failed\n", size);
2059 return(NULL);
2060 }
2061 }
2062 COPY_BUF(l,buf,len,c);
2063 NEXTL(l);
2064 /*
2065 * Pop-up of finished entities.
2066 */
2067 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2068 xmlPopInput(ctxt);
2069
2070 GROW;
2071 c = CUR_CHAR(l);
2072 if (c == 0) {
2073 GROW;
2074 c = CUR_CHAR(l);
2075 }
2076 }
2077 buf[len] = 0;
2078
2079 /*
2080 * Raise problem w.r.t. '&' and '%' being used in non-entities
2081 * reference constructs. Note Charref will be handled in
2082 * xmlStringDecodeEntities()
2083 */
2084 cur = buf;
2085 while (*cur != 0) { /* non input consuming */
2086 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2087 xmlChar *name;
2088 xmlChar tmp = *cur;
2089
2090 cur++;
2091 name = xmlParseStringName(ctxt, &cur);
2092 if ((name == NULL) || (*cur != ';')) {
2093 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2094 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2095 ctxt->sax->error(ctxt->userData,
2096 "EntityValue: '%c' forbidden except for entities references\n",
2097 tmp);
2098 ctxt->wellFormed = 0;
2099 ctxt->disableSAX = 1;
2100 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002101 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2102 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002103 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2104 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2105 ctxt->sax->error(ctxt->userData,
2106 "EntityValue: PEReferences forbidden in internal subset\n",
2107 tmp);
2108 ctxt->wellFormed = 0;
2109 ctxt->disableSAX = 1;
2110 }
2111 if (name != NULL)
2112 xmlFree(name);
2113 }
2114 cur++;
2115 }
2116
2117 /*
2118 * Then PEReference entities are substituted.
2119 */
2120 if (c != stop) {
2121 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2122 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2123 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2124 ctxt->wellFormed = 0;
2125 ctxt->disableSAX = 1;
2126 xmlFree(buf);
2127 } else {
2128 NEXT;
2129 /*
2130 * NOTE: 4.4.7 Bypassed
2131 * When a general entity reference appears in the EntityValue in
2132 * an entity declaration, it is bypassed and left as is.
2133 * so XML_SUBSTITUTE_REF is not set here.
2134 */
2135 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2136 0, 0, 0);
2137 if (orig != NULL)
2138 *orig = buf;
2139 else
2140 xmlFree(buf);
2141 }
2142
2143 return(ret);
2144}
2145
2146/**
2147 * xmlParseAttValue:
2148 * @ctxt: an XML parser context
2149 *
2150 * parse a value for an attribute
2151 * Note: the parser won't do substitution of entities here, this
2152 * will be handled later in xmlStringGetNodeList
2153 *
2154 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2155 * "'" ([^<&'] | Reference)* "'"
2156 *
2157 * 3.3.3 Attribute-Value Normalization:
2158 * Before the value of an attribute is passed to the application or
2159 * checked for validity, the XML processor must normalize it as follows:
2160 * - a character reference is processed by appending the referenced
2161 * character to the attribute value
2162 * - an entity reference is processed by recursively processing the
2163 * replacement text of the entity
2164 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2165 * appending #x20 to the normalized value, except that only a single
2166 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2167 * parsed entity or the literal entity value of an internal parsed entity
2168 * - other characters are processed by appending them to the normalized value
2169 * If the declared value is not CDATA, then the XML processor must further
2170 * process the normalized attribute value by discarding any leading and
2171 * trailing space (#x20) characters, and by replacing sequences of space
2172 * (#x20) characters by a single space (#x20) character.
2173 * All attributes for which no declaration has been read should be treated
2174 * by a non-validating parser as if declared CDATA.
2175 *
2176 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2177 */
2178
2179xmlChar *
2180xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2181 xmlChar limit = 0;
2182 xmlChar *buf = NULL;
2183 int len = 0;
2184 int buf_size = 0;
2185 int c, l;
2186 xmlChar *current = NULL;
2187 xmlEntityPtr ent;
2188
2189
2190 SHRINK;
2191 if (NXT(0) == '"') {
2192 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2193 limit = '"';
2194 NEXT;
2195 } else if (NXT(0) == '\'') {
2196 limit = '\'';
2197 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2198 NEXT;
2199 } else {
2200 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2201 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2202 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2203 ctxt->wellFormed = 0;
2204 ctxt->disableSAX = 1;
2205 return(NULL);
2206 }
2207
2208 /*
2209 * allocate a translation buffer.
2210 */
2211 buf_size = XML_PARSER_BUFFER_SIZE;
2212 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2213 if (buf == NULL) {
2214 perror("xmlParseAttValue: malloc failed");
2215 return(NULL);
2216 }
2217
2218 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002219 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002220 */
2221 c = CUR_CHAR(l);
2222 while (((NXT(0) != limit) && /* checked */
2223 (c != '<')) || (ctxt->token != 0)) {
2224 if (c == 0) break;
2225 if (ctxt->token == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002226 if (ctxt->replaceEntities) {
2227 if (len > buf_size - 10) {
2228 growBuffer(buf);
2229 }
2230 buf[len++] = '&';
2231 } else {
2232 /*
2233 * The reparsing will be done in xmlStringGetNodeList()
2234 * called by the attribute() function in SAX.c
2235 */
2236 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002237
Daniel Veillard319a7422001-09-11 09:27:09 +00002238 if (len > buf_size - 10) {
2239 growBuffer(buf);
2240 }
2241 current = &buffer[0];
2242 while (*current != 0) { /* non input consuming */
2243 buf[len++] = *current++;
2244 }
2245 ctxt->token = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002246 }
Owen Taylor3473f882001-02-23 17:55:21 +00002247 } else if (c == '&') {
2248 if (NXT(1) == '#') {
2249 int val = xmlParseCharRef(ctxt);
2250 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002251 if (ctxt->replaceEntities) {
2252 if (len > buf_size - 10) {
2253 growBuffer(buf);
2254 }
2255 buf[len++] = '&';
2256 } else {
2257 /*
2258 * The reparsing will be done in xmlStringGetNodeList()
2259 * called by the attribute() function in SAX.c
2260 */
2261 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002262
Daniel Veillard319a7422001-09-11 09:27:09 +00002263 if (len > buf_size - 10) {
2264 growBuffer(buf);
2265 }
2266 current = &buffer[0];
2267 while (*current != 0) { /* non input consuming */
2268 buf[len++] = *current++;
2269 }
Owen Taylor3473f882001-02-23 17:55:21 +00002270 }
2271 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002272 if (len > buf_size - 10) {
2273 growBuffer(buf);
2274 }
Owen Taylor3473f882001-02-23 17:55:21 +00002275 len += xmlCopyChar(0, &buf[len], val);
2276 }
2277 } else {
2278 ent = xmlParseEntityRef(ctxt);
2279 if ((ent != NULL) &&
2280 (ctxt->replaceEntities != 0)) {
2281 xmlChar *rep;
2282
2283 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2284 rep = xmlStringDecodeEntities(ctxt, ent->content,
2285 XML_SUBSTITUTE_REF, 0, 0, 0);
2286 if (rep != NULL) {
2287 current = rep;
2288 while (*current != 0) { /* non input consuming */
2289 buf[len++] = *current++;
2290 if (len > buf_size - 10) {
2291 growBuffer(buf);
2292 }
2293 }
2294 xmlFree(rep);
2295 }
2296 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002297 if (len > buf_size - 10) {
2298 growBuffer(buf);
2299 }
Owen Taylor3473f882001-02-23 17:55:21 +00002300 if (ent->content != NULL)
2301 buf[len++] = ent->content[0];
2302 }
2303 } else if (ent != NULL) {
2304 int i = xmlStrlen(ent->name);
2305 const xmlChar *cur = ent->name;
2306
2307 /*
2308 * This may look absurd but is needed to detect
2309 * entities problems
2310 */
2311 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2312 (ent->content != NULL)) {
2313 xmlChar *rep;
2314 rep = xmlStringDecodeEntities(ctxt, ent->content,
2315 XML_SUBSTITUTE_REF, 0, 0, 0);
2316 if (rep != NULL)
2317 xmlFree(rep);
2318 }
2319
2320 /*
2321 * Just output the reference
2322 */
2323 buf[len++] = '&';
2324 if (len > buf_size - i - 10) {
2325 growBuffer(buf);
2326 }
2327 for (;i > 0;i--)
2328 buf[len++] = *cur++;
2329 buf[len++] = ';';
2330 }
2331 }
2332 } else {
2333 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2334 COPY_BUF(l,buf,len,0x20);
2335 if (len > buf_size - 10) {
2336 growBuffer(buf);
2337 }
2338 } else {
2339 COPY_BUF(l,buf,len,c);
2340 if (len > buf_size - 10) {
2341 growBuffer(buf);
2342 }
2343 }
2344 NEXTL(l);
2345 }
2346 GROW;
2347 c = CUR_CHAR(l);
2348 }
2349 buf[len++] = 0;
2350 if (RAW == '<') {
2351 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2352 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2353 ctxt->sax->error(ctxt->userData,
2354 "Unescaped '<' not allowed in attributes values\n");
2355 ctxt->wellFormed = 0;
2356 ctxt->disableSAX = 1;
2357 } else if (RAW != limit) {
2358 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2359 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2360 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2361 ctxt->wellFormed = 0;
2362 ctxt->disableSAX = 1;
2363 } else
2364 NEXT;
2365 return(buf);
2366}
2367
2368/**
2369 * xmlParseSystemLiteral:
2370 * @ctxt: an XML parser context
2371 *
2372 * parse an XML Literal
2373 *
2374 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2375 *
2376 * Returns the SystemLiteral parsed or NULL
2377 */
2378
2379xmlChar *
2380xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2381 xmlChar *buf = NULL;
2382 int len = 0;
2383 int size = XML_PARSER_BUFFER_SIZE;
2384 int cur, l;
2385 xmlChar stop;
2386 int state = ctxt->instate;
2387 int count = 0;
2388
2389 SHRINK;
2390 if (RAW == '"') {
2391 NEXT;
2392 stop = '"';
2393 } else if (RAW == '\'') {
2394 NEXT;
2395 stop = '\'';
2396 } else {
2397 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2398 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2399 ctxt->sax->error(ctxt->userData,
2400 "SystemLiteral \" or ' expected\n");
2401 ctxt->wellFormed = 0;
2402 ctxt->disableSAX = 1;
2403 return(NULL);
2404 }
2405
2406 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2407 if (buf == NULL) {
2408 xmlGenericError(xmlGenericErrorContext,
2409 "malloc of %d byte failed\n", size);
2410 return(NULL);
2411 }
2412 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2413 cur = CUR_CHAR(l);
2414 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2415 if (len + 5 >= size) {
2416 size *= 2;
2417 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2418 if (buf == NULL) {
2419 xmlGenericError(xmlGenericErrorContext,
2420 "realloc of %d byte failed\n", size);
2421 ctxt->instate = (xmlParserInputState) state;
2422 return(NULL);
2423 }
2424 }
2425 count++;
2426 if (count > 50) {
2427 GROW;
2428 count = 0;
2429 }
2430 COPY_BUF(l,buf,len,cur);
2431 NEXTL(l);
2432 cur = CUR_CHAR(l);
2433 if (cur == 0) {
2434 GROW;
2435 SHRINK;
2436 cur = CUR_CHAR(l);
2437 }
2438 }
2439 buf[len] = 0;
2440 ctxt->instate = (xmlParserInputState) state;
2441 if (!IS_CHAR(cur)) {
2442 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2443 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2444 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2445 ctxt->wellFormed = 0;
2446 ctxt->disableSAX = 1;
2447 } else {
2448 NEXT;
2449 }
2450 return(buf);
2451}
2452
2453/**
2454 * xmlParsePubidLiteral:
2455 * @ctxt: an XML parser context
2456 *
2457 * parse an XML public literal
2458 *
2459 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2460 *
2461 * Returns the PubidLiteral parsed or NULL.
2462 */
2463
2464xmlChar *
2465xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2466 xmlChar *buf = NULL;
2467 int len = 0;
2468 int size = XML_PARSER_BUFFER_SIZE;
2469 xmlChar cur;
2470 xmlChar stop;
2471 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002472 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002473
2474 SHRINK;
2475 if (RAW == '"') {
2476 NEXT;
2477 stop = '"';
2478 } else if (RAW == '\'') {
2479 NEXT;
2480 stop = '\'';
2481 } else {
2482 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2483 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2484 ctxt->sax->error(ctxt->userData,
2485 "SystemLiteral \" or ' expected\n");
2486 ctxt->wellFormed = 0;
2487 ctxt->disableSAX = 1;
2488 return(NULL);
2489 }
2490 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2491 if (buf == NULL) {
2492 xmlGenericError(xmlGenericErrorContext,
2493 "malloc of %d byte failed\n", size);
2494 return(NULL);
2495 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002496 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002497 cur = CUR;
2498 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2499 if (len + 1 >= size) {
2500 size *= 2;
2501 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2502 if (buf == NULL) {
2503 xmlGenericError(xmlGenericErrorContext,
2504 "realloc of %d byte failed\n", size);
2505 return(NULL);
2506 }
2507 }
2508 buf[len++] = cur;
2509 count++;
2510 if (count > 50) {
2511 GROW;
2512 count = 0;
2513 }
2514 NEXT;
2515 cur = CUR;
2516 if (cur == 0) {
2517 GROW;
2518 SHRINK;
2519 cur = CUR;
2520 }
2521 }
2522 buf[len] = 0;
2523 if (cur != stop) {
2524 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2525 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2526 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2527 ctxt->wellFormed = 0;
2528 ctxt->disableSAX = 1;
2529 } else {
2530 NEXT;
2531 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002532 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002533 return(buf);
2534}
2535
Daniel Veillard48b2f892001-02-25 16:11:03 +00002536void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002537/**
2538 * xmlParseCharData:
2539 * @ctxt: an XML parser context
2540 * @cdata: int indicating whether we are within a CDATA section
2541 *
2542 * parse a CharData section.
2543 * if we are within a CDATA section ']]>' marks an end of section.
2544 *
2545 * The right angle bracket (>) may be represented using the string "&gt;",
2546 * and must, for compatibility, be escaped using "&gt;" or a character
2547 * reference when it appears in the string "]]>" in content, when that
2548 * string is not marking the end of a CDATA section.
2549 *
2550 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2551 */
2552
2553void
2554xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002555 const xmlChar *in;
2556 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002557 int line = ctxt->input->line;
2558 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002559
2560 SHRINK;
2561 GROW;
2562 /*
2563 * Accelerated common case where input don't need to be
2564 * modified before passing it to the handler.
2565 */
2566 if ((ctxt->token == 0) && (!cdata)) {
2567 in = ctxt->input->cur;
2568 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002569get_more:
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002570 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
Daniel Veillard48b2f892001-02-25 16:11:03 +00002571 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2572 in++;
2573 if (*in == 0xA) {
2574 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002575 in++;
2576 while (*in == 0xA) {
2577 ctxt->input->line++;
2578 in++;
2579 }
2580 goto get_more;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002581 }
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002582 if (*in == ']') {
2583 if ((in[1] == ']') && (in[2] == '>')) {
2584 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2585 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2586 ctxt->sax->error(ctxt->userData,
2587 "Sequence ']]>' not allowed in content\n");
2588 ctxt->input->cur = in;
2589 ctxt->wellFormed = 0;
2590 ctxt->disableSAX = 1;
2591 return;
2592 }
2593 in++;
2594 goto get_more;
2595 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002596 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002597 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002598 if (IS_BLANK(*ctxt->input->cur)) {
2599 const xmlChar *tmp = ctxt->input->cur;
2600 ctxt->input->cur = in;
2601 if (areBlanks(ctxt, tmp, nbchar)) {
2602 if (ctxt->sax->ignorableWhitespace != NULL)
2603 ctxt->sax->ignorableWhitespace(ctxt->userData,
2604 tmp, nbchar);
2605 } else {
2606 if (ctxt->sax->characters != NULL)
2607 ctxt->sax->characters(ctxt->userData,
2608 tmp, nbchar);
2609 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002610 line = ctxt->input->line;
2611 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002612 } else {
2613 if (ctxt->sax->characters != NULL)
2614 ctxt->sax->characters(ctxt->userData,
2615 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002616 line = ctxt->input->line;
2617 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002618 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002619 }
2620 ctxt->input->cur = in;
2621 if (*in == 0xD) {
2622 in++;
2623 if (*in == 0xA) {
2624 ctxt->input->cur = in;
2625 in++;
2626 ctxt->input->line++;
2627 continue; /* while */
2628 }
2629 in--;
2630 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002631 if (*in == '<') {
2632 return;
2633 }
2634 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002635 return;
2636 }
2637 SHRINK;
2638 GROW;
2639 in = ctxt->input->cur;
2640 } while ((*in >= 0x20) && (*in <= 0x7F));
2641 nbchar = 0;
2642 }
Daniel Veillard50582112001-03-26 22:52:16 +00002643 ctxt->input->line = line;
2644 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002645 xmlParseCharDataComplex(ctxt, cdata);
2646}
2647
2648void
2649xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002650 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2651 int nbchar = 0;
2652 int cur, l;
2653 int count = 0;
2654
2655 SHRINK;
2656 GROW;
2657 cur = CUR_CHAR(l);
2658 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2659 ((cur != '&') || (ctxt->token == '&')) &&
2660 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2661 if ((cur == ']') && (NXT(1) == ']') &&
2662 (NXT(2) == '>')) {
2663 if (cdata) break;
2664 else {
2665 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2666 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2667 ctxt->sax->error(ctxt->userData,
2668 "Sequence ']]>' not allowed in content\n");
2669 /* Should this be relaxed ??? I see a "must here */
2670 ctxt->wellFormed = 0;
2671 ctxt->disableSAX = 1;
2672 }
2673 }
2674 COPY_BUF(l,buf,nbchar,cur);
2675 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2676 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002677 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002678 */
2679 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2680 if (areBlanks(ctxt, buf, nbchar)) {
2681 if (ctxt->sax->ignorableWhitespace != NULL)
2682 ctxt->sax->ignorableWhitespace(ctxt->userData,
2683 buf, nbchar);
2684 } else {
2685 if (ctxt->sax->characters != NULL)
2686 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2687 }
2688 }
2689 nbchar = 0;
2690 }
2691 count++;
2692 if (count > 50) {
2693 GROW;
2694 count = 0;
2695 }
2696 NEXTL(l);
2697 cur = CUR_CHAR(l);
2698 }
2699 if (nbchar != 0) {
2700 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002701 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002702 */
2703 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2704 if (areBlanks(ctxt, buf, nbchar)) {
2705 if (ctxt->sax->ignorableWhitespace != NULL)
2706 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2707 } else {
2708 if (ctxt->sax->characters != NULL)
2709 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2710 }
2711 }
2712 }
2713}
2714
2715/**
2716 * xmlParseExternalID:
2717 * @ctxt: an XML parser context
2718 * @publicID: a xmlChar** receiving PubidLiteral
2719 * @strict: indicate whether we should restrict parsing to only
2720 * production [75], see NOTE below
2721 *
2722 * Parse an External ID or a Public ID
2723 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002724 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002725 * 'PUBLIC' S PubidLiteral S SystemLiteral
2726 *
2727 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2728 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2729 *
2730 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2731 *
2732 * Returns the function returns SystemLiteral and in the second
2733 * case publicID receives PubidLiteral, is strict is off
2734 * it is possible to return NULL and have publicID set.
2735 */
2736
2737xmlChar *
2738xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2739 xmlChar *URI = NULL;
2740
2741 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002742
2743 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002744 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2745 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2746 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2747 SKIP(6);
2748 if (!IS_BLANK(CUR)) {
2749 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2750 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2751 ctxt->sax->error(ctxt->userData,
2752 "Space required after 'SYSTEM'\n");
2753 ctxt->wellFormed = 0;
2754 ctxt->disableSAX = 1;
2755 }
2756 SKIP_BLANKS;
2757 URI = xmlParseSystemLiteral(ctxt);
2758 if (URI == NULL) {
2759 ctxt->errNo = XML_ERR_URI_REQUIRED;
2760 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2761 ctxt->sax->error(ctxt->userData,
2762 "xmlParseExternalID: SYSTEM, no URI\n");
2763 ctxt->wellFormed = 0;
2764 ctxt->disableSAX = 1;
2765 }
2766 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2767 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2768 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2769 SKIP(6);
2770 if (!IS_BLANK(CUR)) {
2771 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2772 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2773 ctxt->sax->error(ctxt->userData,
2774 "Space required after 'PUBLIC'\n");
2775 ctxt->wellFormed = 0;
2776 ctxt->disableSAX = 1;
2777 }
2778 SKIP_BLANKS;
2779 *publicID = xmlParsePubidLiteral(ctxt);
2780 if (*publicID == NULL) {
2781 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2782 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2783 ctxt->sax->error(ctxt->userData,
2784 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2785 ctxt->wellFormed = 0;
2786 ctxt->disableSAX = 1;
2787 }
2788 if (strict) {
2789 /*
2790 * We don't handle [83] so "S SystemLiteral" is required.
2791 */
2792 if (!IS_BLANK(CUR)) {
2793 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2794 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2795 ctxt->sax->error(ctxt->userData,
2796 "Space required after the Public Identifier\n");
2797 ctxt->wellFormed = 0;
2798 ctxt->disableSAX = 1;
2799 }
2800 } else {
2801 /*
2802 * We handle [83] so we return immediately, if
2803 * "S SystemLiteral" is not detected. From a purely parsing
2804 * point of view that's a nice mess.
2805 */
2806 const xmlChar *ptr;
2807 GROW;
2808
2809 ptr = CUR_PTR;
2810 if (!IS_BLANK(*ptr)) return(NULL);
2811
2812 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2813 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2814 }
2815 SKIP_BLANKS;
2816 URI = xmlParseSystemLiteral(ctxt);
2817 if (URI == NULL) {
2818 ctxt->errNo = XML_ERR_URI_REQUIRED;
2819 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2820 ctxt->sax->error(ctxt->userData,
2821 "xmlParseExternalID: PUBLIC, no URI\n");
2822 ctxt->wellFormed = 0;
2823 ctxt->disableSAX = 1;
2824 }
2825 }
2826 return(URI);
2827}
2828
2829/**
2830 * xmlParseComment:
2831 * @ctxt: an XML parser context
2832 *
2833 * Skip an XML (SGML) comment <!-- .... -->
2834 * The spec says that "For compatibility, the string "--" (double-hyphen)
2835 * must not occur within comments. "
2836 *
2837 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2838 */
2839void
2840xmlParseComment(xmlParserCtxtPtr ctxt) {
2841 xmlChar *buf = NULL;
2842 int len;
2843 int size = XML_PARSER_BUFFER_SIZE;
2844 int q, ql;
2845 int r, rl;
2846 int cur, l;
2847 xmlParserInputState state;
2848 xmlParserInputPtr input = ctxt->input;
2849 int count = 0;
2850
2851 /*
2852 * Check that there is a comment right here.
2853 */
2854 if ((RAW != '<') || (NXT(1) != '!') ||
2855 (NXT(2) != '-') || (NXT(3) != '-')) return;
2856
2857 state = ctxt->instate;
2858 ctxt->instate = XML_PARSER_COMMENT;
2859 SHRINK;
2860 SKIP(4);
2861 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2862 if (buf == NULL) {
2863 xmlGenericError(xmlGenericErrorContext,
2864 "malloc of %d byte failed\n", size);
2865 ctxt->instate = state;
2866 return;
2867 }
2868 q = CUR_CHAR(ql);
2869 NEXTL(ql);
2870 r = CUR_CHAR(rl);
2871 NEXTL(rl);
2872 cur = CUR_CHAR(l);
2873 len = 0;
2874 while (IS_CHAR(cur) && /* checked */
2875 ((cur != '>') ||
2876 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002877 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002878 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2879 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2880 ctxt->sax->error(ctxt->userData,
2881 "Comment must not contain '--' (double-hyphen)`\n");
2882 ctxt->wellFormed = 0;
2883 ctxt->disableSAX = 1;
2884 }
2885 if (len + 5 >= size) {
2886 size *= 2;
2887 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2888 if (buf == NULL) {
2889 xmlGenericError(xmlGenericErrorContext,
2890 "realloc of %d byte failed\n", size);
2891 ctxt->instate = state;
2892 return;
2893 }
2894 }
2895 COPY_BUF(ql,buf,len,q);
2896 q = r;
2897 ql = rl;
2898 r = cur;
2899 rl = l;
2900
2901 count++;
2902 if (count > 50) {
2903 GROW;
2904 count = 0;
2905 }
2906 NEXTL(l);
2907 cur = CUR_CHAR(l);
2908 if (cur == 0) {
2909 SHRINK;
2910 GROW;
2911 cur = CUR_CHAR(l);
2912 }
2913 }
2914 buf[len] = 0;
2915 if (!IS_CHAR(cur)) {
2916 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2917 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2918 ctxt->sax->error(ctxt->userData,
2919 "Comment not terminated \n<!--%.50s\n", buf);
2920 ctxt->wellFormed = 0;
2921 ctxt->disableSAX = 1;
2922 xmlFree(buf);
2923 } else {
2924 if (input != ctxt->input) {
2925 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2926 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2927 ctxt->sax->error(ctxt->userData,
2928"Comment doesn't start and stop in the same entity\n");
2929 ctxt->wellFormed = 0;
2930 ctxt->disableSAX = 1;
2931 }
2932 NEXT;
2933 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2934 (!ctxt->disableSAX))
2935 ctxt->sax->comment(ctxt->userData, buf);
2936 xmlFree(buf);
2937 }
2938 ctxt->instate = state;
2939}
2940
2941/**
2942 * xmlParsePITarget:
2943 * @ctxt: an XML parser context
2944 *
2945 * parse the name of a PI
2946 *
2947 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2948 *
2949 * Returns the PITarget name or NULL
2950 */
2951
2952xmlChar *
2953xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2954 xmlChar *name;
2955
2956 name = xmlParseName(ctxt);
2957 if ((name != NULL) &&
2958 ((name[0] == 'x') || (name[0] == 'X')) &&
2959 ((name[1] == 'm') || (name[1] == 'M')) &&
2960 ((name[2] == 'l') || (name[2] == 'L'))) {
2961 int i;
2962 if ((name[0] == 'x') && (name[1] == 'm') &&
2963 (name[2] == 'l') && (name[3] == 0)) {
2964 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2965 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2966 ctxt->sax->error(ctxt->userData,
2967 "XML declaration allowed only at the start of the document\n");
2968 ctxt->wellFormed = 0;
2969 ctxt->disableSAX = 1;
2970 return(name);
2971 } else if (name[3] == 0) {
2972 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2973 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2974 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2975 ctxt->wellFormed = 0;
2976 ctxt->disableSAX = 1;
2977 return(name);
2978 }
2979 for (i = 0;;i++) {
2980 if (xmlW3CPIs[i] == NULL) break;
2981 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2982 return(name);
2983 }
2984 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2985 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2986 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002987 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002988 }
2989 }
2990 return(name);
2991}
2992
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002993#ifdef LIBXML_CATALOG_ENABLED
2994/**
2995 * xmlParseCatalogPI:
2996 * @ctxt: an XML parser context
2997 * @catalog: the PI value string
2998 *
2999 * parse an XML Catalog Processing Instruction.
3000 *
3001 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
3002 *
3003 * Occurs only if allowed by the user and if happening in the Misc
3004 * part of the document before any doctype informations
3005 * This will add the given catalog to the parsing context in order
3006 * to be used if there is a resolution need further down in the document
3007 */
3008
3009static void
3010xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
3011 xmlChar *URL = NULL;
3012 const xmlChar *tmp, *base;
3013 xmlChar marker;
3014
3015 tmp = catalog;
3016 while (IS_BLANK(*tmp)) tmp++;
3017 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
3018 goto error;
3019 tmp += 7;
3020 while (IS_BLANK(*tmp)) tmp++;
3021 if (*tmp != '=') {
3022 return;
3023 }
3024 tmp++;
3025 while (IS_BLANK(*tmp)) tmp++;
3026 marker = *tmp;
3027 if ((marker != '\'') && (marker != '"'))
3028 goto error;
3029 tmp++;
3030 base = tmp;
3031 while ((*tmp != 0) && (*tmp != marker)) tmp++;
3032 if (*tmp == 0)
3033 goto error;
3034 URL = xmlStrndup(base, tmp - base);
3035 tmp++;
3036 while (IS_BLANK(*tmp)) tmp++;
3037 if (*tmp != 0)
3038 goto error;
3039
3040 if (URL != NULL) {
3041 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
3042 xmlFree(URL);
3043 }
3044 return;
3045
3046error:
3047 ctxt->errNo = XML_WAR_CATALOG_PI;
3048 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
3049 ctxt->sax->warning(ctxt->userData,
3050 "Catalog PI syntax error: %s\n", catalog);
3051 if (URL != NULL)
3052 xmlFree(URL);
3053}
3054#endif
3055
Owen Taylor3473f882001-02-23 17:55:21 +00003056/**
3057 * xmlParsePI:
3058 * @ctxt: an XML parser context
3059 *
3060 * parse an XML Processing Instruction.
3061 *
3062 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3063 *
 * The processing is transferred to SAX once parsed.
3065 */
3066
3067void
3068xmlParsePI(xmlParserCtxtPtr ctxt) {
3069 xmlChar *buf = NULL;
3070 int len = 0;
3071 int size = XML_PARSER_BUFFER_SIZE;
3072 int cur, l;
3073 xmlChar *target;
3074 xmlParserInputState state;
3075 int count = 0;
3076
3077 if ((RAW == '<') && (NXT(1) == '?')) {
3078 xmlParserInputPtr input = ctxt->input;
3079 state = ctxt->instate;
3080 ctxt->instate = XML_PARSER_PI;
3081 /*
3082 * this is a Processing Instruction.
3083 */
3084 SKIP(2);
3085 SHRINK;
3086
3087 /*
3088 * Parse the target name and check for special support like
3089 * namespace.
3090 */
3091 target = xmlParsePITarget(ctxt);
3092 if (target != NULL) {
3093 if ((RAW == '?') && (NXT(1) == '>')) {
3094 if (input != ctxt->input) {
3095 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3096 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3097 ctxt->sax->error(ctxt->userData,
3098 "PI declaration doesn't start and stop in the same entity\n");
3099 ctxt->wellFormed = 0;
3100 ctxt->disableSAX = 1;
3101 }
3102 SKIP(2);
3103
3104 /*
3105 * SAX: PI detected.
3106 */
3107 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3108 (ctxt->sax->processingInstruction != NULL))
3109 ctxt->sax->processingInstruction(ctxt->userData,
3110 target, NULL);
3111 ctxt->instate = state;
3112 xmlFree(target);
3113 return;
3114 }
3115 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3116 if (buf == NULL) {
3117 xmlGenericError(xmlGenericErrorContext,
3118 "malloc of %d byte failed\n", size);
3119 ctxt->instate = state;
3120 return;
3121 }
3122 cur = CUR;
3123 if (!IS_BLANK(cur)) {
3124 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3125 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3126 ctxt->sax->error(ctxt->userData,
3127 "xmlParsePI: PI %s space expected\n", target);
3128 ctxt->wellFormed = 0;
3129 ctxt->disableSAX = 1;
3130 }
3131 SKIP_BLANKS;
3132 cur = CUR_CHAR(l);
3133 while (IS_CHAR(cur) && /* checked */
3134 ((cur != '?') || (NXT(1) != '>'))) {
3135 if (len + 5 >= size) {
3136 size *= 2;
3137 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3138 if (buf == NULL) {
3139 xmlGenericError(xmlGenericErrorContext,
3140 "realloc of %d byte failed\n", size);
3141 ctxt->instate = state;
3142 return;
3143 }
3144 }
3145 count++;
3146 if (count > 50) {
3147 GROW;
3148 count = 0;
3149 }
3150 COPY_BUF(l,buf,len,cur);
3151 NEXTL(l);
3152 cur = CUR_CHAR(l);
3153 if (cur == 0) {
3154 SHRINK;
3155 GROW;
3156 cur = CUR_CHAR(l);
3157 }
3158 }
3159 buf[len] = 0;
3160 if (cur != '?') {
3161 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3162 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3163 ctxt->sax->error(ctxt->userData,
3164 "xmlParsePI: PI %s never end ...\n", target);
3165 ctxt->wellFormed = 0;
3166 ctxt->disableSAX = 1;
3167 } else {
3168 if (input != ctxt->input) {
3169 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3170 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3171 ctxt->sax->error(ctxt->userData,
3172 "PI declaration doesn't start and stop in the same entity\n");
3173 ctxt->wellFormed = 0;
3174 ctxt->disableSAX = 1;
3175 }
3176 SKIP(2);
3177
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003178#ifdef LIBXML_CATALOG_ENABLED
3179 if (((state == XML_PARSER_MISC) ||
3180 (state == XML_PARSER_START)) &&
3181 (xmlStrEqual(target, XML_CATALOG_PI))) {
3182 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3183 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3184 (allow == XML_CATA_ALLOW_ALL))
3185 xmlParseCatalogPI(ctxt, buf);
3186 }
3187#endif
3188
3189
Owen Taylor3473f882001-02-23 17:55:21 +00003190 /*
3191 * SAX: PI detected.
3192 */
3193 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3194 (ctxt->sax->processingInstruction != NULL))
3195 ctxt->sax->processingInstruction(ctxt->userData,
3196 target, buf);
3197 }
3198 xmlFree(buf);
3199 xmlFree(target);
3200 } else {
3201 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3202 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3203 ctxt->sax->error(ctxt->userData,
3204 "xmlParsePI : no target name\n");
3205 ctxt->wellFormed = 0;
3206 ctxt->disableSAX = 1;
3207 }
3208 ctxt->instate = state;
3209 }
3210}
3211
3212/**
3213 * xmlParseNotationDecl:
3214 * @ctxt: an XML parser context
3215 *
3216 * parse a notation declaration
3217 *
3218 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3219 *
3220 * Hence there is actually 3 choices:
3221 * 'PUBLIC' S PubidLiteral
3222 * 'PUBLIC' S PubidLiteral S SystemLiteral
3223 * and 'SYSTEM' S SystemLiteral
3224 *
3225 * See the NOTE on xmlParseExternalID().
3226 */
3227
3228void
3229xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3230 xmlChar *name;
3231 xmlChar *Pubid;
3232 xmlChar *Systemid;
3233
3234 if ((RAW == '<') && (NXT(1) == '!') &&
3235 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3236 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3237 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3238 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3239 xmlParserInputPtr input = ctxt->input;
3240 SHRINK;
3241 SKIP(10);
3242 if (!IS_BLANK(CUR)) {
3243 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3244 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3245 ctxt->sax->error(ctxt->userData,
3246 "Space required after '<!NOTATION'\n");
3247 ctxt->wellFormed = 0;
3248 ctxt->disableSAX = 1;
3249 return;
3250 }
3251 SKIP_BLANKS;
3252
Daniel Veillard76d66f42001-05-16 21:05:17 +00003253 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003254 if (name == NULL) {
3255 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3256 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3257 ctxt->sax->error(ctxt->userData,
3258 "NOTATION: Name expected here\n");
3259 ctxt->wellFormed = 0;
3260 ctxt->disableSAX = 1;
3261 return;
3262 }
3263 if (!IS_BLANK(CUR)) {
3264 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3265 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3266 ctxt->sax->error(ctxt->userData,
3267 "Space required after the NOTATION name'\n");
3268 ctxt->wellFormed = 0;
3269 ctxt->disableSAX = 1;
3270 return;
3271 }
3272 SKIP_BLANKS;
3273
3274 /*
3275 * Parse the IDs.
3276 */
3277 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3278 SKIP_BLANKS;
3279
3280 if (RAW == '>') {
3281 if (input != ctxt->input) {
3282 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3284 ctxt->sax->error(ctxt->userData,
3285"Notation declaration doesn't start and stop in the same entity\n");
3286 ctxt->wellFormed = 0;
3287 ctxt->disableSAX = 1;
3288 }
3289 NEXT;
3290 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3291 (ctxt->sax->notationDecl != NULL))
3292 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3293 } else {
3294 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3295 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3296 ctxt->sax->error(ctxt->userData,
3297 "'>' required to close NOTATION declaration\n");
3298 ctxt->wellFormed = 0;
3299 ctxt->disableSAX = 1;
3300 }
3301 xmlFree(name);
3302 if (Systemid != NULL) xmlFree(Systemid);
3303 if (Pubid != NULL) xmlFree(Pubid);
3304 }
3305}
3306
3307/**
3308 * xmlParseEntityDecl:
3309 * @ctxt: an XML parser context
3310 *
3311 * parse <!ENTITY declarations
3312 *
3313 * [70] EntityDecl ::= GEDecl | PEDecl
3314 *
3315 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3316 *
3317 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3318 *
3319 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3320 *
3321 * [74] PEDef ::= EntityValue | ExternalID
3322 *
3323 * [76] NDataDecl ::= S 'NDATA' S Name
3324 *
3325 * [ VC: Notation Declared ]
3326 * The Name must match the declared name of a notation.
3327 */
3328
3329void
3330xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3331 xmlChar *name = NULL;
3332 xmlChar *value = NULL;
3333 xmlChar *URI = NULL, *literal = NULL;
3334 xmlChar *ndata = NULL;
3335 int isParameter = 0;
3336 xmlChar *orig = NULL;
3337
3338 GROW;
3339 if ((RAW == '<') && (NXT(1) == '!') &&
3340 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3341 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3342 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3343 xmlParserInputPtr input = ctxt->input;
3344 ctxt->instate = XML_PARSER_ENTITY_DECL;
3345 SHRINK;
3346 SKIP(8);
3347 if (!IS_BLANK(CUR)) {
3348 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3350 ctxt->sax->error(ctxt->userData,
3351 "Space required after '<!ENTITY'\n");
3352 ctxt->wellFormed = 0;
3353 ctxt->disableSAX = 1;
3354 }
3355 SKIP_BLANKS;
3356
3357 if (RAW == '%') {
3358 NEXT;
3359 if (!IS_BLANK(CUR)) {
3360 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3361 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3362 ctxt->sax->error(ctxt->userData,
3363 "Space required after '%'\n");
3364 ctxt->wellFormed = 0;
3365 ctxt->disableSAX = 1;
3366 }
3367 SKIP_BLANKS;
3368 isParameter = 1;
3369 }
3370
Daniel Veillard76d66f42001-05-16 21:05:17 +00003371 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003372 if (name == NULL) {
3373 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3374 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3375 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3376 ctxt->wellFormed = 0;
3377 ctxt->disableSAX = 1;
3378 return;
3379 }
3380 if (!IS_BLANK(CUR)) {
3381 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3382 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3383 ctxt->sax->error(ctxt->userData,
3384 "Space required after the entity name\n");
3385 ctxt->wellFormed = 0;
3386 ctxt->disableSAX = 1;
3387 }
3388 SKIP_BLANKS;
3389
3390 /*
3391 * handle the various case of definitions...
3392 */
3393 if (isParameter) {
3394 if ((RAW == '"') || (RAW == '\'')) {
3395 value = xmlParseEntityValue(ctxt, &orig);
3396 if (value) {
3397 if ((ctxt->sax != NULL) &&
3398 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3399 ctxt->sax->entityDecl(ctxt->userData, name,
3400 XML_INTERNAL_PARAMETER_ENTITY,
3401 NULL, NULL, value);
3402 }
3403 } else {
3404 URI = xmlParseExternalID(ctxt, &literal, 1);
3405 if ((URI == NULL) && (literal == NULL)) {
3406 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3407 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3408 ctxt->sax->error(ctxt->userData,
3409 "Entity value required\n");
3410 ctxt->wellFormed = 0;
3411 ctxt->disableSAX = 1;
3412 }
3413 if (URI) {
3414 xmlURIPtr uri;
3415
3416 uri = xmlParseURI((const char *) URI);
3417 if (uri == NULL) {
3418 ctxt->errNo = XML_ERR_INVALID_URI;
3419 if ((ctxt->sax != NULL) &&
3420 (!ctxt->disableSAX) &&
3421 (ctxt->sax->error != NULL))
3422 ctxt->sax->error(ctxt->userData,
3423 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003424 /*
3425 * This really ought to be a well formedness error
3426 * but the XML Core WG decided otherwise c.f. issue
3427 * E26 of the XML erratas.
3428 */
Owen Taylor3473f882001-02-23 17:55:21 +00003429 } else {
3430 if (uri->fragment != NULL) {
3431 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3432 if ((ctxt->sax != NULL) &&
3433 (!ctxt->disableSAX) &&
3434 (ctxt->sax->error != NULL))
3435 ctxt->sax->error(ctxt->userData,
3436 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003437 /*
3438 * Okay this is foolish to block those but not
3439 * invalid URIs.
3440 */
Owen Taylor3473f882001-02-23 17:55:21 +00003441 ctxt->wellFormed = 0;
3442 } else {
3443 if ((ctxt->sax != NULL) &&
3444 (!ctxt->disableSAX) &&
3445 (ctxt->sax->entityDecl != NULL))
3446 ctxt->sax->entityDecl(ctxt->userData, name,
3447 XML_EXTERNAL_PARAMETER_ENTITY,
3448 literal, URI, NULL);
3449 }
3450 xmlFreeURI(uri);
3451 }
3452 }
3453 }
3454 } else {
3455 if ((RAW == '"') || (RAW == '\'')) {
3456 value = xmlParseEntityValue(ctxt, &orig);
3457 if ((ctxt->sax != NULL) &&
3458 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3459 ctxt->sax->entityDecl(ctxt->userData, name,
3460 XML_INTERNAL_GENERAL_ENTITY,
3461 NULL, NULL, value);
3462 } else {
3463 URI = xmlParseExternalID(ctxt, &literal, 1);
3464 if ((URI == NULL) && (literal == NULL)) {
3465 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3467 ctxt->sax->error(ctxt->userData,
3468 "Entity value required\n");
3469 ctxt->wellFormed = 0;
3470 ctxt->disableSAX = 1;
3471 }
3472 if (URI) {
3473 xmlURIPtr uri;
3474
3475 uri = xmlParseURI((const char *)URI);
3476 if (uri == NULL) {
3477 ctxt->errNo = XML_ERR_INVALID_URI;
3478 if ((ctxt->sax != NULL) &&
3479 (!ctxt->disableSAX) &&
3480 (ctxt->sax->error != NULL))
3481 ctxt->sax->error(ctxt->userData,
3482 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003483 /*
3484 * This really ought to be a well formedness error
3485 * but the XML Core WG decided otherwise c.f. issue
3486 * E26 of the XML erratas.
3487 */
Owen Taylor3473f882001-02-23 17:55:21 +00003488 } else {
3489 if (uri->fragment != NULL) {
3490 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3491 if ((ctxt->sax != NULL) &&
3492 (!ctxt->disableSAX) &&
3493 (ctxt->sax->error != NULL))
3494 ctxt->sax->error(ctxt->userData,
3495 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003496 /*
3497 * Okay this is foolish to block those but not
3498 * invalid URIs.
3499 */
Owen Taylor3473f882001-02-23 17:55:21 +00003500 ctxt->wellFormed = 0;
3501 }
3502 xmlFreeURI(uri);
3503 }
3504 }
3505 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3506 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3507 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3508 ctxt->sax->error(ctxt->userData,
3509 "Space required before 'NDATA'\n");
3510 ctxt->wellFormed = 0;
3511 ctxt->disableSAX = 1;
3512 }
3513 SKIP_BLANKS;
3514 if ((RAW == 'N') && (NXT(1) == 'D') &&
3515 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3516 (NXT(4) == 'A')) {
3517 SKIP(5);
3518 if (!IS_BLANK(CUR)) {
3519 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3520 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3521 ctxt->sax->error(ctxt->userData,
3522 "Space required after 'NDATA'\n");
3523 ctxt->wellFormed = 0;
3524 ctxt->disableSAX = 1;
3525 }
3526 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003527 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003528 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3529 (ctxt->sax->unparsedEntityDecl != NULL))
3530 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3531 literal, URI, ndata);
3532 } else {
3533 if ((ctxt->sax != NULL) &&
3534 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3535 ctxt->sax->entityDecl(ctxt->userData, name,
3536 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3537 literal, URI, NULL);
3538 }
3539 }
3540 }
3541 SKIP_BLANKS;
3542 if (RAW != '>') {
3543 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3544 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3545 ctxt->sax->error(ctxt->userData,
3546 "xmlParseEntityDecl: entity %s not terminated\n", name);
3547 ctxt->wellFormed = 0;
3548 ctxt->disableSAX = 1;
3549 } else {
3550 if (input != ctxt->input) {
3551 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3552 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3553 ctxt->sax->error(ctxt->userData,
3554"Entity declaration doesn't start and stop in the same entity\n");
3555 ctxt->wellFormed = 0;
3556 ctxt->disableSAX = 1;
3557 }
3558 NEXT;
3559 }
3560 if (orig != NULL) {
3561 /*
3562 * Ugly mechanism to save the raw entity value.
3563 */
3564 xmlEntityPtr cur = NULL;
3565
3566 if (isParameter) {
3567 if ((ctxt->sax != NULL) &&
3568 (ctxt->sax->getParameterEntity != NULL))
3569 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3570 } else {
3571 if ((ctxt->sax != NULL) &&
3572 (ctxt->sax->getEntity != NULL))
3573 cur = ctxt->sax->getEntity(ctxt->userData, name);
3574 }
3575 if (cur != NULL) {
3576 if (cur->orig != NULL)
3577 xmlFree(orig);
3578 else
3579 cur->orig = orig;
3580 } else
3581 xmlFree(orig);
3582 }
3583 if (name != NULL) xmlFree(name);
3584 if (value != NULL) xmlFree(value);
3585 if (URI != NULL) xmlFree(URI);
3586 if (literal != NULL) xmlFree(literal);
3587 if (ndata != NULL) xmlFree(ndata);
3588 }
3589}
3590
3591/**
3592 * xmlParseDefaultDecl:
3593 * @ctxt: an XML parser context
3594 * @value: Receive a possible fixed default value for the attribute
3595 *
3596 * Parse an attribute default declaration
3597 *
3598 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3599 *
3600 * [ VC: Required Attribute ]
3601 * if the default declaration is the keyword #REQUIRED, then the
3602 * attribute must be specified for all elements of the type in the
3603 * attribute-list declaration.
3604 *
3605 * [ VC: Attribute Default Legal ]
3606 * The declared default value must meet the lexical constraints of
3607 * the declared attribute type c.f. xmlValidateAttributeDecl()
3608 *
3609 * [ VC: Fixed Attribute Default ]
3610 * if an attribute has a default value declared with the #FIXED
3611 * keyword, instances of that attribute must match the default value.
3612 *
3613 * [ WFC: No < in Attribute Values ]
3614 * handled in xmlParseAttValue()
3615 *
3616 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3617 * or XML_ATTRIBUTE_FIXED.
3618 */
3619
3620int
3621xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3622 int val;
3623 xmlChar *ret;
3624
3625 *value = NULL;
3626 if ((RAW == '#') && (NXT(1) == 'R') &&
3627 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3628 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3629 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3630 (NXT(8) == 'D')) {
3631 SKIP(9);
3632 return(XML_ATTRIBUTE_REQUIRED);
3633 }
3634 if ((RAW == '#') && (NXT(1) == 'I') &&
3635 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3636 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3637 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3638 SKIP(8);
3639 return(XML_ATTRIBUTE_IMPLIED);
3640 }
3641 val = XML_ATTRIBUTE_NONE;
3642 if ((RAW == '#') && (NXT(1) == 'F') &&
3643 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3644 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3645 SKIP(6);
3646 val = XML_ATTRIBUTE_FIXED;
3647 if (!IS_BLANK(CUR)) {
3648 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3650 ctxt->sax->error(ctxt->userData,
3651 "Space required after '#FIXED'\n");
3652 ctxt->wellFormed = 0;
3653 ctxt->disableSAX = 1;
3654 }
3655 SKIP_BLANKS;
3656 }
3657 ret = xmlParseAttValue(ctxt);
3658 ctxt->instate = XML_PARSER_DTD;
3659 if (ret == NULL) {
3660 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3661 ctxt->sax->error(ctxt->userData,
3662 "Attribute default value declaration error\n");
3663 ctxt->wellFormed = 0;
3664 ctxt->disableSAX = 1;
3665 } else
3666 *value = ret;
3667 return(val);
3668}
3669
3670/**
3671 * xmlParseNotationType:
3672 * @ctxt: an XML parser context
3673 *
3674 * parse an Notation attribute type.
3675 *
3676 * Note: the leading 'NOTATION' S part has already being parsed...
3677 *
3678 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3679 *
3680 * [ VC: Notation Attributes ]
3681 * Values of this type must match one of the notation names included
3682 * in the declaration; all notation names in the declaration must be declared.
3683 *
3684 * Returns: the notation attribute tree built while parsing
3685 */
3686
3687xmlEnumerationPtr
3688xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3689 xmlChar *name;
3690 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3691
3692 if (RAW != '(') {
3693 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3694 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3695 ctxt->sax->error(ctxt->userData,
3696 "'(' required to start 'NOTATION'\n");
3697 ctxt->wellFormed = 0;
3698 ctxt->disableSAX = 1;
3699 return(NULL);
3700 }
3701 SHRINK;
3702 do {
3703 NEXT;
3704 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003705 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003706 if (name == NULL) {
3707 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3709 ctxt->sax->error(ctxt->userData,
3710 "Name expected in NOTATION declaration\n");
3711 ctxt->wellFormed = 0;
3712 ctxt->disableSAX = 1;
3713 return(ret);
3714 }
3715 cur = xmlCreateEnumeration(name);
3716 xmlFree(name);
3717 if (cur == NULL) return(ret);
3718 if (last == NULL) ret = last = cur;
3719 else {
3720 last->next = cur;
3721 last = cur;
3722 }
3723 SKIP_BLANKS;
3724 } while (RAW == '|');
3725 if (RAW != ')') {
3726 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3727 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3728 ctxt->sax->error(ctxt->userData,
3729 "')' required to finish NOTATION declaration\n");
3730 ctxt->wellFormed = 0;
3731 ctxt->disableSAX = 1;
3732 if ((last != NULL) && (last != ret))
3733 xmlFreeEnumeration(last);
3734 return(ret);
3735 }
3736 NEXT;
3737 return(ret);
3738}
3739
3740/**
3741 * xmlParseEnumerationType:
3742 * @ctxt: an XML parser context
3743 *
3744 * parse an Enumeration attribute type.
3745 *
3746 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3747 *
3748 * [ VC: Enumeration ]
3749 * Values of this type must match one of the Nmtoken tokens in
3750 * the declaration
3751 *
3752 * Returns: the enumeration attribute tree built while parsing
3753 */
3754
3755xmlEnumerationPtr
3756xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3757 xmlChar *name;
3758 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3759
3760 if (RAW != '(') {
3761 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3762 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3763 ctxt->sax->error(ctxt->userData,
3764 "'(' required to start ATTLIST enumeration\n");
3765 ctxt->wellFormed = 0;
3766 ctxt->disableSAX = 1;
3767 return(NULL);
3768 }
3769 SHRINK;
3770 do {
3771 NEXT;
3772 SKIP_BLANKS;
3773 name = xmlParseNmtoken(ctxt);
3774 if (name == NULL) {
3775 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3776 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3777 ctxt->sax->error(ctxt->userData,
3778 "NmToken expected in ATTLIST enumeration\n");
3779 ctxt->wellFormed = 0;
3780 ctxt->disableSAX = 1;
3781 return(ret);
3782 }
3783 cur = xmlCreateEnumeration(name);
3784 xmlFree(name);
3785 if (cur == NULL) return(ret);
3786 if (last == NULL) ret = last = cur;
3787 else {
3788 last->next = cur;
3789 last = cur;
3790 }
3791 SKIP_BLANKS;
3792 } while (RAW == '|');
3793 if (RAW != ')') {
3794 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3795 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3796 ctxt->sax->error(ctxt->userData,
3797 "')' required to finish ATTLIST enumeration\n");
3798 ctxt->wellFormed = 0;
3799 ctxt->disableSAX = 1;
3800 return(ret);
3801 }
3802 NEXT;
3803 return(ret);
3804}
3805
3806/**
3807 * xmlParseEnumeratedType:
3808 * @ctxt: an XML parser context
3809 * @tree: the enumeration tree built while parsing
3810 *
3811 * parse an Enumerated attribute type.
3812 *
3813 * [57] EnumeratedType ::= NotationType | Enumeration
3814 *
3815 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3816 *
3817 *
3818 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3819 */
3820
3821int
3822xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3823 if ((RAW == 'N') && (NXT(1) == 'O') &&
3824 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3825 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3826 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3827 SKIP(8);
3828 if (!IS_BLANK(CUR)) {
3829 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3830 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3831 ctxt->sax->error(ctxt->userData,
3832 "Space required after 'NOTATION'\n");
3833 ctxt->wellFormed = 0;
3834 ctxt->disableSAX = 1;
3835 return(0);
3836 }
3837 SKIP_BLANKS;
3838 *tree = xmlParseNotationType(ctxt);
3839 if (*tree == NULL) return(0);
3840 return(XML_ATTRIBUTE_NOTATION);
3841 }
3842 *tree = xmlParseEnumerationType(ctxt);
3843 if (*tree == NULL) return(0);
3844 return(XML_ATTRIBUTE_ENUMERATION);
3845}
3846
3847/**
3848 * xmlParseAttributeType:
3849 * @ctxt: an XML parser context
3850 * @tree: the enumeration tree built while parsing
3851 *
3852 * parse the Attribute list def for an element
3853 *
3854 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3855 *
3856 * [55] StringType ::= 'CDATA'
3857 *
3858 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3859 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3860 *
3861 * Validity constraints for attribute values syntax are checked in
3862 * xmlValidateAttributeValue()
3863 *
3864 * [ VC: ID ]
3865 * Values of type ID must match the Name production. A name must not
3866 * appear more than once in an XML document as a value of this type;
3867 * i.e., ID values must uniquely identify the elements which bear them.
3868 *
3869 * [ VC: One ID per Element Type ]
3870 * No element type may have more than one ID attribute specified.
3871 *
3872 * [ VC: ID Attribute Default ]
3873 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3874 *
3875 * [ VC: IDREF ]
3876 * Values of type IDREF must match the Name production, and values
3877 * of type IDREFS must match Names; each IDREF Name must match the value
3878 * of an ID attribute on some element in the XML document; i.e. IDREF
3879 * values must match the value of some ID attribute.
3880 *
3881 * [ VC: Entity Name ]
3882 * Values of type ENTITY must match the Name production, values
3883 * of type ENTITIES must match Names; each Entity Name must match the
3884 * name of an unparsed entity declared in the DTD.
3885 *
3886 * [ VC: Name Token ]
3887 * Values of type NMTOKEN must match the Nmtoken production; values
3888 * of type NMTOKENS must match Nmtokens.
3889 *
3890 * Returns the attribute type
3891 */
3892int
3893xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3894 SHRINK;
3895 if ((RAW == 'C') && (NXT(1) == 'D') &&
3896 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3897 (NXT(4) == 'A')) {
3898 SKIP(5);
3899 return(XML_ATTRIBUTE_CDATA);
3900 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3901 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3902 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3903 SKIP(6);
3904 return(XML_ATTRIBUTE_IDREFS);
3905 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3906 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3907 (NXT(4) == 'F')) {
3908 SKIP(5);
3909 return(XML_ATTRIBUTE_IDREF);
3910 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3911 SKIP(2);
3912 return(XML_ATTRIBUTE_ID);
3913 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3914 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3915 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3916 SKIP(6);
3917 return(XML_ATTRIBUTE_ENTITY);
3918 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3919 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3920 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3921 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3922 SKIP(8);
3923 return(XML_ATTRIBUTE_ENTITIES);
3924 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3925 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3926 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3927 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3928 SKIP(8);
3929 return(XML_ATTRIBUTE_NMTOKENS);
3930 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3931 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3932 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3933 (NXT(6) == 'N')) {
3934 SKIP(7);
3935 return(XML_ATTRIBUTE_NMTOKEN);
3936 }
3937 return(xmlParseEnumeratedType(ctxt, tree));
3938}
3939
3940/**
3941 * xmlParseAttributeListDecl:
3942 * @ctxt: an XML parser context
3943 *
3944 * : parse the Attribute list def for an element
3945 *
3946 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3947 *
3948 * [53] AttDef ::= S Name S AttType S DefaultDecl
3949 *
3950 */
3951void
3952xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3953 xmlChar *elemName;
3954 xmlChar *attrName;
3955 xmlEnumerationPtr tree;
3956
3957 if ((RAW == '<') && (NXT(1) == '!') &&
3958 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3959 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3960 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3961 (NXT(8) == 'T')) {
3962 xmlParserInputPtr input = ctxt->input;
3963
3964 SKIP(9);
3965 if (!IS_BLANK(CUR)) {
3966 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3967 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3968 ctxt->sax->error(ctxt->userData,
3969 "Space required after '<!ATTLIST'\n");
3970 ctxt->wellFormed = 0;
3971 ctxt->disableSAX = 1;
3972 }
3973 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003974 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003975 if (elemName == NULL) {
3976 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3977 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3978 ctxt->sax->error(ctxt->userData,
3979 "ATTLIST: no name for Element\n");
3980 ctxt->wellFormed = 0;
3981 ctxt->disableSAX = 1;
3982 return;
3983 }
3984 SKIP_BLANKS;
3985 GROW;
3986 while (RAW != '>') {
3987 const xmlChar *check = CUR_PTR;
3988 int type;
3989 int def;
3990 xmlChar *defaultValue = NULL;
3991
3992 GROW;
3993 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003994 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003995 if (attrName == NULL) {
3996 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3997 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3998 ctxt->sax->error(ctxt->userData,
3999 "ATTLIST: no name for Attribute\n");
4000 ctxt->wellFormed = 0;
4001 ctxt->disableSAX = 1;
4002 break;
4003 }
4004 GROW;
4005 if (!IS_BLANK(CUR)) {
4006 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4007 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4008 ctxt->sax->error(ctxt->userData,
4009 "Space required after the attribute name\n");
4010 ctxt->wellFormed = 0;
4011 ctxt->disableSAX = 1;
4012 if (attrName != NULL)
4013 xmlFree(attrName);
4014 if (defaultValue != NULL)
4015 xmlFree(defaultValue);
4016 break;
4017 }
4018 SKIP_BLANKS;
4019
4020 type = xmlParseAttributeType(ctxt, &tree);
4021 if (type <= 0) {
4022 if (attrName != NULL)
4023 xmlFree(attrName);
4024 if (defaultValue != NULL)
4025 xmlFree(defaultValue);
4026 break;
4027 }
4028
4029 GROW;
4030 if (!IS_BLANK(CUR)) {
4031 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4032 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4033 ctxt->sax->error(ctxt->userData,
4034 "Space required after the attribute type\n");
4035 ctxt->wellFormed = 0;
4036 ctxt->disableSAX = 1;
4037 if (attrName != NULL)
4038 xmlFree(attrName);
4039 if (defaultValue != NULL)
4040 xmlFree(defaultValue);
4041 if (tree != NULL)
4042 xmlFreeEnumeration(tree);
4043 break;
4044 }
4045 SKIP_BLANKS;
4046
4047 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4048 if (def <= 0) {
4049 if (attrName != NULL)
4050 xmlFree(attrName);
4051 if (defaultValue != NULL)
4052 xmlFree(defaultValue);
4053 if (tree != NULL)
4054 xmlFreeEnumeration(tree);
4055 break;
4056 }
4057
4058 GROW;
4059 if (RAW != '>') {
4060 if (!IS_BLANK(CUR)) {
4061 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4062 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4063 ctxt->sax->error(ctxt->userData,
4064 "Space required after the attribute default value\n");
4065 ctxt->wellFormed = 0;
4066 ctxt->disableSAX = 1;
4067 if (attrName != NULL)
4068 xmlFree(attrName);
4069 if (defaultValue != NULL)
4070 xmlFree(defaultValue);
4071 if (tree != NULL)
4072 xmlFreeEnumeration(tree);
4073 break;
4074 }
4075 SKIP_BLANKS;
4076 }
4077 if (check == CUR_PTR) {
4078 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4079 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4080 ctxt->sax->error(ctxt->userData,
4081 "xmlParseAttributeListDecl: detected internal error\n");
4082 if (attrName != NULL)
4083 xmlFree(attrName);
4084 if (defaultValue != NULL)
4085 xmlFree(defaultValue);
4086 if (tree != NULL)
4087 xmlFreeEnumeration(tree);
4088 break;
4089 }
4090 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4091 (ctxt->sax->attributeDecl != NULL))
4092 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4093 type, def, defaultValue, tree);
4094 if (attrName != NULL)
4095 xmlFree(attrName);
4096 if (defaultValue != NULL)
4097 xmlFree(defaultValue);
4098 GROW;
4099 }
4100 if (RAW == '>') {
4101 if (input != ctxt->input) {
4102 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4103 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4104 ctxt->sax->error(ctxt->userData,
4105"Attribute list declaration doesn't start and stop in the same entity\n");
4106 ctxt->wellFormed = 0;
4107 ctxt->disableSAX = 1;
4108 }
4109 NEXT;
4110 }
4111
4112 xmlFree(elemName);
4113 }
4114}
4115
4116/**
4117 * xmlParseElementMixedContentDecl:
4118 * @ctxt: an XML parser context
4119 *
4120 * parse the declaration for a Mixed Element content
4121 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4122 *
4123 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4124 * '(' S? '#PCDATA' S? ')'
4125 *
4126 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4127 *
4128 * [ VC: No Duplicate Types ]
4129 * The same name must not appear more than once in a single
4130 * mixed-content declaration.
4131 *
4132 * returns: the list of the xmlElementContentPtr describing the element choices
4133 */
4134xmlElementContentPtr
4135xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
4136 xmlElementContentPtr ret = NULL, cur = NULL, n;
4137 xmlChar *elem = NULL;
4138
4139 GROW;
4140 if ((RAW == '#') && (NXT(1) == 'P') &&
4141 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4142 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4143 (NXT(6) == 'A')) {
4144 SKIP(7);
4145 SKIP_BLANKS;
4146 SHRINK;
4147 if (RAW == ')') {
4148 ctxt->entity = ctxt->input;
4149 NEXT;
4150 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4151 if (RAW == '*') {
4152 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4153 NEXT;
4154 }
4155 return(ret);
4156 }
4157 if ((RAW == '(') || (RAW == '|')) {
4158 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4159 if (ret == NULL) return(NULL);
4160 }
4161 while (RAW == '|') {
4162 NEXT;
4163 if (elem == NULL) {
4164 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4165 if (ret == NULL) return(NULL);
4166 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004167 if (cur != NULL)
4168 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004169 cur = ret;
4170 } else {
4171 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4172 if (n == NULL) return(NULL);
4173 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004174 if (n->c1 != NULL)
4175 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004176 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004177 if (n != NULL)
4178 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004179 cur = n;
4180 xmlFree(elem);
4181 }
4182 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004183 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004184 if (elem == NULL) {
4185 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4186 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4187 ctxt->sax->error(ctxt->userData,
4188 "xmlParseElementMixedContentDecl : Name expected\n");
4189 ctxt->wellFormed = 0;
4190 ctxt->disableSAX = 1;
4191 xmlFreeElementContent(cur);
4192 return(NULL);
4193 }
4194 SKIP_BLANKS;
4195 GROW;
4196 }
4197 if ((RAW == ')') && (NXT(1) == '*')) {
4198 if (elem != NULL) {
4199 cur->c2 = xmlNewElementContent(elem,
4200 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004201 if (cur->c2 != NULL)
4202 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004203 xmlFree(elem);
4204 }
4205 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4206 ctxt->entity = ctxt->input;
4207 SKIP(2);
4208 } else {
4209 if (elem != NULL) xmlFree(elem);
4210 xmlFreeElementContent(ret);
4211 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4212 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4213 ctxt->sax->error(ctxt->userData,
4214 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4215 ctxt->wellFormed = 0;
4216 ctxt->disableSAX = 1;
4217 return(NULL);
4218 }
4219
4220 } else {
4221 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4222 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4223 ctxt->sax->error(ctxt->userData,
4224 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4225 ctxt->wellFormed = 0;
4226 ctxt->disableSAX = 1;
4227 }
4228 return(ret);
4229}
4230
4231/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004232 * xmlParseElementChildrenContentD:
4233 * @ctxt: an XML parser context
4234 *
4235 * VMS version of xmlParseElementChildrenContentDecl()
4236 *
4237 * Returns the tree of xmlElementContentPtr describing the element
4238 * hierarchy.
4239 */
4240/**
Owen Taylor3473f882001-02-23 17:55:21 +00004241 * xmlParseElementChildrenContentDecl:
4242 * @ctxt: an XML parser context
4243 *
4244 * parse the declaration for a Mixed Element content
4245 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4246 *
4247 *
4248 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4249 *
4250 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4251 *
4252 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4253 *
4254 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4255 *
4256 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4257 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004258 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004259 * opening or closing parentheses in a choice, seq, or Mixed
4260 * construct is contained in the replacement text for a parameter
4261 * entity, both must be contained in the same replacement text. For
4262 * interoperability, if a parameter-entity reference appears in a
4263 * choice, seq, or Mixed construct, its replacement text should not
4264 * be empty, and neither the first nor last non-blank character of
4265 * the replacement text should be a connector (| or ,).
4266 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004267 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004268 * hierarchy.
4269 */
4270xmlElementContentPtr
4271#ifdef VMS
4272xmlParseElementChildrenContentD
4273#else
4274xmlParseElementChildrenContentDecl
4275#endif
4276(xmlParserCtxtPtr ctxt) {
4277 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4278 xmlChar *elem;
4279 xmlChar type = 0;
4280
4281 SKIP_BLANKS;
4282 GROW;
4283 if (RAW == '(') {
4284 /* Recurse on first child */
4285 NEXT;
4286 SKIP_BLANKS;
4287 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4288 SKIP_BLANKS;
4289 GROW;
4290 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004291 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004292 if (elem == NULL) {
4293 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4294 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4295 ctxt->sax->error(ctxt->userData,
4296 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4297 ctxt->wellFormed = 0;
4298 ctxt->disableSAX = 1;
4299 return(NULL);
4300 }
4301 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4302 GROW;
4303 if (RAW == '?') {
4304 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4305 NEXT;
4306 } else if (RAW == '*') {
4307 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4308 NEXT;
4309 } else if (RAW == '+') {
4310 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4311 NEXT;
4312 } else {
4313 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4314 }
4315 xmlFree(elem);
4316 GROW;
4317 }
4318 SKIP_BLANKS;
4319 SHRINK;
4320 while (RAW != ')') {
4321 /*
4322 * Each loop we parse one separator and one element.
4323 */
4324 if (RAW == ',') {
4325 if (type == 0) type = CUR;
4326
4327 /*
4328 * Detect "Name | Name , Name" error
4329 */
4330 else if (type != CUR) {
4331 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4332 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4333 ctxt->sax->error(ctxt->userData,
4334 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4335 type);
4336 ctxt->wellFormed = 0;
4337 ctxt->disableSAX = 1;
4338 if ((op != NULL) && (op != ret))
4339 xmlFreeElementContent(op);
4340 if ((last != NULL) && (last != ret) &&
4341 (last != ret->c1) && (last != ret->c2))
4342 xmlFreeElementContent(last);
4343 if (ret != NULL)
4344 xmlFreeElementContent(ret);
4345 return(NULL);
4346 }
4347 NEXT;
4348
4349 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4350 if (op == NULL) {
4351 xmlFreeElementContent(ret);
4352 return(NULL);
4353 }
4354 if (last == NULL) {
4355 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004356 if (ret != NULL)
4357 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004358 ret = cur = op;
4359 } else {
4360 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004361 if (op != NULL)
4362 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004363 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004364 if (last != NULL)
4365 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004366 cur =op;
4367 last = NULL;
4368 }
4369 } else if (RAW == '|') {
4370 if (type == 0) type = CUR;
4371
4372 /*
4373 * Detect "Name , Name | Name" error
4374 */
4375 else if (type != CUR) {
4376 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4377 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4378 ctxt->sax->error(ctxt->userData,
4379 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4380 type);
4381 ctxt->wellFormed = 0;
4382 ctxt->disableSAX = 1;
4383 if ((op != NULL) && (op != ret) && (op != last))
4384 xmlFreeElementContent(op);
4385 if ((last != NULL) && (last != ret) &&
4386 (last != ret->c1) && (last != ret->c2))
4387 xmlFreeElementContent(last);
4388 if (ret != NULL)
4389 xmlFreeElementContent(ret);
4390 return(NULL);
4391 }
4392 NEXT;
4393
4394 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4395 if (op == NULL) {
4396 if ((op != NULL) && (op != ret))
4397 xmlFreeElementContent(op);
4398 if ((last != NULL) && (last != ret) &&
4399 (last != ret->c1) && (last != ret->c2))
4400 xmlFreeElementContent(last);
4401 if (ret != NULL)
4402 xmlFreeElementContent(ret);
4403 return(NULL);
4404 }
4405 if (last == NULL) {
4406 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004407 if (ret != NULL)
4408 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004409 ret = cur = op;
4410 } else {
4411 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004412 if (op != NULL)
4413 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004414 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004415 if (last != NULL)
4416 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004417 cur =op;
4418 last = NULL;
4419 }
4420 } else {
4421 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4422 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4423 ctxt->sax->error(ctxt->userData,
4424 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4425 ctxt->wellFormed = 0;
4426 ctxt->disableSAX = 1;
4427 if ((op != NULL) && (op != ret))
4428 xmlFreeElementContent(op);
4429 if ((last != NULL) && (last != ret) &&
4430 (last != ret->c1) && (last != ret->c2))
4431 xmlFreeElementContent(last);
4432 if (ret != NULL)
4433 xmlFreeElementContent(ret);
4434 return(NULL);
4435 }
4436 GROW;
4437 SKIP_BLANKS;
4438 GROW;
4439 if (RAW == '(') {
4440 /* Recurse on second child */
4441 NEXT;
4442 SKIP_BLANKS;
4443 last = xmlParseElementChildrenContentDecl(ctxt);
4444 SKIP_BLANKS;
4445 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004446 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004447 if (elem == NULL) {
4448 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4449 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4450 ctxt->sax->error(ctxt->userData,
4451 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4452 ctxt->wellFormed = 0;
4453 ctxt->disableSAX = 1;
4454 if ((op != NULL) && (op != ret))
4455 xmlFreeElementContent(op);
4456 if ((last != NULL) && (last != ret) &&
4457 (last != ret->c1) && (last != ret->c2))
4458 xmlFreeElementContent(last);
4459 if (ret != NULL)
4460 xmlFreeElementContent(ret);
4461 return(NULL);
4462 }
4463 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4464 xmlFree(elem);
4465 if (RAW == '?') {
4466 last->ocur = XML_ELEMENT_CONTENT_OPT;
4467 NEXT;
4468 } else if (RAW == '*') {
4469 last->ocur = XML_ELEMENT_CONTENT_MULT;
4470 NEXT;
4471 } else if (RAW == '+') {
4472 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4473 NEXT;
4474 } else {
4475 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4476 }
4477 }
4478 SKIP_BLANKS;
4479 GROW;
4480 }
4481 if ((cur != NULL) && (last != NULL)) {
4482 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004483 if (last != NULL)
4484 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004485 }
4486 ctxt->entity = ctxt->input;
4487 NEXT;
4488 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004489 if (ret != NULL)
4490 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004491 NEXT;
4492 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004493 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004494 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004495 cur = ret;
4496 /*
4497 * Some normalization:
4498 * (a | b* | c?)* == (a | b | c)*
4499 */
4500 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4501 if ((cur->c1 != NULL) &&
4502 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4503 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4504 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4505 if ((cur->c2 != NULL) &&
4506 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4507 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4508 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4509 cur = cur->c2;
4510 }
4511 }
Owen Taylor3473f882001-02-23 17:55:21 +00004512 NEXT;
4513 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004514 if (ret != NULL) {
4515 int found = 0;
4516
Daniel Veillarde470df72001-04-18 21:41:07 +00004517 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004518 /*
4519 * Some normalization:
4520 * (a | b*)+ == (a | b)*
4521 * (a | b?)+ == (a | b)*
4522 */
4523 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4524 if ((cur->c1 != NULL) &&
4525 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4526 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4527 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4528 found = 1;
4529 }
4530 if ((cur->c2 != NULL) &&
4531 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4532 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4533 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4534 found = 1;
4535 }
4536 cur = cur->c2;
4537 }
4538 if (found)
4539 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4540 }
Owen Taylor3473f882001-02-23 17:55:21 +00004541 NEXT;
4542 }
4543 return(ret);
4544}
4545
4546/**
4547 * xmlParseElementContentDecl:
4548 * @ctxt: an XML parser context
4549 * @name: the name of the element being defined.
4550 * @result: the Element Content pointer will be stored here if any
4551 *
4552 * parse the declaration for an Element content either Mixed or Children,
4553 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4554 *
4555 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4556 *
4557 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4558 */
4559
4560int
4561xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4562 xmlElementContentPtr *result) {
4563
4564 xmlElementContentPtr tree = NULL;
4565 xmlParserInputPtr input = ctxt->input;
4566 int res;
4567
4568 *result = NULL;
4569
4570 if (RAW != '(') {
4571 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4572 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4573 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004574 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004575 ctxt->wellFormed = 0;
4576 ctxt->disableSAX = 1;
4577 return(-1);
4578 }
4579 NEXT;
4580 GROW;
4581 SKIP_BLANKS;
4582 if ((RAW == '#') && (NXT(1) == 'P') &&
4583 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4584 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4585 (NXT(6) == 'A')) {
4586 tree = xmlParseElementMixedContentDecl(ctxt);
4587 res = XML_ELEMENT_TYPE_MIXED;
4588 } else {
4589 tree = xmlParseElementChildrenContentDecl(ctxt);
4590 res = XML_ELEMENT_TYPE_ELEMENT;
4591 }
4592 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4593 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4594 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4595 ctxt->sax->error(ctxt->userData,
4596"Element content declaration doesn't start and stop in the same entity\n");
4597 ctxt->wellFormed = 0;
4598 ctxt->disableSAX = 1;
4599 }
4600 SKIP_BLANKS;
4601 *result = tree;
4602 return(res);
4603}
4604
4605/**
4606 * xmlParseElementDecl:
4607 * @ctxt: an XML parser context
4608 *
4609 * parse an Element declaration.
4610 *
4611 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4612 *
4613 * [ VC: Unique Element Type Declaration ]
4614 * No element type may be declared more than once
4615 *
4616 * Returns the type of the element, or -1 in case of error
4617 */
4618int
4619xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4620 xmlChar *name;
4621 int ret = -1;
4622 xmlElementContentPtr content = NULL;
4623
4624 GROW;
4625 if ((RAW == '<') && (NXT(1) == '!') &&
4626 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4627 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4628 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4629 (NXT(8) == 'T')) {
4630 xmlParserInputPtr input = ctxt->input;
4631
4632 SKIP(9);
4633 if (!IS_BLANK(CUR)) {
4634 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4635 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4636 ctxt->sax->error(ctxt->userData,
4637 "Space required after 'ELEMENT'\n");
4638 ctxt->wellFormed = 0;
4639 ctxt->disableSAX = 1;
4640 }
4641 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004642 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004643 if (name == NULL) {
4644 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4645 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4646 ctxt->sax->error(ctxt->userData,
4647 "xmlParseElementDecl: no name for Element\n");
4648 ctxt->wellFormed = 0;
4649 ctxt->disableSAX = 1;
4650 return(-1);
4651 }
4652 while ((RAW == 0) && (ctxt->inputNr > 1))
4653 xmlPopInput(ctxt);
4654 if (!IS_BLANK(CUR)) {
4655 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4657 ctxt->sax->error(ctxt->userData,
4658 "Space required after the element name\n");
4659 ctxt->wellFormed = 0;
4660 ctxt->disableSAX = 1;
4661 }
4662 SKIP_BLANKS;
4663 if ((RAW == 'E') && (NXT(1) == 'M') &&
4664 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4665 (NXT(4) == 'Y')) {
4666 SKIP(5);
4667 /*
4668 * Element must always be empty.
4669 */
4670 ret = XML_ELEMENT_TYPE_EMPTY;
4671 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4672 (NXT(2) == 'Y')) {
4673 SKIP(3);
4674 /*
4675 * Element is a generic container.
4676 */
4677 ret = XML_ELEMENT_TYPE_ANY;
4678 } else if (RAW == '(') {
4679 ret = xmlParseElementContentDecl(ctxt, name, &content);
4680 } else {
4681 /*
4682 * [ WFC: PEs in Internal Subset ] error handling.
4683 */
4684 if ((RAW == '%') && (ctxt->external == 0) &&
4685 (ctxt->inputNr == 1)) {
4686 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4687 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4688 ctxt->sax->error(ctxt->userData,
4689 "PEReference: forbidden within markup decl in internal subset\n");
4690 } else {
4691 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4693 ctxt->sax->error(ctxt->userData,
4694 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4695 }
4696 ctxt->wellFormed = 0;
4697 ctxt->disableSAX = 1;
4698 if (name != NULL) xmlFree(name);
4699 return(-1);
4700 }
4701
4702 SKIP_BLANKS;
4703 /*
4704 * Pop-up of finished entities.
4705 */
4706 while ((RAW == 0) && (ctxt->inputNr > 1))
4707 xmlPopInput(ctxt);
4708 SKIP_BLANKS;
4709
4710 if (RAW != '>') {
4711 ctxt->errNo = XML_ERR_GT_REQUIRED;
4712 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4713 ctxt->sax->error(ctxt->userData,
4714 "xmlParseElementDecl: expected '>' at the end\n");
4715 ctxt->wellFormed = 0;
4716 ctxt->disableSAX = 1;
4717 } else {
4718 if (input != ctxt->input) {
4719 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4720 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4721 ctxt->sax->error(ctxt->userData,
4722"Element declaration doesn't start and stop in the same entity\n");
4723 ctxt->wellFormed = 0;
4724 ctxt->disableSAX = 1;
4725 }
4726
4727 NEXT;
4728 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4729 (ctxt->sax->elementDecl != NULL))
4730 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4731 content);
4732 }
4733 if (content != NULL) {
4734 xmlFreeElementContent(content);
4735 }
4736 if (name != NULL) {
4737 xmlFree(name);
4738 }
4739 }
4740 return(ret);
4741}
4742
4743/**
Owen Taylor3473f882001-02-23 17:55:21 +00004744 * xmlParseConditionalSections
4745 * @ctxt: an XML parser context
4746 *
4747 * [61] conditionalSect ::= includeSect | ignoreSect
4748 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4749 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4750 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4751 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4752 */
4753
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004754static void
Owen Taylor3473f882001-02-23 17:55:21 +00004755xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4756 SKIP(3);
4757 SKIP_BLANKS;
4758 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4759 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4760 (NXT(6) == 'E')) {
4761 SKIP(7);
4762 SKIP_BLANKS;
4763 if (RAW != '[') {
4764 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4765 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4766 ctxt->sax->error(ctxt->userData,
4767 "XML conditional section '[' expected\n");
4768 ctxt->wellFormed = 0;
4769 ctxt->disableSAX = 1;
4770 } else {
4771 NEXT;
4772 }
4773 if (xmlParserDebugEntities) {
4774 if ((ctxt->input != NULL) && (ctxt->input->filename))
4775 xmlGenericError(xmlGenericErrorContext,
4776 "%s(%d): ", ctxt->input->filename,
4777 ctxt->input->line);
4778 xmlGenericError(xmlGenericErrorContext,
4779 "Entering INCLUDE Conditional Section\n");
4780 }
4781
4782 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4783 (NXT(2) != '>'))) {
4784 const xmlChar *check = CUR_PTR;
4785 int cons = ctxt->input->consumed;
4786 int tok = ctxt->token;
4787
4788 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4789 xmlParseConditionalSections(ctxt);
4790 } else if (IS_BLANK(CUR)) {
4791 NEXT;
4792 } else if (RAW == '%') {
4793 xmlParsePEReference(ctxt);
4794 } else
4795 xmlParseMarkupDecl(ctxt);
4796
4797 /*
4798 * Pop-up of finished entities.
4799 */
4800 while ((RAW == 0) && (ctxt->inputNr > 1))
4801 xmlPopInput(ctxt);
4802
4803 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4804 (tok == ctxt->token)) {
4805 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4806 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4807 ctxt->sax->error(ctxt->userData,
4808 "Content error in the external subset\n");
4809 ctxt->wellFormed = 0;
4810 ctxt->disableSAX = 1;
4811 break;
4812 }
4813 }
4814 if (xmlParserDebugEntities) {
4815 if ((ctxt->input != NULL) && (ctxt->input->filename))
4816 xmlGenericError(xmlGenericErrorContext,
4817 "%s(%d): ", ctxt->input->filename,
4818 ctxt->input->line);
4819 xmlGenericError(xmlGenericErrorContext,
4820 "Leaving INCLUDE Conditional Section\n");
4821 }
4822
4823 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4824 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4825 int state;
4826 int instate;
4827 int depth = 0;
4828
4829 SKIP(6);
4830 SKIP_BLANKS;
4831 if (RAW != '[') {
4832 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4833 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4834 ctxt->sax->error(ctxt->userData,
4835 "XML conditional section '[' expected\n");
4836 ctxt->wellFormed = 0;
4837 ctxt->disableSAX = 1;
4838 } else {
4839 NEXT;
4840 }
4841 if (xmlParserDebugEntities) {
4842 if ((ctxt->input != NULL) && (ctxt->input->filename))
4843 xmlGenericError(xmlGenericErrorContext,
4844 "%s(%d): ", ctxt->input->filename,
4845 ctxt->input->line);
4846 xmlGenericError(xmlGenericErrorContext,
4847 "Entering IGNORE Conditional Section\n");
4848 }
4849
4850 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004851 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00004852 * But disable SAX event generating DTD building in the meantime
4853 */
4854 state = ctxt->disableSAX;
4855 instate = ctxt->instate;
4856 ctxt->disableSAX = 1;
4857 ctxt->instate = XML_PARSER_IGNORE;
4858
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004859 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004860 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4861 depth++;
4862 SKIP(3);
4863 continue;
4864 }
4865 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4866 if (--depth >= 0) SKIP(3);
4867 continue;
4868 }
4869 NEXT;
4870 continue;
4871 }
4872
4873 ctxt->disableSAX = state;
4874 ctxt->instate = instate;
4875
4876 if (xmlParserDebugEntities) {
4877 if ((ctxt->input != NULL) && (ctxt->input->filename))
4878 xmlGenericError(xmlGenericErrorContext,
4879 "%s(%d): ", ctxt->input->filename,
4880 ctxt->input->line);
4881 xmlGenericError(xmlGenericErrorContext,
4882 "Leaving IGNORE Conditional Section\n");
4883 }
4884
4885 } else {
4886 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4887 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4888 ctxt->sax->error(ctxt->userData,
4889 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4890 ctxt->wellFormed = 0;
4891 ctxt->disableSAX = 1;
4892 }
4893
4894 if (RAW == 0)
4895 SHRINK;
4896
4897 if (RAW == 0) {
4898 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4899 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4900 ctxt->sax->error(ctxt->userData,
4901 "XML conditional section not closed\n");
4902 ctxt->wellFormed = 0;
4903 ctxt->disableSAX = 1;
4904 } else {
4905 SKIP(3);
4906 }
4907}
4908
4909/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00004910 * xmlParseMarkupDecl:
4911 * @ctxt: an XML parser context
4912 *
4913 * parse Markup declarations
4914 *
4915 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4916 * NotationDecl | PI | Comment
4917 *
4918 * [ VC: Proper Declaration/PE Nesting ]
4919 * Parameter-entity replacement text must be properly nested with
4920 * markup declarations. That is to say, if either the first character
4921 * or the last character of a markup declaration (markupdecl above) is
4922 * contained in the replacement text for a parameter-entity reference,
4923 * both must be contained in the same replacement text.
4924 *
4925 * [ WFC: PEs in Internal Subset ]
4926 * In the internal DTD subset, parameter-entity references can occur
4927 * only where markup declarations can occur, not within markup declarations.
4928 * (This does not apply to references that occur in external parameter
4929 * entities or to the external subset.)
4930 */
4931void
4932xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4933 GROW;
4934 xmlParseElementDecl(ctxt);
4935 xmlParseAttributeListDecl(ctxt);
4936 xmlParseEntityDecl(ctxt);
4937 xmlParseNotationDecl(ctxt);
4938 xmlParsePI(ctxt);
4939 xmlParseComment(ctxt);
4940 /*
4941 * This is only for internal subset. On external entities,
4942 * the replacement is done before parsing stage
4943 */
4944 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4945 xmlParsePEReference(ctxt);
4946
4947 /*
4948 * Conditional sections are allowed from entities included
4949 * by PE References in the internal subset.
4950 */
4951 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
4952 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4953 xmlParseConditionalSections(ctxt);
4954 }
4955 }
4956
4957 ctxt->instate = XML_PARSER_DTD;
4958}
4959
4960/**
4961 * xmlParseTextDecl:
4962 * @ctxt: an XML parser context
4963 *
4964 * parse an XML declaration header for external entities
4965 *
4966 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4967 *
4968 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4969 */
4970
4971void
4972xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4973 xmlChar *version;
4974
4975 /*
4976 * We know that '<?xml' is here.
4977 */
4978 if ((RAW == '<') && (NXT(1) == '?') &&
4979 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4980 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4981 SKIP(5);
4982 } else {
4983 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4984 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4985 ctxt->sax->error(ctxt->userData,
4986 "Text declaration '<?xml' required\n");
4987 ctxt->wellFormed = 0;
4988 ctxt->disableSAX = 1;
4989
4990 return;
4991 }
4992
4993 if (!IS_BLANK(CUR)) {
4994 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4995 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4996 ctxt->sax->error(ctxt->userData,
4997 "Space needed after '<?xml'\n");
4998 ctxt->wellFormed = 0;
4999 ctxt->disableSAX = 1;
5000 }
5001 SKIP_BLANKS;
5002
5003 /*
5004 * We may have the VersionInfo here.
5005 */
5006 version = xmlParseVersionInfo(ctxt);
5007 if (version == NULL)
5008 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005009 else {
5010 if (!IS_BLANK(CUR)) {
5011 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5012 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5013 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5014 ctxt->wellFormed = 0;
5015 ctxt->disableSAX = 1;
5016 }
5017 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005018 ctxt->input->version = version;
5019
5020 /*
5021 * We must have the encoding declaration
5022 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005023 xmlParseEncodingDecl(ctxt);
5024 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5025 /*
5026 * The XML REC instructs us to stop parsing right here
5027 */
5028 return;
5029 }
5030
5031 SKIP_BLANKS;
5032 if ((RAW == '?') && (NXT(1) == '>')) {
5033 SKIP(2);
5034 } else if (RAW == '>') {
5035 /* Deprecated old WD ... */
5036 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5037 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5038 ctxt->sax->error(ctxt->userData,
5039 "XML declaration must end-up with '?>'\n");
5040 ctxt->wellFormed = 0;
5041 ctxt->disableSAX = 1;
5042 NEXT;
5043 } else {
5044 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5045 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5046 ctxt->sax->error(ctxt->userData,
5047 "parsing XML declaration: '?>' expected\n");
5048 ctxt->wellFormed = 0;
5049 ctxt->disableSAX = 1;
5050 MOVETO_ENDTAG(CUR_PTR);
5051 NEXT;
5052 }
5053}
5054
5055/**
Owen Taylor3473f882001-02-23 17:55:21 +00005056 * xmlParseExternalSubset:
5057 * @ctxt: an XML parser context
5058 * @ExternalID: the external identifier
5059 * @SystemID: the system identifier (or URL)
5060 *
5061 * parse Markup declarations from an external subset
5062 *
5063 * [30] extSubset ::= textDecl? extSubsetDecl
5064 *
5065 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5066 */
5067void
5068xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5069 const xmlChar *SystemID) {
5070 GROW;
5071 if ((RAW == '<') && (NXT(1) == '?') &&
5072 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5073 (NXT(4) == 'l')) {
5074 xmlParseTextDecl(ctxt);
5075 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5076 /*
5077 * The XML REC instructs us to stop parsing right here
5078 */
5079 ctxt->instate = XML_PARSER_EOF;
5080 return;
5081 }
5082 }
5083 if (ctxt->myDoc == NULL) {
5084 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5085 }
5086 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5087 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5088
5089 ctxt->instate = XML_PARSER_DTD;
5090 ctxt->external = 1;
5091 while (((RAW == '<') && (NXT(1) == '?')) ||
5092 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005093 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005094 const xmlChar *check = CUR_PTR;
5095 int cons = ctxt->input->consumed;
5096 int tok = ctxt->token;
5097
5098 GROW;
5099 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5100 xmlParseConditionalSections(ctxt);
5101 } else if (IS_BLANK(CUR)) {
5102 NEXT;
5103 } else if (RAW == '%') {
5104 xmlParsePEReference(ctxt);
5105 } else
5106 xmlParseMarkupDecl(ctxt);
5107
5108 /*
5109 * Pop-up of finished entities.
5110 */
5111 while ((RAW == 0) && (ctxt->inputNr > 1))
5112 xmlPopInput(ctxt);
5113
5114 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5115 (tok == ctxt->token)) {
5116 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5117 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5118 ctxt->sax->error(ctxt->userData,
5119 "Content error in the external subset\n");
5120 ctxt->wellFormed = 0;
5121 ctxt->disableSAX = 1;
5122 break;
5123 }
5124 }
5125
5126 if (RAW != 0) {
5127 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5128 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5129 ctxt->sax->error(ctxt->userData,
5130 "Extra content at the end of the document\n");
5131 ctxt->wellFormed = 0;
5132 ctxt->disableSAX = 1;
5133 }
5134
5135}
5136
5137/**
5138 * xmlParseReference:
5139 * @ctxt: an XML parser context
5140 *
5141 * parse and handle entity references in content, depending on the SAX
5142 * interface, this may end-up in a call to character() if this is a
5143 * CharRef, a predefined entity, if there is no reference() callback.
5144 * or if the parser was asked to switch to that mode.
5145 *
5146 * [67] Reference ::= EntityRef | CharRef
5147 */
5148void
5149xmlParseReference(xmlParserCtxtPtr ctxt) {
5150 xmlEntityPtr ent;
5151 xmlChar *val;
5152 if (RAW != '&') return;
5153
5154 if (NXT(1) == '#') {
5155 int i = 0;
5156 xmlChar out[10];
5157 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005158 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005159
5160 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5161 /*
5162 * So we are using non-UTF-8 buffers
5163 * Check that the char fit on 8bits, if not
5164 * generate a CharRef.
5165 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005166 if (value <= 0xFF) {
5167 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005168 out[1] = 0;
5169 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5170 (!ctxt->disableSAX))
5171 ctxt->sax->characters(ctxt->userData, out, 1);
5172 } else {
5173 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005174 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005175 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005176 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005177 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5178 (!ctxt->disableSAX))
5179 ctxt->sax->reference(ctxt->userData, out);
5180 }
5181 } else {
5182 /*
5183 * Just encode the value in UTF-8
5184 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005185 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005186 out[i] = 0;
5187 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5188 (!ctxt->disableSAX))
5189 ctxt->sax->characters(ctxt->userData, out, i);
5190 }
5191 } else {
5192 ent = xmlParseEntityRef(ctxt);
5193 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005194 if (!ctxt->wellFormed)
5195 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005196 if ((ent->name != NULL) &&
5197 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5198 xmlNodePtr list = NULL;
5199 int ret;
5200
5201
5202 /*
5203 * The first reference to the entity trigger a parsing phase
5204 * where the ent->children is filled with the result from
5205 * the parsing.
5206 */
5207 if (ent->children == NULL) {
5208 xmlChar *value;
5209 value = ent->content;
5210
5211 /*
5212 * Check that this entity is well formed
5213 */
5214 if ((value != NULL) &&
5215 (value[1] == 0) && (value[0] == '<') &&
5216 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5217 /*
5218 * DONE: get definite answer on this !!!
5219 * Lots of entity decls are used to declare a single
5220 * char
5221 * <!ENTITY lt "<">
5222 * Which seems to be valid since
5223 * 2.4: The ampersand character (&) and the left angle
5224 * bracket (<) may appear in their literal form only
5225 * when used ... They are also legal within the literal
5226 * entity value of an internal entity declaration;i
5227 * see "4.3.2 Well-Formed Parsed Entities".
5228 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5229 * Looking at the OASIS test suite and James Clark
5230 * tests, this is broken. However the XML REC uses
5231 * it. Is the XML REC not well-formed ????
5232 * This is a hack to avoid this problem
5233 *
5234 * ANSWER: since lt gt amp .. are already defined,
5235 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005236 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005237 * is lousy but acceptable.
5238 */
5239 list = xmlNewDocText(ctxt->myDoc, value);
5240 if (list != NULL) {
5241 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5242 (ent->children == NULL)) {
5243 ent->children = list;
5244 ent->last = list;
5245 list->parent = (xmlNodePtr) ent;
5246 } else {
5247 xmlFreeNodeList(list);
5248 }
5249 } else if (list != NULL) {
5250 xmlFreeNodeList(list);
5251 }
5252 } else {
5253 /*
5254 * 4.3.2: An internal general parsed entity is well-formed
5255 * if its replacement text matches the production labeled
5256 * content.
5257 */
5258 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5259 ctxt->depth++;
5260 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
5261 ctxt->sax, NULL, ctxt->depth,
5262 value, &list);
5263 ctxt->depth--;
5264 } else if (ent->etype ==
5265 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5266 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005267 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Owen Taylor3473f882001-02-23 17:55:21 +00005268 ctxt->sax, NULL, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005269 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005270 ctxt->depth--;
5271 } else {
5272 ret = -1;
5273 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5274 ctxt->sax->error(ctxt->userData,
5275 "Internal: invalid entity type\n");
5276 }
5277 if (ret == XML_ERR_ENTITY_LOOP) {
5278 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5279 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5280 ctxt->sax->error(ctxt->userData,
5281 "Detected entity reference loop\n");
5282 ctxt->wellFormed = 0;
5283 ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005284 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005285 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005286 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5287 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005288 (ent->children == NULL)) {
5289 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005290 if (ctxt->replaceEntities) {
5291 /*
5292 * Prune it directly in the generated document
5293 * except for single text nodes.
5294 */
5295 if ((list->type == XML_TEXT_NODE) &&
5296 (list->next == NULL)) {
5297 list->parent = (xmlNodePtr) ent;
5298 list = NULL;
5299 } else {
5300 while (list != NULL) {
5301 list->parent = (xmlNodePtr) ctxt->node;
5302 if (list->next == NULL)
5303 ent->last = list;
5304 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005305 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005306 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005307 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5308 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005309 }
5310 } else {
5311 while (list != NULL) {
5312 list->parent = (xmlNodePtr) ent;
5313 if (list->next == NULL)
5314 ent->last = list;
5315 list = list->next;
5316 }
Owen Taylor3473f882001-02-23 17:55:21 +00005317 }
5318 } else {
5319 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005320 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005321 }
5322 } else if (ret > 0) {
5323 ctxt->errNo = ret;
5324 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5325 ctxt->sax->error(ctxt->userData,
5326 "Entity value required\n");
5327 ctxt->wellFormed = 0;
5328 ctxt->disableSAX = 1;
5329 } else if (list != NULL) {
5330 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005331 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005332 }
5333 }
5334 }
5335 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5336 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5337 /*
5338 * Create a node.
5339 */
5340 ctxt->sax->reference(ctxt->userData, ent->name);
5341 return;
5342 } else if (ctxt->replaceEntities) {
5343 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5344 /*
5345 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005346 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005347 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005348 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005349 if (list == NULL) {
Daniel Veillard8107a222002-01-13 14:10:10 +00005350 xmlNodePtr new = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005351 cur = ent->children;
5352 while (cur != NULL) {
5353 new = xmlCopyNode(cur, 1);
Daniel Veillard8107a222002-01-13 14:10:10 +00005354 if (firstChild == NULL){
5355 firstChild = new;
5356 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005357 xmlAddChild(ctxt->node, new);
5358 if (cur == ent->last)
5359 break;
5360 cur = cur->next;
5361 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005362 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5363 xmlAddEntityReference(ent, firstChild, new);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005364 } else {
5365 /*
5366 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005367 * node with a possible previous text one which
5368 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005369 */
5370 if (ent->children->type == XML_TEXT_NODE)
5371 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5372 if ((ent->last != ent->children) &&
5373 (ent->last->type == XML_TEXT_NODE))
5374 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5375 xmlAddChildList(ctxt->node, ent->children);
5376 }
5377
Owen Taylor3473f882001-02-23 17:55:21 +00005378 /*
5379 * This is to avoid a nasty side effect, see
5380 * characters() in SAX.c
5381 */
5382 ctxt->nodemem = 0;
5383 ctxt->nodelen = 0;
5384 return;
5385 } else {
5386 /*
5387 * Probably running in SAX mode
5388 */
5389 xmlParserInputPtr input;
5390
5391 input = xmlNewEntityInputStream(ctxt, ent);
5392 xmlPushInput(ctxt, input);
5393 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5394 (RAW == '<') && (NXT(1) == '?') &&
5395 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5396 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5397 xmlParseTextDecl(ctxt);
5398 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5399 /*
5400 * The XML REC instructs us to stop parsing right here
5401 */
5402 ctxt->instate = XML_PARSER_EOF;
5403 return;
5404 }
5405 if (input->standalone == 1) {
5406 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5407 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5408 ctxt->sax->error(ctxt->userData,
5409 "external parsed entities cannot be standalone\n");
5410 ctxt->wellFormed = 0;
5411 ctxt->disableSAX = 1;
5412 }
5413 }
5414 return;
5415 }
5416 }
5417 } else {
5418 val = ent->content;
5419 if (val == NULL) return;
5420 /*
5421 * inline the entity.
5422 */
5423 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5424 (!ctxt->disableSAX))
5425 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5426 }
5427 }
5428}
5429
5430/**
5431 * xmlParseEntityRef:
5432 * @ctxt: an XML parser context
5433 *
5434 * parse ENTITY references declarations
5435 *
5436 * [68] EntityRef ::= '&' Name ';'
5437 *
5438 * [ WFC: Entity Declared ]
5439 * In a document without any DTD, a document with only an internal DTD
5440 * subset which contains no parameter entity references, or a document
5441 * with "standalone='yes'", the Name given in the entity reference
5442 * must match that in an entity declaration, except that well-formed
5443 * documents need not declare any of the following entities: amp, lt,
5444 * gt, apos, quot. The declaration of a parameter entity must precede
5445 * any reference to it. Similarly, the declaration of a general entity
5446 * must precede any reference to it which appears in a default value in an
5447 * attribute-list declaration. Note that if entities are declared in the
5448 * external subset or in external parameter entities, a non-validating
5449 * processor is not obligated to read and process their declarations;
5450 * for such documents, the rule that an entity must be declared is a
5451 * well-formedness constraint only if standalone='yes'.
5452 *
5453 * [ WFC: Parsed Entity ]
5454 * An entity reference must not contain the name of an unparsed entity
5455 *
5456 * Returns the xmlEntityPtr if found, or NULL otherwise.
5457 */
5458xmlEntityPtr
5459xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5460 xmlChar *name;
5461 xmlEntityPtr ent = NULL;
5462
5463 GROW;
5464
5465 if (RAW == '&') {
5466 NEXT;
5467 name = xmlParseName(ctxt);
5468 if (name == NULL) {
5469 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5470 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5471 ctxt->sax->error(ctxt->userData,
5472 "xmlParseEntityRef: no name\n");
5473 ctxt->wellFormed = 0;
5474 ctxt->disableSAX = 1;
5475 } else {
5476 if (RAW == ';') {
5477 NEXT;
5478 /*
5479 * Ask first SAX for entity resolution, otherwise try the
5480 * predefined set.
5481 */
5482 if (ctxt->sax != NULL) {
5483 if (ctxt->sax->getEntity != NULL)
5484 ent = ctxt->sax->getEntity(ctxt->userData, name);
5485 if (ent == NULL)
5486 ent = xmlGetPredefinedEntity(name);
5487 }
5488 /*
5489 * [ WFC: Entity Declared ]
5490 * In a document without any DTD, a document with only an
5491 * internal DTD subset which contains no parameter entity
5492 * references, or a document with "standalone='yes'", the
5493 * Name given in the entity reference must match that in an
5494 * entity declaration, except that well-formed documents
5495 * need not declare any of the following entities: amp, lt,
5496 * gt, apos, quot.
5497 * The declaration of a parameter entity must precede any
5498 * reference to it.
5499 * Similarly, the declaration of a general entity must
5500 * precede any reference to it which appears in a default
5501 * value in an attribute-list declaration. Note that if
5502 * entities are declared in the external subset or in
5503 * external parameter entities, a non-validating processor
5504 * is not obligated to read and process their declarations;
5505 * for such documents, the rule that an entity must be
5506 * declared is a well-formedness constraint only if
5507 * standalone='yes'.
5508 */
5509 if (ent == NULL) {
5510 if ((ctxt->standalone == 1) ||
5511 ((ctxt->hasExternalSubset == 0) &&
5512 (ctxt->hasPErefs == 0))) {
5513 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5514 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5515 ctxt->sax->error(ctxt->userData,
5516 "Entity '%s' not defined\n", name);
5517 ctxt->wellFormed = 0;
5518 ctxt->disableSAX = 1;
5519 } else {
5520 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005521 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005522 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005523 "Entity '%s' not defined\n", name);
5524 }
5525 }
5526
5527 /*
5528 * [ WFC: Parsed Entity ]
5529 * An entity reference must not contain the name of an
5530 * unparsed entity
5531 */
5532 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5533 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5534 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5535 ctxt->sax->error(ctxt->userData,
5536 "Entity reference to unparsed entity %s\n", name);
5537 ctxt->wellFormed = 0;
5538 ctxt->disableSAX = 1;
5539 }
5540
5541 /*
5542 * [ WFC: No External Entity References ]
5543 * Attribute values cannot contain direct or indirect
5544 * entity references to external entities.
5545 */
5546 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5547 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5548 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5550 ctxt->sax->error(ctxt->userData,
5551 "Attribute references external entity '%s'\n", name);
5552 ctxt->wellFormed = 0;
5553 ctxt->disableSAX = 1;
5554 }
5555 /*
5556 * [ WFC: No < in Attribute Values ]
5557 * The replacement text of any entity referred to directly or
5558 * indirectly in an attribute value (other than "&lt;") must
5559 * not contain a <.
5560 */
5561 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5562 (ent != NULL) &&
5563 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5564 (ent->content != NULL) &&
5565 (xmlStrchr(ent->content, '<'))) {
5566 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5567 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5568 ctxt->sax->error(ctxt->userData,
5569 "'<' in entity '%s' is not allowed in attributes values\n", name);
5570 ctxt->wellFormed = 0;
5571 ctxt->disableSAX = 1;
5572 }
5573
5574 /*
5575 * Internal check, no parameter entities here ...
5576 */
5577 else {
5578 switch (ent->etype) {
5579 case XML_INTERNAL_PARAMETER_ENTITY:
5580 case XML_EXTERNAL_PARAMETER_ENTITY:
5581 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5582 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5583 ctxt->sax->error(ctxt->userData,
5584 "Attempt to reference the parameter entity '%s'\n", name);
5585 ctxt->wellFormed = 0;
5586 ctxt->disableSAX = 1;
5587 break;
5588 default:
5589 break;
5590 }
5591 }
5592
5593 /*
5594 * [ WFC: No Recursion ]
5595 * A parsed entity must not contain a recursive reference
5596 * to itself, either directly or indirectly.
5597 * Done somewhere else
5598 */
5599
5600 } else {
5601 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5603 ctxt->sax->error(ctxt->userData,
5604 "xmlParseEntityRef: expecting ';'\n");
5605 ctxt->wellFormed = 0;
5606 ctxt->disableSAX = 1;
5607 }
5608 xmlFree(name);
5609 }
5610 }
5611 return(ent);
5612}
5613
5614/**
5615 * xmlParseStringEntityRef:
5616 * @ctxt: an XML parser context
5617 * @str: a pointer to an index in the string
5618 *
5619 * parse ENTITY references declarations, but this version parses it from
5620 * a string value.
5621 *
5622 * [68] EntityRef ::= '&' Name ';'
5623 *
5624 * [ WFC: Entity Declared ]
5625 * In a document without any DTD, a document with only an internal DTD
5626 * subset which contains no parameter entity references, or a document
5627 * with "standalone='yes'", the Name given in the entity reference
5628 * must match that in an entity declaration, except that well-formed
5629 * documents need not declare any of the following entities: amp, lt,
5630 * gt, apos, quot. The declaration of a parameter entity must precede
5631 * any reference to it. Similarly, the declaration of a general entity
5632 * must precede any reference to it which appears in a default value in an
5633 * attribute-list declaration. Note that if entities are declared in the
5634 * external subset or in external parameter entities, a non-validating
5635 * processor is not obligated to read and process their declarations;
5636 * for such documents, the rule that an entity must be declared is a
5637 * well-formedness constraint only if standalone='yes'.
5638 *
5639 * [ WFC: Parsed Entity ]
5640 * An entity reference must not contain the name of an unparsed entity
5641 *
5642 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5643 * is updated to the current location in the string.
5644 */
5645xmlEntityPtr
5646xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5647 xmlChar *name;
5648 const xmlChar *ptr;
5649 xmlChar cur;
5650 xmlEntityPtr ent = NULL;
5651
5652 if ((str == NULL) || (*str == NULL))
5653 return(NULL);
5654 ptr = *str;
5655 cur = *ptr;
5656 if (cur == '&') {
5657 ptr++;
5658 cur = *ptr;
5659 name = xmlParseStringName(ctxt, &ptr);
5660 if (name == NULL) {
5661 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5662 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5663 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005664 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005665 ctxt->wellFormed = 0;
5666 ctxt->disableSAX = 1;
5667 } else {
5668 if (*ptr == ';') {
5669 ptr++;
5670 /*
5671 * Ask first SAX for entity resolution, otherwise try the
5672 * predefined set.
5673 */
5674 if (ctxt->sax != NULL) {
5675 if (ctxt->sax->getEntity != NULL)
5676 ent = ctxt->sax->getEntity(ctxt->userData, name);
5677 if (ent == NULL)
5678 ent = xmlGetPredefinedEntity(name);
5679 }
5680 /*
5681 * [ WFC: Entity Declared ]
5682 * In a document without any DTD, a document with only an
5683 * internal DTD subset which contains no parameter entity
5684 * references, or a document with "standalone='yes'", the
5685 * Name given in the entity reference must match that in an
5686 * entity declaration, except that well-formed documents
5687 * need not declare any of the following entities: amp, lt,
5688 * gt, apos, quot.
5689 * The declaration of a parameter entity must precede any
5690 * reference to it.
5691 * Similarly, the declaration of a general entity must
5692 * precede any reference to it which appears in a default
5693 * value in an attribute-list declaration. Note that if
5694 * entities are declared in the external subset or in
5695 * external parameter entities, a non-validating processor
5696 * is not obligated to read and process their declarations;
5697 * for such documents, the rule that an entity must be
5698 * declared is a well-formedness constraint only if
5699 * standalone='yes'.
5700 */
5701 if (ent == NULL) {
5702 if ((ctxt->standalone == 1) ||
5703 ((ctxt->hasExternalSubset == 0) &&
5704 (ctxt->hasPErefs == 0))) {
5705 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5706 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5707 ctxt->sax->error(ctxt->userData,
5708 "Entity '%s' not defined\n", name);
5709 ctxt->wellFormed = 0;
5710 ctxt->disableSAX = 1;
5711 } else {
5712 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5713 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5714 ctxt->sax->warning(ctxt->userData,
5715 "Entity '%s' not defined\n", name);
5716 }
5717 }
5718
5719 /*
5720 * [ WFC: Parsed Entity ]
5721 * An entity reference must not contain the name of an
5722 * unparsed entity
5723 */
5724 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5725 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5726 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5727 ctxt->sax->error(ctxt->userData,
5728 "Entity reference to unparsed entity %s\n", name);
5729 ctxt->wellFormed = 0;
5730 ctxt->disableSAX = 1;
5731 }
5732
5733 /*
5734 * [ WFC: No External Entity References ]
5735 * Attribute values cannot contain direct or indirect
5736 * entity references to external entities.
5737 */
5738 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5739 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5740 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5741 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5742 ctxt->sax->error(ctxt->userData,
5743 "Attribute references external entity '%s'\n", name);
5744 ctxt->wellFormed = 0;
5745 ctxt->disableSAX = 1;
5746 }
5747 /*
5748 * [ WFC: No < in Attribute Values ]
5749 * The replacement text of any entity referred to directly or
5750 * indirectly in an attribute value (other than "&lt;") must
5751 * not contain a <.
5752 */
5753 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5754 (ent != NULL) &&
5755 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5756 (ent->content != NULL) &&
5757 (xmlStrchr(ent->content, '<'))) {
5758 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5759 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5760 ctxt->sax->error(ctxt->userData,
5761 "'<' in entity '%s' is not allowed in attributes values\n", name);
5762 ctxt->wellFormed = 0;
5763 ctxt->disableSAX = 1;
5764 }
5765
5766 /*
5767 * Internal check, no parameter entities here ...
5768 */
5769 else {
5770 switch (ent->etype) {
5771 case XML_INTERNAL_PARAMETER_ENTITY:
5772 case XML_EXTERNAL_PARAMETER_ENTITY:
5773 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5774 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5775 ctxt->sax->error(ctxt->userData,
5776 "Attempt to reference the parameter entity '%s'\n", name);
5777 ctxt->wellFormed = 0;
5778 ctxt->disableSAX = 1;
5779 break;
5780 default:
5781 break;
5782 }
5783 }
5784
5785 /*
5786 * [ WFC: No Recursion ]
5787 * A parsed entity must not contain a recursive reference
5788 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005789 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00005790 */
5791
5792 } else {
5793 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5794 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5795 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005796 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005797 ctxt->wellFormed = 0;
5798 ctxt->disableSAX = 1;
5799 }
5800 xmlFree(name);
5801 }
5802 }
5803 *str = ptr;
5804 return(ent);
5805}
5806
5807/**
5808 * xmlParsePEReference:
5809 * @ctxt: an XML parser context
5810 *
5811 * parse PEReference declarations
5812 * The entity content is handled directly by pushing it's content as
5813 * a new input stream.
5814 *
5815 * [69] PEReference ::= '%' Name ';'
5816 *
5817 * [ WFC: No Recursion ]
5818 * A parsed entity must not contain a recursive
5819 * reference to itself, either directly or indirectly.
5820 *
5821 * [ WFC: Entity Declared ]
5822 * In a document without any DTD, a document with only an internal DTD
5823 * subset which contains no parameter entity references, or a document
5824 * with "standalone='yes'", ... ... The declaration of a parameter
5825 * entity must precede any reference to it...
5826 *
5827 * [ VC: Entity Declared ]
5828 * In a document with an external subset or external parameter entities
5829 * with "standalone='no'", ... ... The declaration of a parameter entity
5830 * must precede any reference to it...
5831 *
5832 * [ WFC: In DTD ]
5833 * Parameter-entity references may only appear in the DTD.
5834 * NOTE: misleading but this is handled.
5835 */
5836void
5837xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5838 xmlChar *name;
5839 xmlEntityPtr entity = NULL;
5840 xmlParserInputPtr input;
5841
5842 if (RAW == '%') {
5843 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005844 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005845 if (name == NULL) {
5846 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5847 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5848 ctxt->sax->error(ctxt->userData,
5849 "xmlParsePEReference: no name\n");
5850 ctxt->wellFormed = 0;
5851 ctxt->disableSAX = 1;
5852 } else {
5853 if (RAW == ';') {
5854 NEXT;
5855 if ((ctxt->sax != NULL) &&
5856 (ctxt->sax->getParameterEntity != NULL))
5857 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5858 name);
5859 if (entity == NULL) {
5860 /*
5861 * [ WFC: Entity Declared ]
5862 * In a document without any DTD, a document with only an
5863 * internal DTD subset which contains no parameter entity
5864 * references, or a document with "standalone='yes'", ...
5865 * ... The declaration of a parameter entity must precede
5866 * any reference to it...
5867 */
5868 if ((ctxt->standalone == 1) ||
5869 ((ctxt->hasExternalSubset == 0) &&
5870 (ctxt->hasPErefs == 0))) {
5871 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5872 if ((!ctxt->disableSAX) &&
5873 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5874 ctxt->sax->error(ctxt->userData,
5875 "PEReference: %%%s; not found\n", name);
5876 ctxt->wellFormed = 0;
5877 ctxt->disableSAX = 1;
5878 } else {
5879 /*
5880 * [ VC: Entity Declared ]
5881 * In a document with an external subset or external
5882 * parameter entities with "standalone='no'", ...
5883 * ... The declaration of a parameter entity must precede
5884 * any reference to it...
5885 */
5886 if ((!ctxt->disableSAX) &&
5887 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5888 ctxt->sax->warning(ctxt->userData,
5889 "PEReference: %%%s; not found\n", name);
5890 ctxt->valid = 0;
5891 }
5892 } else {
5893 /*
5894 * Internal checking in case the entity quest barfed
5895 */
5896 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5897 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5898 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5899 ctxt->sax->warning(ctxt->userData,
5900 "Internal: %%%s; is not a parameter entity\n", name);
5901 } else {
5902 /*
5903 * TODO !!!
5904 * handle the extra spaces added before and after
5905 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5906 */
5907 input = xmlNewEntityInputStream(ctxt, entity);
5908 xmlPushInput(ctxt, input);
5909 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5910 (RAW == '<') && (NXT(1) == '?') &&
5911 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5912 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5913 xmlParseTextDecl(ctxt);
5914 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5915 /*
5916 * The XML REC instructs us to stop parsing
5917 * right here
5918 */
5919 ctxt->instate = XML_PARSER_EOF;
5920 xmlFree(name);
5921 return;
5922 }
5923 }
5924 if (ctxt->token == 0)
5925 ctxt->token = ' ';
5926 }
5927 }
5928 ctxt->hasPErefs = 1;
5929 } else {
5930 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5931 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5932 ctxt->sax->error(ctxt->userData,
5933 "xmlParsePEReference: expecting ';'\n");
5934 ctxt->wellFormed = 0;
5935 ctxt->disableSAX = 1;
5936 }
5937 xmlFree(name);
5938 }
5939 }
5940}
5941
5942/**
5943 * xmlParseStringPEReference:
5944 * @ctxt: an XML parser context
5945 * @str: a pointer to an index in the string
5946 *
5947 * parse PEReference declarations
5948 *
5949 * [69] PEReference ::= '%' Name ';'
5950 *
5951 * [ WFC: No Recursion ]
5952 * A parsed entity must not contain a recursive
5953 * reference to itself, either directly or indirectly.
5954 *
5955 * [ WFC: Entity Declared ]
5956 * In a document without any DTD, a document with only an internal DTD
5957 * subset which contains no parameter entity references, or a document
5958 * with "standalone='yes'", ... ... The declaration of a parameter
5959 * entity must precede any reference to it...
5960 *
5961 * [ VC: Entity Declared ]
5962 * In a document with an external subset or external parameter entities
5963 * with "standalone='no'", ... ... The declaration of a parameter entity
5964 * must precede any reference to it...
5965 *
5966 * [ WFC: In DTD ]
5967 * Parameter-entity references may only appear in the DTD.
5968 * NOTE: misleading but this is handled.
5969 *
5970 * Returns the string of the entity content.
5971 * str is updated to the current value of the index
5972 */
5973xmlEntityPtr
5974xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5975 const xmlChar *ptr;
5976 xmlChar cur;
5977 xmlChar *name;
5978 xmlEntityPtr entity = NULL;
5979
5980 if ((str == NULL) || (*str == NULL)) return(NULL);
5981 ptr = *str;
5982 cur = *ptr;
5983 if (cur == '%') {
5984 ptr++;
5985 cur = *ptr;
5986 name = xmlParseStringName(ctxt, &ptr);
5987 if (name == NULL) {
5988 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5989 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5990 ctxt->sax->error(ctxt->userData,
5991 "xmlParseStringPEReference: no name\n");
5992 ctxt->wellFormed = 0;
5993 ctxt->disableSAX = 1;
5994 } else {
5995 cur = *ptr;
5996 if (cur == ';') {
5997 ptr++;
5998 cur = *ptr;
5999 if ((ctxt->sax != NULL) &&
6000 (ctxt->sax->getParameterEntity != NULL))
6001 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6002 name);
6003 if (entity == NULL) {
6004 /*
6005 * [ WFC: Entity Declared ]
6006 * In a document without any DTD, a document with only an
6007 * internal DTD subset which contains no parameter entity
6008 * references, or a document with "standalone='yes'", ...
6009 * ... The declaration of a parameter entity must precede
6010 * any reference to it...
6011 */
6012 if ((ctxt->standalone == 1) ||
6013 ((ctxt->hasExternalSubset == 0) &&
6014 (ctxt->hasPErefs == 0))) {
6015 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6016 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6017 ctxt->sax->error(ctxt->userData,
6018 "PEReference: %%%s; not found\n", name);
6019 ctxt->wellFormed = 0;
6020 ctxt->disableSAX = 1;
6021 } else {
6022 /*
6023 * [ VC: Entity Declared ]
6024 * In a document with an external subset or external
6025 * parameter entities with "standalone='no'", ...
6026 * ... The declaration of a parameter entity must
6027 * precede any reference to it...
6028 */
6029 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6030 ctxt->sax->warning(ctxt->userData,
6031 "PEReference: %%%s; not found\n", name);
6032 ctxt->valid = 0;
6033 }
6034 } else {
6035 /*
6036 * Internal checking in case the entity quest barfed
6037 */
6038 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6039 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6040 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6041 ctxt->sax->warning(ctxt->userData,
6042 "Internal: %%%s; is not a parameter entity\n", name);
6043 }
6044 }
6045 ctxt->hasPErefs = 1;
6046 } else {
6047 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6048 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6049 ctxt->sax->error(ctxt->userData,
6050 "xmlParseStringPEReference: expecting ';'\n");
6051 ctxt->wellFormed = 0;
6052 ctxt->disableSAX = 1;
6053 }
6054 xmlFree(name);
6055 }
6056 }
6057 *str = ptr;
6058 return(entity);
6059}
6060
6061/**
6062 * xmlParseDocTypeDecl:
6063 * @ctxt: an XML parser context
6064 *
6065 * parse a DOCTYPE declaration
6066 *
6067 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6068 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6069 *
6070 * [ VC: Root Element Type ]
6071 * The Name in the document type declaration must match the element
6072 * type of the root element.
6073 */
6074
6075void
6076xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6077 xmlChar *name = NULL;
6078 xmlChar *ExternalID = NULL;
6079 xmlChar *URI = NULL;
6080
6081 /*
6082 * We know that '<!DOCTYPE' has been detected.
6083 */
6084 SKIP(9);
6085
6086 SKIP_BLANKS;
6087
6088 /*
6089 * Parse the DOCTYPE name.
6090 */
6091 name = xmlParseName(ctxt);
6092 if (name == NULL) {
6093 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6094 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6095 ctxt->sax->error(ctxt->userData,
6096 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6097 ctxt->wellFormed = 0;
6098 ctxt->disableSAX = 1;
6099 }
6100 ctxt->intSubName = name;
6101
6102 SKIP_BLANKS;
6103
6104 /*
6105 * Check for SystemID and ExternalID
6106 */
6107 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6108
6109 if ((URI != NULL) || (ExternalID != NULL)) {
6110 ctxt->hasExternalSubset = 1;
6111 }
6112 ctxt->extSubURI = URI;
6113 ctxt->extSubSystem = ExternalID;
6114
6115 SKIP_BLANKS;
6116
6117 /*
6118 * Create and update the internal subset.
6119 */
6120 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6121 (!ctxt->disableSAX))
6122 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6123
6124 /*
6125 * Is there any internal subset declarations ?
6126 * they are handled separately in xmlParseInternalSubset()
6127 */
6128 if (RAW == '[')
6129 return;
6130
6131 /*
6132 * We should be at the end of the DOCTYPE declaration.
6133 */
6134 if (RAW != '>') {
6135 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6136 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006137 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006138 ctxt->wellFormed = 0;
6139 ctxt->disableSAX = 1;
6140 }
6141 NEXT;
6142}
6143
6144/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006145 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006146 * @ctxt: an XML parser context
6147 *
6148 * parse the internal subset declaration
6149 *
6150 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6151 */
6152
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006153static void
Owen Taylor3473f882001-02-23 17:55:21 +00006154xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6155 /*
6156 * Is there any DTD definition ?
6157 */
6158 if (RAW == '[') {
6159 ctxt->instate = XML_PARSER_DTD;
6160 NEXT;
6161 /*
6162 * Parse the succession of Markup declarations and
6163 * PEReferences.
6164 * Subsequence (markupdecl | PEReference | S)*
6165 */
6166 while (RAW != ']') {
6167 const xmlChar *check = CUR_PTR;
6168 int cons = ctxt->input->consumed;
6169
6170 SKIP_BLANKS;
6171 xmlParseMarkupDecl(ctxt);
6172 xmlParsePEReference(ctxt);
6173
6174 /*
6175 * Pop-up of finished entities.
6176 */
6177 while ((RAW == 0) && (ctxt->inputNr > 1))
6178 xmlPopInput(ctxt);
6179
6180 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6181 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6182 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6183 ctxt->sax->error(ctxt->userData,
6184 "xmlParseInternalSubset: error detected in Markup declaration\n");
6185 ctxt->wellFormed = 0;
6186 ctxt->disableSAX = 1;
6187 break;
6188 }
6189 }
6190 if (RAW == ']') {
6191 NEXT;
6192 SKIP_BLANKS;
6193 }
6194 }
6195
6196 /*
6197 * We should be at the end of the DOCTYPE declaration.
6198 */
6199 if (RAW != '>') {
6200 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6201 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006202 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006203 ctxt->wellFormed = 0;
6204 ctxt->disableSAX = 1;
6205 }
6206 NEXT;
6207}
6208
6209/**
6210 * xmlParseAttribute:
6211 * @ctxt: an XML parser context
6212 * @value: a xmlChar ** used to store the value of the attribute
6213 *
6214 * parse an attribute
6215 *
6216 * [41] Attribute ::= Name Eq AttValue
6217 *
6218 * [ WFC: No External Entity References ]
6219 * Attribute values cannot contain direct or indirect entity references
6220 * to external entities.
6221 *
6222 * [ WFC: No < in Attribute Values ]
6223 * The replacement text of any entity referred to directly or indirectly in
6224 * an attribute value (other than "&lt;") must not contain a <.
6225 *
6226 * [ VC: Attribute Value Type ]
6227 * The attribute must have been declared; the value must be of the type
6228 * declared for it.
6229 *
6230 * [25] Eq ::= S? '=' S?
6231 *
6232 * With namespace:
6233 *
6234 * [NS 11] Attribute ::= QName Eq AttValue
6235 *
6236 * Also the case QName == xmlns:??? is handled independently as a namespace
6237 * definition.
6238 *
6239 * Returns the attribute name, and the value in *value.
6240 */
6241
6242xmlChar *
6243xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6244 xmlChar *name, *val;
6245
6246 *value = NULL;
6247 name = xmlParseName(ctxt);
6248 if (name == NULL) {
6249 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6250 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6251 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6252 ctxt->wellFormed = 0;
6253 ctxt->disableSAX = 1;
6254 return(NULL);
6255 }
6256
6257 /*
6258 * read the value
6259 */
6260 SKIP_BLANKS;
6261 if (RAW == '=') {
6262 NEXT;
6263 SKIP_BLANKS;
6264 val = xmlParseAttValue(ctxt);
6265 ctxt->instate = XML_PARSER_CONTENT;
6266 } else {
6267 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6268 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6269 ctxt->sax->error(ctxt->userData,
6270 "Specification mandate value for attribute %s\n", name);
6271 ctxt->wellFormed = 0;
6272 ctxt->disableSAX = 1;
6273 xmlFree(name);
6274 return(NULL);
6275 }
6276
6277 /*
6278 * Check that xml:lang conforms to the specification
6279 * No more registered as an error, just generate a warning now
6280 * since this was deprecated in XML second edition
6281 */
6282 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6283 if (!xmlCheckLanguageID(val)) {
6284 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6285 ctxt->sax->warning(ctxt->userData,
6286 "Malformed value for xml:lang : %s\n", val);
6287 }
6288 }
6289
6290 /*
6291 * Check that xml:space conforms to the specification
6292 */
6293 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6294 if (xmlStrEqual(val, BAD_CAST "default"))
6295 *(ctxt->space) = 0;
6296 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6297 *(ctxt->space) = 1;
6298 else {
6299 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6300 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6301 ctxt->sax->error(ctxt->userData,
6302"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6303 val);
6304 ctxt->wellFormed = 0;
6305 ctxt->disableSAX = 1;
6306 }
6307 }
6308
6309 *value = val;
6310 return(name);
6311}
6312
6313/**
6314 * xmlParseStartTag:
6315 * @ctxt: an XML parser context
6316 *
6317 * parse a start of tag either for rule element or
6318 * EmptyElement. In both case we don't parse the tag closing chars.
6319 *
6320 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6321 *
6322 * [ WFC: Unique Att Spec ]
6323 * No attribute name may appear more than once in the same start-tag or
6324 * empty-element tag.
6325 *
6326 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6327 *
6328 * [ WFC: Unique Att Spec ]
6329 * No attribute name may appear more than once in the same start-tag or
6330 * empty-element tag.
6331 *
6332 * With namespace:
6333 *
6334 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6335 *
6336 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6337 *
6338 * Returns the element name parsed
6339 */
6340
6341xmlChar *
6342xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6343 xmlChar *name;
6344 xmlChar *attname;
6345 xmlChar *attvalue;
6346 const xmlChar **atts = NULL;
6347 int nbatts = 0;
6348 int maxatts = 0;
6349 int i;
6350
6351 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006352 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006353
6354 name = xmlParseName(ctxt);
6355 if (name == NULL) {
6356 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6357 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6358 ctxt->sax->error(ctxt->userData,
6359 "xmlParseStartTag: invalid element name\n");
6360 ctxt->wellFormed = 0;
6361 ctxt->disableSAX = 1;
6362 return(NULL);
6363 }
6364
6365 /*
6366 * Now parse the attributes, it ends up with the ending
6367 *
6368 * (S Attribute)* S?
6369 */
6370 SKIP_BLANKS;
6371 GROW;
6372
Daniel Veillard21a0f912001-02-25 19:54:14 +00006373 while ((RAW != '>') &&
6374 ((RAW != '/') || (NXT(1) != '>')) &&
6375 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006376 const xmlChar *q = CUR_PTR;
6377 int cons = ctxt->input->consumed;
6378
6379 attname = xmlParseAttribute(ctxt, &attvalue);
6380 if ((attname != NULL) && (attvalue != NULL)) {
6381 /*
6382 * [ WFC: Unique Att Spec ]
6383 * No attribute name may appear more than once in the same
6384 * start-tag or empty-element tag.
6385 */
6386 for (i = 0; i < nbatts;i += 2) {
6387 if (xmlStrEqual(atts[i], attname)) {
6388 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6389 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6390 ctxt->sax->error(ctxt->userData,
6391 "Attribute %s redefined\n",
6392 attname);
6393 ctxt->wellFormed = 0;
6394 ctxt->disableSAX = 1;
6395 xmlFree(attname);
6396 xmlFree(attvalue);
6397 goto failed;
6398 }
6399 }
6400
6401 /*
6402 * Add the pair to atts
6403 */
6404 if (atts == NULL) {
6405 maxatts = 10;
6406 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6407 if (atts == NULL) {
6408 xmlGenericError(xmlGenericErrorContext,
6409 "malloc of %ld byte failed\n",
6410 maxatts * (long)sizeof(xmlChar *));
6411 return(NULL);
6412 }
6413 } else if (nbatts + 4 > maxatts) {
6414 maxatts *= 2;
6415 atts = (const xmlChar **) xmlRealloc((void *) atts,
6416 maxatts * sizeof(xmlChar *));
6417 if (atts == NULL) {
6418 xmlGenericError(xmlGenericErrorContext,
6419 "realloc of %ld byte failed\n",
6420 maxatts * (long)sizeof(xmlChar *));
6421 return(NULL);
6422 }
6423 }
6424 atts[nbatts++] = attname;
6425 atts[nbatts++] = attvalue;
6426 atts[nbatts] = NULL;
6427 atts[nbatts + 1] = NULL;
6428 } else {
6429 if (attname != NULL)
6430 xmlFree(attname);
6431 if (attvalue != NULL)
6432 xmlFree(attvalue);
6433 }
6434
6435failed:
6436
6437 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6438 break;
6439 if (!IS_BLANK(RAW)) {
6440 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6441 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6442 ctxt->sax->error(ctxt->userData,
6443 "attributes construct error\n");
6444 ctxt->wellFormed = 0;
6445 ctxt->disableSAX = 1;
6446 }
6447 SKIP_BLANKS;
6448 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6449 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6450 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6451 ctxt->sax->error(ctxt->userData,
6452 "xmlParseStartTag: problem parsing attributes\n");
6453 ctxt->wellFormed = 0;
6454 ctxt->disableSAX = 1;
6455 break;
6456 }
6457 GROW;
6458 }
6459
6460 /*
6461 * SAX: Start of Element !
6462 */
6463 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6464 (!ctxt->disableSAX))
6465 ctxt->sax->startElement(ctxt->userData, name, atts);
6466
6467 if (atts != NULL) {
6468 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6469 xmlFree((void *) atts);
6470 }
6471 return(name);
6472}
6473
6474/**
6475 * xmlParseEndTag:
6476 * @ctxt: an XML parser context
6477 *
6478 * parse an end of tag
6479 *
6480 * [42] ETag ::= '</' Name S? '>'
6481 *
6482 * With namespace
6483 *
6484 * [NS 9] ETag ::= '</' QName S? '>'
6485 */
6486
6487void
6488xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6489 xmlChar *name;
6490 xmlChar *oldname;
6491
6492 GROW;
6493 if ((RAW != '<') || (NXT(1) != '/')) {
6494 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6495 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6496 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6497 ctxt->wellFormed = 0;
6498 ctxt->disableSAX = 1;
6499 return;
6500 }
6501 SKIP(2);
6502
6503 name = xmlParseName(ctxt);
6504
6505 /*
6506 * We should definitely be at the ending "S? '>'" part
6507 */
6508 GROW;
6509 SKIP_BLANKS;
6510 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6511 ctxt->errNo = XML_ERR_GT_REQUIRED;
6512 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6513 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6514 ctxt->wellFormed = 0;
6515 ctxt->disableSAX = 1;
6516 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006517 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006518
6519 /*
6520 * [ WFC: Element Type Match ]
6521 * The Name in an element's end-tag must match the element type in the
6522 * start-tag.
6523 *
6524 */
6525 if ((name == NULL) || (ctxt->name == NULL) ||
6526 (!xmlStrEqual(name, ctxt->name))) {
6527 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6528 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6529 if ((name != NULL) && (ctxt->name != NULL)) {
6530 ctxt->sax->error(ctxt->userData,
6531 "Opening and ending tag mismatch: %s and %s\n",
6532 ctxt->name, name);
6533 } else if (ctxt->name != NULL) {
6534 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006535 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006536 } else {
6537 ctxt->sax->error(ctxt->userData,
6538 "Ending tag error: internal error ???\n");
6539 }
6540
6541 }
6542 ctxt->wellFormed = 0;
6543 ctxt->disableSAX = 1;
6544 }
6545
6546 /*
6547 * SAX: End of Tag
6548 */
6549 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6550 (!ctxt->disableSAX))
6551 ctxt->sax->endElement(ctxt->userData, name);
6552
6553 if (name != NULL)
6554 xmlFree(name);
6555 oldname = namePop(ctxt);
6556 spacePop(ctxt);
6557 if (oldname != NULL) {
6558#ifdef DEBUG_STACK
6559 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6560#endif
6561 xmlFree(oldname);
6562 }
6563 return;
6564}
6565
6566/**
6567 * xmlParseCDSect:
6568 * @ctxt: an XML parser context
6569 *
6570 * Parse escaped pure raw content.
6571 *
6572 * [18] CDSect ::= CDStart CData CDEnd
6573 *
6574 * [19] CDStart ::= '<![CDATA['
6575 *
6576 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6577 *
6578 * [21] CDEnd ::= ']]>'
6579 */
6580void
6581xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6582 xmlChar *buf = NULL;
6583 int len = 0;
6584 int size = XML_PARSER_BUFFER_SIZE;
6585 int r, rl;
6586 int s, sl;
6587 int cur, l;
6588 int count = 0;
6589
6590 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6591 (NXT(2) == '[') && (NXT(3) == 'C') &&
6592 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6593 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6594 (NXT(8) == '[')) {
6595 SKIP(9);
6596 } else
6597 return;
6598
6599 ctxt->instate = XML_PARSER_CDATA_SECTION;
6600 r = CUR_CHAR(rl);
6601 if (!IS_CHAR(r)) {
6602 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6603 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6604 ctxt->sax->error(ctxt->userData,
6605 "CData section not finished\n");
6606 ctxt->wellFormed = 0;
6607 ctxt->disableSAX = 1;
6608 ctxt->instate = XML_PARSER_CONTENT;
6609 return;
6610 }
6611 NEXTL(rl);
6612 s = CUR_CHAR(sl);
6613 if (!IS_CHAR(s)) {
6614 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6615 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6616 ctxt->sax->error(ctxt->userData,
6617 "CData section not finished\n");
6618 ctxt->wellFormed = 0;
6619 ctxt->disableSAX = 1;
6620 ctxt->instate = XML_PARSER_CONTENT;
6621 return;
6622 }
6623 NEXTL(sl);
6624 cur = CUR_CHAR(l);
6625 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6626 if (buf == NULL) {
6627 xmlGenericError(xmlGenericErrorContext,
6628 "malloc of %d byte failed\n", size);
6629 return;
6630 }
6631 while (IS_CHAR(cur) &&
6632 ((r != ']') || (s != ']') || (cur != '>'))) {
6633 if (len + 5 >= size) {
6634 size *= 2;
6635 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6636 if (buf == NULL) {
6637 xmlGenericError(xmlGenericErrorContext,
6638 "realloc of %d byte failed\n", size);
6639 return;
6640 }
6641 }
6642 COPY_BUF(rl,buf,len,r);
6643 r = s;
6644 rl = sl;
6645 s = cur;
6646 sl = l;
6647 count++;
6648 if (count > 50) {
6649 GROW;
6650 count = 0;
6651 }
6652 NEXTL(l);
6653 cur = CUR_CHAR(l);
6654 }
6655 buf[len] = 0;
6656 ctxt->instate = XML_PARSER_CONTENT;
6657 if (cur != '>') {
6658 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6659 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6660 ctxt->sax->error(ctxt->userData,
6661 "CData section not finished\n%.50s\n", buf);
6662 ctxt->wellFormed = 0;
6663 ctxt->disableSAX = 1;
6664 xmlFree(buf);
6665 return;
6666 }
6667 NEXTL(l);
6668
6669 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006670 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006671 */
6672 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6673 if (ctxt->sax->cdataBlock != NULL)
6674 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006675 else if (ctxt->sax->characters != NULL)
6676 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006677 }
6678 xmlFree(buf);
6679}
6680
6681/**
6682 * xmlParseContent:
6683 * @ctxt: an XML parser context
6684 *
6685 * Parse a content:
6686 *
6687 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6688 */
6689
6690void
6691xmlParseContent(xmlParserCtxtPtr ctxt) {
6692 GROW;
6693 while (((RAW != 0) || (ctxt->token != 0)) &&
6694 ((RAW != '<') || (NXT(1) != '/'))) {
6695 const xmlChar *test = CUR_PTR;
6696 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006697 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006698 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006699
6700 /*
6701 * Handle possible processed charrefs.
6702 */
6703 if (ctxt->token != 0) {
6704 xmlParseCharData(ctxt, 0);
6705 }
6706 /*
6707 * First case : a Processing Instruction.
6708 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006709 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006710 xmlParsePI(ctxt);
6711 }
6712
6713 /*
6714 * Second case : a CDSection
6715 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006716 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006717 (NXT(2) == '[') && (NXT(3) == 'C') &&
6718 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6719 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6720 (NXT(8) == '[')) {
6721 xmlParseCDSect(ctxt);
6722 }
6723
6724 /*
6725 * Third case : a comment
6726 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006727 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006728 (NXT(2) == '-') && (NXT(3) == '-')) {
6729 xmlParseComment(ctxt);
6730 ctxt->instate = XML_PARSER_CONTENT;
6731 }
6732
6733 /*
6734 * Fourth case : a sub-element.
6735 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006736 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006737 xmlParseElement(ctxt);
6738 }
6739
6740 /*
6741 * Fifth case : a reference. If if has not been resolved,
6742 * parsing returns it's Name, create the node
6743 */
6744
Daniel Veillard21a0f912001-02-25 19:54:14 +00006745 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006746 xmlParseReference(ctxt);
6747 }
6748
6749 /*
6750 * Last case, text. Note that References are handled directly.
6751 */
6752 else {
6753 xmlParseCharData(ctxt, 0);
6754 }
6755
6756 GROW;
6757 /*
6758 * Pop-up of finished entities.
6759 */
6760 while ((RAW == 0) && (ctxt->inputNr > 1))
6761 xmlPopInput(ctxt);
6762 SHRINK;
6763
6764 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6765 (tok == ctxt->token)) {
6766 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6767 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6768 ctxt->sax->error(ctxt->userData,
6769 "detected an error in element content\n");
6770 ctxt->wellFormed = 0;
6771 ctxt->disableSAX = 1;
6772 ctxt->instate = XML_PARSER_EOF;
6773 break;
6774 }
6775 }
6776}
6777
6778/**
6779 * xmlParseElement:
6780 * @ctxt: an XML parser context
6781 *
6782 * parse an XML element, this is highly recursive
6783 *
6784 * [39] element ::= EmptyElemTag | STag content ETag
6785 *
6786 * [ WFC: Element Type Match ]
6787 * The Name in an element's end-tag must match the element type in the
6788 * start-tag.
6789 *
6790 * [ VC: Element Valid ]
6791 * An element is valid if there is a declaration matching elementdecl
6792 * where the Name matches the element type and one of the following holds:
6793 * - The declaration matches EMPTY and the element has no content.
6794 * - The declaration matches children and the sequence of child elements
6795 * belongs to the language generated by the regular expression in the
6796 * content model, with optional white space (characters matching the
6797 * nonterminal S) between each pair of child elements.
6798 * - The declaration matches Mixed and the content consists of character
6799 * data and child elements whose types match names in the content model.
6800 * - The declaration matches ANY, and the types of any child elements have
6801 * been declared.
6802 */
6803
6804void
6805xmlParseElement(xmlParserCtxtPtr ctxt) {
6806 const xmlChar *openTag = CUR_PTR;
6807 xmlChar *name;
6808 xmlChar *oldname;
6809 xmlParserNodeInfo node_info;
6810 xmlNodePtr ret;
6811
6812 /* Capture start position */
6813 if (ctxt->record_info) {
6814 node_info.begin_pos = ctxt->input->consumed +
6815 (CUR_PTR - ctxt->input->base);
6816 node_info.begin_line = ctxt->input->line;
6817 }
6818
6819 if (ctxt->spaceNr == 0)
6820 spacePush(ctxt, -1);
6821 else
6822 spacePush(ctxt, *ctxt->space);
6823
6824 name = xmlParseStartTag(ctxt);
6825 if (name == NULL) {
6826 spacePop(ctxt);
6827 return;
6828 }
6829 namePush(ctxt, name);
6830 ret = ctxt->node;
6831
6832 /*
6833 * [ VC: Root Element Type ]
6834 * The Name in the document type declaration must match the element
6835 * type of the root element.
6836 */
6837 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6838 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6839 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6840
6841 /*
6842 * Check for an Empty Element.
6843 */
6844 if ((RAW == '/') && (NXT(1) == '>')) {
6845 SKIP(2);
6846 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6847 (!ctxt->disableSAX))
6848 ctxt->sax->endElement(ctxt->userData, name);
6849 oldname = namePop(ctxt);
6850 spacePop(ctxt);
6851 if (oldname != NULL) {
6852#ifdef DEBUG_STACK
6853 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6854#endif
6855 xmlFree(oldname);
6856 }
6857 if ( ret != NULL && ctxt->record_info ) {
6858 node_info.end_pos = ctxt->input->consumed +
6859 (CUR_PTR - ctxt->input->base);
6860 node_info.end_line = ctxt->input->line;
6861 node_info.node = ret;
6862 xmlParserAddNodeInfo(ctxt, &node_info);
6863 }
6864 return;
6865 }
6866 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006867 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006868 } else {
6869 ctxt->errNo = XML_ERR_GT_REQUIRED;
6870 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6871 ctxt->sax->error(ctxt->userData,
6872 "Couldn't find end of Start Tag\n%.30s\n",
6873 openTag);
6874 ctxt->wellFormed = 0;
6875 ctxt->disableSAX = 1;
6876
6877 /*
6878 * end of parsing of this node.
6879 */
6880 nodePop(ctxt);
6881 oldname = namePop(ctxt);
6882 spacePop(ctxt);
6883 if (oldname != NULL) {
6884#ifdef DEBUG_STACK
6885 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6886#endif
6887 xmlFree(oldname);
6888 }
6889
6890 /*
6891 * Capture end position and add node
6892 */
6893 if ( ret != NULL && ctxt->record_info ) {
6894 node_info.end_pos = ctxt->input->consumed +
6895 (CUR_PTR - ctxt->input->base);
6896 node_info.end_line = ctxt->input->line;
6897 node_info.node = ret;
6898 xmlParserAddNodeInfo(ctxt, &node_info);
6899 }
6900 return;
6901 }
6902
6903 /*
6904 * Parse the content of the element:
6905 */
6906 xmlParseContent(ctxt);
6907 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00006908 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00006909 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6910 ctxt->sax->error(ctxt->userData,
6911 "Premature end of data in tag %.30s\n", openTag);
6912 ctxt->wellFormed = 0;
6913 ctxt->disableSAX = 1;
6914
6915 /*
6916 * end of parsing of this node.
6917 */
6918 nodePop(ctxt);
6919 oldname = namePop(ctxt);
6920 spacePop(ctxt);
6921 if (oldname != NULL) {
6922#ifdef DEBUG_STACK
6923 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6924#endif
6925 xmlFree(oldname);
6926 }
6927 return;
6928 }
6929
6930 /*
6931 * parse the end of tag: '</' should be here.
6932 */
6933 xmlParseEndTag(ctxt);
6934
6935 /*
6936 * Capture end position and add node
6937 */
6938 if ( ret != NULL && ctxt->record_info ) {
6939 node_info.end_pos = ctxt->input->consumed +
6940 (CUR_PTR - ctxt->input->base);
6941 node_info.end_line = ctxt->input->line;
6942 node_info.node = ret;
6943 xmlParserAddNodeInfo(ctxt, &node_info);
6944 }
6945}
6946
6947/**
6948 * xmlParseVersionNum:
6949 * @ctxt: an XML parser context
6950 *
6951 * parse the XML version value.
6952 *
6953 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6954 *
6955 * Returns the string giving the XML version number, or NULL
6956 */
6957xmlChar *
6958xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6959 xmlChar *buf = NULL;
6960 int len = 0;
6961 int size = 10;
6962 xmlChar cur;
6963
6964 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6965 if (buf == NULL) {
6966 xmlGenericError(xmlGenericErrorContext,
6967 "malloc of %d byte failed\n", size);
6968 return(NULL);
6969 }
6970 cur = CUR;
6971 while (((cur >= 'a') && (cur <= 'z')) ||
6972 ((cur >= 'A') && (cur <= 'Z')) ||
6973 ((cur >= '0') && (cur <= '9')) ||
6974 (cur == '_') || (cur == '.') ||
6975 (cur == ':') || (cur == '-')) {
6976 if (len + 1 >= size) {
6977 size *= 2;
6978 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6979 if (buf == NULL) {
6980 xmlGenericError(xmlGenericErrorContext,
6981 "realloc of %d byte failed\n", size);
6982 return(NULL);
6983 }
6984 }
6985 buf[len++] = cur;
6986 NEXT;
6987 cur=CUR;
6988 }
6989 buf[len] = 0;
6990 return(buf);
6991}
6992
6993/**
6994 * xmlParseVersionInfo:
6995 * @ctxt: an XML parser context
6996 *
6997 * parse the XML version.
6998 *
6999 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7000 *
7001 * [25] Eq ::= S? '=' S?
7002 *
7003 * Returns the version string, e.g. "1.0"
7004 */
7005
7006xmlChar *
7007xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7008 xmlChar *version = NULL;
7009 const xmlChar *q;
7010
7011 if ((RAW == 'v') && (NXT(1) == 'e') &&
7012 (NXT(2) == 'r') && (NXT(3) == 's') &&
7013 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7014 (NXT(6) == 'n')) {
7015 SKIP(7);
7016 SKIP_BLANKS;
7017 if (RAW != '=') {
7018 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7019 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7020 ctxt->sax->error(ctxt->userData,
7021 "xmlParseVersionInfo : expected '='\n");
7022 ctxt->wellFormed = 0;
7023 ctxt->disableSAX = 1;
7024 return(NULL);
7025 }
7026 NEXT;
7027 SKIP_BLANKS;
7028 if (RAW == '"') {
7029 NEXT;
7030 q = CUR_PTR;
7031 version = xmlParseVersionNum(ctxt);
7032 if (RAW != '"') {
7033 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7034 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7035 ctxt->sax->error(ctxt->userData,
7036 "String not closed\n%.50s\n", q);
7037 ctxt->wellFormed = 0;
7038 ctxt->disableSAX = 1;
7039 } else
7040 NEXT;
7041 } else if (RAW == '\''){
7042 NEXT;
7043 q = CUR_PTR;
7044 version = xmlParseVersionNum(ctxt);
7045 if (RAW != '\'') {
7046 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7047 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7048 ctxt->sax->error(ctxt->userData,
7049 "String not closed\n%.50s\n", q);
7050 ctxt->wellFormed = 0;
7051 ctxt->disableSAX = 1;
7052 } else
7053 NEXT;
7054 } else {
7055 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7056 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7057 ctxt->sax->error(ctxt->userData,
7058 "xmlParseVersionInfo : expected ' or \"\n");
7059 ctxt->wellFormed = 0;
7060 ctxt->disableSAX = 1;
7061 }
7062 }
7063 return(version);
7064}
7065
7066/**
7067 * xmlParseEncName:
7068 * @ctxt: an XML parser context
7069 *
7070 * parse the XML encoding name
7071 *
7072 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7073 *
7074 * Returns the encoding name value or NULL
7075 */
7076xmlChar *
7077xmlParseEncName(xmlParserCtxtPtr ctxt) {
7078 xmlChar *buf = NULL;
7079 int len = 0;
7080 int size = 10;
7081 xmlChar cur;
7082
7083 cur = CUR;
7084 if (((cur >= 'a') && (cur <= 'z')) ||
7085 ((cur >= 'A') && (cur <= 'Z'))) {
7086 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7087 if (buf == NULL) {
7088 xmlGenericError(xmlGenericErrorContext,
7089 "malloc of %d byte failed\n", size);
7090 return(NULL);
7091 }
7092
7093 buf[len++] = cur;
7094 NEXT;
7095 cur = CUR;
7096 while (((cur >= 'a') && (cur <= 'z')) ||
7097 ((cur >= 'A') && (cur <= 'Z')) ||
7098 ((cur >= '0') && (cur <= '9')) ||
7099 (cur == '.') || (cur == '_') ||
7100 (cur == '-')) {
7101 if (len + 1 >= size) {
7102 size *= 2;
7103 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7104 if (buf == NULL) {
7105 xmlGenericError(xmlGenericErrorContext,
7106 "realloc of %d byte failed\n", size);
7107 return(NULL);
7108 }
7109 }
7110 buf[len++] = cur;
7111 NEXT;
7112 cur = CUR;
7113 if (cur == 0) {
7114 SHRINK;
7115 GROW;
7116 cur = CUR;
7117 }
7118 }
7119 buf[len] = 0;
7120 } else {
7121 ctxt->errNo = XML_ERR_ENCODING_NAME;
7122 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7123 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7124 ctxt->wellFormed = 0;
7125 ctxt->disableSAX = 1;
7126 }
7127 return(buf);
7128}
7129
7130/**
7131 * xmlParseEncodingDecl:
7132 * @ctxt: an XML parser context
7133 *
7134 * parse the XML encoding declaration
7135 *
7136 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7137 *
7138 * this setups the conversion filters.
7139 *
7140 * Returns the encoding value or NULL
7141 */
7142
7143xmlChar *
7144xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7145 xmlChar *encoding = NULL;
7146 const xmlChar *q;
7147
7148 SKIP_BLANKS;
7149 if ((RAW == 'e') && (NXT(1) == 'n') &&
7150 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7151 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7152 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7153 SKIP(8);
7154 SKIP_BLANKS;
7155 if (RAW != '=') {
7156 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7157 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7158 ctxt->sax->error(ctxt->userData,
7159 "xmlParseEncodingDecl : expected '='\n");
7160 ctxt->wellFormed = 0;
7161 ctxt->disableSAX = 1;
7162 return(NULL);
7163 }
7164 NEXT;
7165 SKIP_BLANKS;
7166 if (RAW == '"') {
7167 NEXT;
7168 q = CUR_PTR;
7169 encoding = xmlParseEncName(ctxt);
7170 if (RAW != '"') {
7171 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7172 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7173 ctxt->sax->error(ctxt->userData,
7174 "String not closed\n%.50s\n", q);
7175 ctxt->wellFormed = 0;
7176 ctxt->disableSAX = 1;
7177 } else
7178 NEXT;
7179 } else if (RAW == '\''){
7180 NEXT;
7181 q = CUR_PTR;
7182 encoding = xmlParseEncName(ctxt);
7183 if (RAW != '\'') {
7184 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7185 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7186 ctxt->sax->error(ctxt->userData,
7187 "String not closed\n%.50s\n", q);
7188 ctxt->wellFormed = 0;
7189 ctxt->disableSAX = 1;
7190 } else
7191 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007192 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007193 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7194 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7195 ctxt->sax->error(ctxt->userData,
7196 "xmlParseEncodingDecl : expected ' or \"\n");
7197 ctxt->wellFormed = 0;
7198 ctxt->disableSAX = 1;
7199 }
7200 if (encoding != NULL) {
7201 xmlCharEncoding enc;
7202 xmlCharEncodingHandlerPtr handler;
7203
7204 if (ctxt->input->encoding != NULL)
7205 xmlFree((xmlChar *) ctxt->input->encoding);
7206 ctxt->input->encoding = encoding;
7207
7208 enc = xmlParseCharEncoding((const char *) encoding);
7209 /*
7210 * registered set of known encodings
7211 */
7212 if (enc != XML_CHAR_ENCODING_ERROR) {
7213 xmlSwitchEncoding(ctxt, enc);
7214 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7215 xmlFree(encoding);
7216 return(NULL);
7217 }
7218 } else {
7219 /*
7220 * fallback for unknown encodings
7221 */
7222 handler = xmlFindCharEncodingHandler((const char *) encoding);
7223 if (handler != NULL) {
7224 xmlSwitchToEncoding(ctxt, handler);
7225 } else {
7226 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7227 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7228 ctxt->sax->error(ctxt->userData,
7229 "Unsupported encoding %s\n", encoding);
7230 return(NULL);
7231 }
7232 }
7233 }
7234 }
7235 return(encoding);
7236}
7237
7238/**
7239 * xmlParseSDDecl:
7240 * @ctxt: an XML parser context
7241 *
7242 * parse the XML standalone declaration
7243 *
7244 * [32] SDDecl ::= S 'standalone' Eq
7245 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7246 *
7247 * [ VC: Standalone Document Declaration ]
7248 * TODO The standalone document declaration must have the value "no"
7249 * if any external markup declarations contain declarations of:
7250 * - attributes with default values, if elements to which these
7251 * attributes apply appear in the document without specifications
7252 * of values for these attributes, or
7253 * - entities (other than amp, lt, gt, apos, quot), if references
7254 * to those entities appear in the document, or
7255 * - attributes with values subject to normalization, where the
7256 * attribute appears in the document with a value which will change
7257 * as a result of normalization, or
7258 * - element types with element content, if white space occurs directly
7259 * within any instance of those types.
7260 *
7261 * Returns 1 if standalone, 0 otherwise
7262 */
7263
7264int
7265xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7266 int standalone = -1;
7267
7268 SKIP_BLANKS;
7269 if ((RAW == 's') && (NXT(1) == 't') &&
7270 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7271 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7272 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7273 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7274 SKIP(10);
7275 SKIP_BLANKS;
7276 if (RAW != '=') {
7277 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7278 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7279 ctxt->sax->error(ctxt->userData,
7280 "XML standalone declaration : expected '='\n");
7281 ctxt->wellFormed = 0;
7282 ctxt->disableSAX = 1;
7283 return(standalone);
7284 }
7285 NEXT;
7286 SKIP_BLANKS;
7287 if (RAW == '\''){
7288 NEXT;
7289 if ((RAW == 'n') && (NXT(1) == 'o')) {
7290 standalone = 0;
7291 SKIP(2);
7292 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7293 (NXT(2) == 's')) {
7294 standalone = 1;
7295 SKIP(3);
7296 } else {
7297 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7298 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7299 ctxt->sax->error(ctxt->userData,
7300 "standalone accepts only 'yes' or 'no'\n");
7301 ctxt->wellFormed = 0;
7302 ctxt->disableSAX = 1;
7303 }
7304 if (RAW != '\'') {
7305 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7306 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7307 ctxt->sax->error(ctxt->userData, "String not closed\n");
7308 ctxt->wellFormed = 0;
7309 ctxt->disableSAX = 1;
7310 } else
7311 NEXT;
7312 } else if (RAW == '"'){
7313 NEXT;
7314 if ((RAW == 'n') && (NXT(1) == 'o')) {
7315 standalone = 0;
7316 SKIP(2);
7317 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7318 (NXT(2) == 's')) {
7319 standalone = 1;
7320 SKIP(3);
7321 } else {
7322 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7323 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7324 ctxt->sax->error(ctxt->userData,
7325 "standalone accepts only 'yes' or 'no'\n");
7326 ctxt->wellFormed = 0;
7327 ctxt->disableSAX = 1;
7328 }
7329 if (RAW != '"') {
7330 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7331 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7332 ctxt->sax->error(ctxt->userData, "String not closed\n");
7333 ctxt->wellFormed = 0;
7334 ctxt->disableSAX = 1;
7335 } else
7336 NEXT;
7337 } else {
7338 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7339 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7340 ctxt->sax->error(ctxt->userData,
7341 "Standalone value not found\n");
7342 ctxt->wellFormed = 0;
7343 ctxt->disableSAX = 1;
7344 }
7345 }
7346 return(standalone);
7347}
7348
7349/**
7350 * xmlParseXMLDecl:
7351 * @ctxt: an XML parser context
7352 *
7353 * parse an XML declaration header
7354 *
7355 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7356 */
7357
7358void
7359xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7360 xmlChar *version;
7361
7362 /*
7363 * We know that '<?xml' is here.
7364 */
7365 SKIP(5);
7366
7367 if (!IS_BLANK(RAW)) {
7368 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7369 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7370 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7371 ctxt->wellFormed = 0;
7372 ctxt->disableSAX = 1;
7373 }
7374 SKIP_BLANKS;
7375
7376 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007377 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007378 */
7379 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007380 if (version == NULL) {
7381 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7382 ctxt->sax->error(ctxt->userData,
7383 "Malformed declaration expecting version\n");
7384 ctxt->wellFormed = 0;
7385 ctxt->disableSAX = 1;
7386 } else {
7387 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7388 /*
7389 * TODO: Blueberry should be detected here
7390 */
7391 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7392 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7393 version);
7394 }
7395 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007396 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007397 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007398 }
Owen Taylor3473f882001-02-23 17:55:21 +00007399
7400 /*
7401 * We may have the encoding declaration
7402 */
7403 if (!IS_BLANK(RAW)) {
7404 if ((RAW == '?') && (NXT(1) == '>')) {
7405 SKIP(2);
7406 return;
7407 }
7408 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7409 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7410 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7411 ctxt->wellFormed = 0;
7412 ctxt->disableSAX = 1;
7413 }
7414 xmlParseEncodingDecl(ctxt);
7415 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7416 /*
7417 * The XML REC instructs us to stop parsing right here
7418 */
7419 return;
7420 }
7421
7422 /*
7423 * We may have the standalone status.
7424 */
7425 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7426 if ((RAW == '?') && (NXT(1) == '>')) {
7427 SKIP(2);
7428 return;
7429 }
7430 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7431 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7432 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7433 ctxt->wellFormed = 0;
7434 ctxt->disableSAX = 1;
7435 }
7436 SKIP_BLANKS;
7437 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7438
7439 SKIP_BLANKS;
7440 if ((RAW == '?') && (NXT(1) == '>')) {
7441 SKIP(2);
7442 } else if (RAW == '>') {
7443 /* Deprecated old WD ... */
7444 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7445 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7446 ctxt->sax->error(ctxt->userData,
7447 "XML declaration must end-up with '?>'\n");
7448 ctxt->wellFormed = 0;
7449 ctxt->disableSAX = 1;
7450 NEXT;
7451 } else {
7452 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7453 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7454 ctxt->sax->error(ctxt->userData,
7455 "parsing XML declaration: '?>' expected\n");
7456 ctxt->wellFormed = 0;
7457 ctxt->disableSAX = 1;
7458 MOVETO_ENDTAG(CUR_PTR);
7459 NEXT;
7460 }
7461}
7462
7463/**
7464 * xmlParseMisc:
7465 * @ctxt: an XML parser context
7466 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007467 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007468 *
7469 * [27] Misc ::= Comment | PI | S
7470 */
7471
7472void
7473xmlParseMisc(xmlParserCtxtPtr ctxt) {
7474 while (((RAW == '<') && (NXT(1) == '?')) ||
7475 ((RAW == '<') && (NXT(1) == '!') &&
7476 (NXT(2) == '-') && (NXT(3) == '-')) ||
7477 IS_BLANK(CUR)) {
7478 if ((RAW == '<') && (NXT(1) == '?')) {
7479 xmlParsePI(ctxt);
7480 } else if (IS_BLANK(CUR)) {
7481 NEXT;
7482 } else
7483 xmlParseComment(ctxt);
7484 }
7485}
7486
7487/**
7488 * xmlParseDocument:
7489 * @ctxt: an XML parser context
7490 *
7491 * parse an XML document (and build a tree if using the standard SAX
7492 * interface).
7493 *
7494 * [1] document ::= prolog element Misc*
7495 *
7496 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7497 *
7498 * Returns 0, -1 in case of error. the parser context is augmented
7499 * as a result of the parsing.
7500 */
7501
7502int
7503xmlParseDocument(xmlParserCtxtPtr ctxt) {
7504 xmlChar start[4];
7505 xmlCharEncoding enc;
7506
7507 xmlInitParser();
7508
7509 GROW;
7510
7511 /*
7512 * SAX: beginning of the document processing.
7513 */
7514 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7515 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7516
Daniel Veillard50f34372001-08-03 12:06:36 +00007517 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007518 /*
7519 * Get the 4 first bytes and decode the charset
7520 * if enc != XML_CHAR_ENCODING_NONE
7521 * plug some encoding conversion routines.
7522 */
7523 start[0] = RAW;
7524 start[1] = NXT(1);
7525 start[2] = NXT(2);
7526 start[3] = NXT(3);
7527 enc = xmlDetectCharEncoding(start, 4);
7528 if (enc != XML_CHAR_ENCODING_NONE) {
7529 xmlSwitchEncoding(ctxt, enc);
7530 }
Owen Taylor3473f882001-02-23 17:55:21 +00007531 }
7532
7533
7534 if (CUR == 0) {
7535 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7536 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7537 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7538 ctxt->wellFormed = 0;
7539 ctxt->disableSAX = 1;
7540 }
7541
7542 /*
7543 * Check for the XMLDecl in the Prolog.
7544 */
7545 GROW;
7546 if ((RAW == '<') && (NXT(1) == '?') &&
7547 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7548 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7549
7550 /*
7551 * Note that we will switch encoding on the fly.
7552 */
7553 xmlParseXMLDecl(ctxt);
7554 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7555 /*
7556 * The XML REC instructs us to stop parsing right here
7557 */
7558 return(-1);
7559 }
7560 ctxt->standalone = ctxt->input->standalone;
7561 SKIP_BLANKS;
7562 } else {
7563 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7564 }
7565 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7566 ctxt->sax->startDocument(ctxt->userData);
7567
7568 /*
7569 * The Misc part of the Prolog
7570 */
7571 GROW;
7572 xmlParseMisc(ctxt);
7573
7574 /*
7575 * Then possibly doc type declaration(s) and more Misc
7576 * (doctypedecl Misc*)?
7577 */
7578 GROW;
7579 if ((RAW == '<') && (NXT(1) == '!') &&
7580 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7581 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7582 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7583 (NXT(8) == 'E')) {
7584
7585 ctxt->inSubset = 1;
7586 xmlParseDocTypeDecl(ctxt);
7587 if (RAW == '[') {
7588 ctxt->instate = XML_PARSER_DTD;
7589 xmlParseInternalSubset(ctxt);
7590 }
7591
7592 /*
7593 * Create and update the external subset.
7594 */
7595 ctxt->inSubset = 2;
7596 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7597 (!ctxt->disableSAX))
7598 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7599 ctxt->extSubSystem, ctxt->extSubURI);
7600 ctxt->inSubset = 0;
7601
7602
7603 ctxt->instate = XML_PARSER_PROLOG;
7604 xmlParseMisc(ctxt);
7605 }
7606
7607 /*
7608 * Time to start parsing the tree itself
7609 */
7610 GROW;
7611 if (RAW != '<') {
7612 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7613 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7614 ctxt->sax->error(ctxt->userData,
7615 "Start tag expected, '<' not found\n");
7616 ctxt->wellFormed = 0;
7617 ctxt->disableSAX = 1;
7618 ctxt->instate = XML_PARSER_EOF;
7619 } else {
7620 ctxt->instate = XML_PARSER_CONTENT;
7621 xmlParseElement(ctxt);
7622 ctxt->instate = XML_PARSER_EPILOG;
7623
7624
7625 /*
7626 * The Misc part at the end
7627 */
7628 xmlParseMisc(ctxt);
7629
7630 if (RAW != 0) {
7631 ctxt->errNo = XML_ERR_DOCUMENT_END;
7632 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7633 ctxt->sax->error(ctxt->userData,
7634 "Extra content at the end of the document\n");
7635 ctxt->wellFormed = 0;
7636 ctxt->disableSAX = 1;
7637 }
7638 ctxt->instate = XML_PARSER_EOF;
7639 }
7640
7641 /*
7642 * SAX: end of the document processing.
7643 */
7644 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7645 (!ctxt->disableSAX))
7646 ctxt->sax->endDocument(ctxt->userData);
7647
Daniel Veillardc7612992002-02-17 22:47:37 +00007648 if (! ctxt->wellFormed) {
7649 ctxt->valid = 0;
7650 return(-1);
7651 }
Owen Taylor3473f882001-02-23 17:55:21 +00007652 return(0);
7653}
7654
7655/**
7656 * xmlParseExtParsedEnt:
7657 * @ctxt: an XML parser context
7658 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007659 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007660 * An external general parsed entity is well-formed if it matches the
7661 * production labeled extParsedEnt.
7662 *
7663 * [78] extParsedEnt ::= TextDecl? content
7664 *
7665 * Returns 0, -1 in case of error. the parser context is augmented
7666 * as a result of the parsing.
7667 */
7668
7669int
7670xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7671 xmlChar start[4];
7672 xmlCharEncoding enc;
7673
7674 xmlDefaultSAXHandlerInit();
7675
7676 GROW;
7677
7678 /*
7679 * SAX: beginning of the document processing.
7680 */
7681 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7682 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7683
7684 /*
7685 * Get the 4 first bytes and decode the charset
7686 * if enc != XML_CHAR_ENCODING_NONE
7687 * plug some encoding conversion routines.
7688 */
7689 start[0] = RAW;
7690 start[1] = NXT(1);
7691 start[2] = NXT(2);
7692 start[3] = NXT(3);
7693 enc = xmlDetectCharEncoding(start, 4);
7694 if (enc != XML_CHAR_ENCODING_NONE) {
7695 xmlSwitchEncoding(ctxt, enc);
7696 }
7697
7698
7699 if (CUR == 0) {
7700 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7701 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7702 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7703 ctxt->wellFormed = 0;
7704 ctxt->disableSAX = 1;
7705 }
7706
7707 /*
7708 * Check for the XMLDecl in the Prolog.
7709 */
7710 GROW;
7711 if ((RAW == '<') && (NXT(1) == '?') &&
7712 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7713 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7714
7715 /*
7716 * Note that we will switch encoding on the fly.
7717 */
7718 xmlParseXMLDecl(ctxt);
7719 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7720 /*
7721 * The XML REC instructs us to stop parsing right here
7722 */
7723 return(-1);
7724 }
7725 SKIP_BLANKS;
7726 } else {
7727 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7728 }
7729 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7730 ctxt->sax->startDocument(ctxt->userData);
7731
7732 /*
7733 * Doing validity checking on chunk doesn't make sense
7734 */
7735 ctxt->instate = XML_PARSER_CONTENT;
7736 ctxt->validate = 0;
7737 ctxt->loadsubset = 0;
7738 ctxt->depth = 0;
7739
7740 xmlParseContent(ctxt);
7741
7742 if ((RAW == '<') && (NXT(1) == '/')) {
7743 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7744 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7745 ctxt->sax->error(ctxt->userData,
7746 "chunk is not well balanced\n");
7747 ctxt->wellFormed = 0;
7748 ctxt->disableSAX = 1;
7749 } else if (RAW != 0) {
7750 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7751 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7752 ctxt->sax->error(ctxt->userData,
7753 "extra content at the end of well balanced chunk\n");
7754 ctxt->wellFormed = 0;
7755 ctxt->disableSAX = 1;
7756 }
7757
7758 /*
7759 * SAX: end of the document processing.
7760 */
7761 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7762 (!ctxt->disableSAX))
7763 ctxt->sax->endDocument(ctxt->userData);
7764
7765 if (! ctxt->wellFormed) return(-1);
7766 return(0);
7767}
7768
7769/************************************************************************
7770 * *
7771 * Progressive parsing interfaces *
7772 * *
7773 ************************************************************************/
7774
7775/**
7776 * xmlParseLookupSequence:
7777 * @ctxt: an XML parser context
7778 * @first: the first char to lookup
7779 * @next: the next char to lookup or zero
7780 * @third: the next char to lookup or zero
7781 *
7782 * Try to find if a sequence (first, next, third) or just (first next) or
7783 * (first) is available in the input stream.
7784 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7785 * to avoid rescanning sequences of bytes, it DOES change the state of the
7786 * parser, do not use liberally.
7787 *
7788 * Returns the index to the current parsing point if the full sequence
7789 * is available, -1 otherwise.
7790 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007791static int
Owen Taylor3473f882001-02-23 17:55:21 +00007792xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7793 xmlChar next, xmlChar third) {
7794 int base, len;
7795 xmlParserInputPtr in;
7796 const xmlChar *buf;
7797
7798 in = ctxt->input;
7799 if (in == NULL) return(-1);
7800 base = in->cur - in->base;
7801 if (base < 0) return(-1);
7802 if (ctxt->checkIndex > base)
7803 base = ctxt->checkIndex;
7804 if (in->buf == NULL) {
7805 buf = in->base;
7806 len = in->length;
7807 } else {
7808 buf = in->buf->buffer->content;
7809 len = in->buf->buffer->use;
7810 }
7811 /* take into account the sequence length */
7812 if (third) len -= 2;
7813 else if (next) len --;
7814 for (;base < len;base++) {
7815 if (buf[base] == first) {
7816 if (third != 0) {
7817 if ((buf[base + 1] != next) ||
7818 (buf[base + 2] != third)) continue;
7819 } else if (next != 0) {
7820 if (buf[base + 1] != next) continue;
7821 }
7822 ctxt->checkIndex = 0;
7823#ifdef DEBUG_PUSH
7824 if (next == 0)
7825 xmlGenericError(xmlGenericErrorContext,
7826 "PP: lookup '%c' found at %d\n",
7827 first, base);
7828 else if (third == 0)
7829 xmlGenericError(xmlGenericErrorContext,
7830 "PP: lookup '%c%c' found at %d\n",
7831 first, next, base);
7832 else
7833 xmlGenericError(xmlGenericErrorContext,
7834 "PP: lookup '%c%c%c' found at %d\n",
7835 first, next, third, base);
7836#endif
7837 return(base - (in->cur - in->base));
7838 }
7839 }
7840 ctxt->checkIndex = base;
7841#ifdef DEBUG_PUSH
7842 if (next == 0)
7843 xmlGenericError(xmlGenericErrorContext,
7844 "PP: lookup '%c' failed\n", first);
7845 else if (third == 0)
7846 xmlGenericError(xmlGenericErrorContext,
7847 "PP: lookup '%c%c' failed\n", first, next);
7848 else
7849 xmlGenericError(xmlGenericErrorContext,
7850 "PP: lookup '%c%c%c' failed\n", first, next, third);
7851#endif
7852 return(-1);
7853}
7854
7855/**
7856 * xmlParseTryOrFinish:
7857 * @ctxt: an XML parser context
7858 * @terminate: last chunk indicator
7859 *
7860 * Try to progress on parsing
7861 *
7862 * Returns zero if no parsing was possible
7863 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007864static int
Owen Taylor3473f882001-02-23 17:55:21 +00007865xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7866 int ret = 0;
7867 int avail;
7868 xmlChar cur, next;
7869
7870#ifdef DEBUG_PUSH
7871 switch (ctxt->instate) {
7872 case XML_PARSER_EOF:
7873 xmlGenericError(xmlGenericErrorContext,
7874 "PP: try EOF\n"); break;
7875 case XML_PARSER_START:
7876 xmlGenericError(xmlGenericErrorContext,
7877 "PP: try START\n"); break;
7878 case XML_PARSER_MISC:
7879 xmlGenericError(xmlGenericErrorContext,
7880 "PP: try MISC\n");break;
7881 case XML_PARSER_COMMENT:
7882 xmlGenericError(xmlGenericErrorContext,
7883 "PP: try COMMENT\n");break;
7884 case XML_PARSER_PROLOG:
7885 xmlGenericError(xmlGenericErrorContext,
7886 "PP: try PROLOG\n");break;
7887 case XML_PARSER_START_TAG:
7888 xmlGenericError(xmlGenericErrorContext,
7889 "PP: try START_TAG\n");break;
7890 case XML_PARSER_CONTENT:
7891 xmlGenericError(xmlGenericErrorContext,
7892 "PP: try CONTENT\n");break;
7893 case XML_PARSER_CDATA_SECTION:
7894 xmlGenericError(xmlGenericErrorContext,
7895 "PP: try CDATA_SECTION\n");break;
7896 case XML_PARSER_END_TAG:
7897 xmlGenericError(xmlGenericErrorContext,
7898 "PP: try END_TAG\n");break;
7899 case XML_PARSER_ENTITY_DECL:
7900 xmlGenericError(xmlGenericErrorContext,
7901 "PP: try ENTITY_DECL\n");break;
7902 case XML_PARSER_ENTITY_VALUE:
7903 xmlGenericError(xmlGenericErrorContext,
7904 "PP: try ENTITY_VALUE\n");break;
7905 case XML_PARSER_ATTRIBUTE_VALUE:
7906 xmlGenericError(xmlGenericErrorContext,
7907 "PP: try ATTRIBUTE_VALUE\n");break;
7908 case XML_PARSER_DTD:
7909 xmlGenericError(xmlGenericErrorContext,
7910 "PP: try DTD\n");break;
7911 case XML_PARSER_EPILOG:
7912 xmlGenericError(xmlGenericErrorContext,
7913 "PP: try EPILOG\n");break;
7914 case XML_PARSER_PI:
7915 xmlGenericError(xmlGenericErrorContext,
7916 "PP: try PI\n");break;
7917 case XML_PARSER_IGNORE:
7918 xmlGenericError(xmlGenericErrorContext,
7919 "PP: try IGNORE\n");break;
7920 }
7921#endif
7922
7923 while (1) {
7924 /*
7925 * Pop-up of finished entities.
7926 */
7927 while ((RAW == 0) && (ctxt->inputNr > 1))
7928 xmlPopInput(ctxt);
7929
7930 if (ctxt->input ==NULL) break;
7931 if (ctxt->input->buf == NULL)
7932 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7933 else
7934 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7935 if (avail < 1)
7936 goto done;
7937 switch (ctxt->instate) {
7938 case XML_PARSER_EOF:
7939 /*
7940 * Document parsing is done !
7941 */
7942 goto done;
7943 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00007944 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
7945 xmlChar start[4];
7946 xmlCharEncoding enc;
7947
7948 /*
7949 * Very first chars read from the document flow.
7950 */
7951 if (avail < 4)
7952 goto done;
7953
7954 /*
7955 * Get the 4 first bytes and decode the charset
7956 * if enc != XML_CHAR_ENCODING_NONE
7957 * plug some encoding conversion routines.
7958 */
7959 start[0] = RAW;
7960 start[1] = NXT(1);
7961 start[2] = NXT(2);
7962 start[3] = NXT(3);
7963 enc = xmlDetectCharEncoding(start, 4);
7964 if (enc != XML_CHAR_ENCODING_NONE) {
7965 xmlSwitchEncoding(ctxt, enc);
7966 }
7967 break;
7968 }
Owen Taylor3473f882001-02-23 17:55:21 +00007969
7970 cur = ctxt->input->cur[0];
7971 next = ctxt->input->cur[1];
7972 if (cur == 0) {
7973 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7974 ctxt->sax->setDocumentLocator(ctxt->userData,
7975 &xmlDefaultSAXLocator);
7976 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7977 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7978 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7979 ctxt->wellFormed = 0;
7980 ctxt->disableSAX = 1;
7981 ctxt->instate = XML_PARSER_EOF;
7982#ifdef DEBUG_PUSH
7983 xmlGenericError(xmlGenericErrorContext,
7984 "PP: entering EOF\n");
7985#endif
7986 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7987 ctxt->sax->endDocument(ctxt->userData);
7988 goto done;
7989 }
7990 if ((cur == '<') && (next == '?')) {
7991 /* PI or XML decl */
7992 if (avail < 5) return(ret);
7993 if ((!terminate) &&
7994 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7995 return(ret);
7996 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7997 ctxt->sax->setDocumentLocator(ctxt->userData,
7998 &xmlDefaultSAXLocator);
7999 if ((ctxt->input->cur[2] == 'x') &&
8000 (ctxt->input->cur[3] == 'm') &&
8001 (ctxt->input->cur[4] == 'l') &&
8002 (IS_BLANK(ctxt->input->cur[5]))) {
8003 ret += 5;
8004#ifdef DEBUG_PUSH
8005 xmlGenericError(xmlGenericErrorContext,
8006 "PP: Parsing XML Decl\n");
8007#endif
8008 xmlParseXMLDecl(ctxt);
8009 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8010 /*
8011 * The XML REC instructs us to stop parsing right
8012 * here
8013 */
8014 ctxt->instate = XML_PARSER_EOF;
8015 return(0);
8016 }
8017 ctxt->standalone = ctxt->input->standalone;
8018 if ((ctxt->encoding == NULL) &&
8019 (ctxt->input->encoding != NULL))
8020 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8021 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8022 (!ctxt->disableSAX))
8023 ctxt->sax->startDocument(ctxt->userData);
8024 ctxt->instate = XML_PARSER_MISC;
8025#ifdef DEBUG_PUSH
8026 xmlGenericError(xmlGenericErrorContext,
8027 "PP: entering MISC\n");
8028#endif
8029 } else {
8030 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8031 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8032 (!ctxt->disableSAX))
8033 ctxt->sax->startDocument(ctxt->userData);
8034 ctxt->instate = XML_PARSER_MISC;
8035#ifdef DEBUG_PUSH
8036 xmlGenericError(xmlGenericErrorContext,
8037 "PP: entering MISC\n");
8038#endif
8039 }
8040 } else {
8041 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8042 ctxt->sax->setDocumentLocator(ctxt->userData,
8043 &xmlDefaultSAXLocator);
8044 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8045 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8046 (!ctxt->disableSAX))
8047 ctxt->sax->startDocument(ctxt->userData);
8048 ctxt->instate = XML_PARSER_MISC;
8049#ifdef DEBUG_PUSH
8050 xmlGenericError(xmlGenericErrorContext,
8051 "PP: entering MISC\n");
8052#endif
8053 }
8054 break;
8055 case XML_PARSER_MISC:
8056 SKIP_BLANKS;
8057 if (ctxt->input->buf == NULL)
8058 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8059 else
8060 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8061 if (avail < 2)
8062 goto done;
8063 cur = ctxt->input->cur[0];
8064 next = ctxt->input->cur[1];
8065 if ((cur == '<') && (next == '?')) {
8066 if ((!terminate) &&
8067 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8068 goto done;
8069#ifdef DEBUG_PUSH
8070 xmlGenericError(xmlGenericErrorContext,
8071 "PP: Parsing PI\n");
8072#endif
8073 xmlParsePI(ctxt);
8074 } else if ((cur == '<') && (next == '!') &&
8075 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8076 if ((!terminate) &&
8077 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8078 goto done;
8079#ifdef DEBUG_PUSH
8080 xmlGenericError(xmlGenericErrorContext,
8081 "PP: Parsing Comment\n");
8082#endif
8083 xmlParseComment(ctxt);
8084 ctxt->instate = XML_PARSER_MISC;
8085 } else if ((cur == '<') && (next == '!') &&
8086 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8087 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8088 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8089 (ctxt->input->cur[8] == 'E')) {
8090 if ((!terminate) &&
8091 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8092 goto done;
8093#ifdef DEBUG_PUSH
8094 xmlGenericError(xmlGenericErrorContext,
8095 "PP: Parsing internal subset\n");
8096#endif
8097 ctxt->inSubset = 1;
8098 xmlParseDocTypeDecl(ctxt);
8099 if (RAW == '[') {
8100 ctxt->instate = XML_PARSER_DTD;
8101#ifdef DEBUG_PUSH
8102 xmlGenericError(xmlGenericErrorContext,
8103 "PP: entering DTD\n");
8104#endif
8105 } else {
8106 /*
8107 * Create and update the external subset.
8108 */
8109 ctxt->inSubset = 2;
8110 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8111 (ctxt->sax->externalSubset != NULL))
8112 ctxt->sax->externalSubset(ctxt->userData,
8113 ctxt->intSubName, ctxt->extSubSystem,
8114 ctxt->extSubURI);
8115 ctxt->inSubset = 0;
8116 ctxt->instate = XML_PARSER_PROLOG;
8117#ifdef DEBUG_PUSH
8118 xmlGenericError(xmlGenericErrorContext,
8119 "PP: entering PROLOG\n");
8120#endif
8121 }
8122 } else if ((cur == '<') && (next == '!') &&
8123 (avail < 9)) {
8124 goto done;
8125 } else {
8126 ctxt->instate = XML_PARSER_START_TAG;
8127#ifdef DEBUG_PUSH
8128 xmlGenericError(xmlGenericErrorContext,
8129 "PP: entering START_TAG\n");
8130#endif
8131 }
8132 break;
8133 case XML_PARSER_IGNORE:
8134 xmlGenericError(xmlGenericErrorContext,
8135 "PP: internal error, state == IGNORE");
8136 ctxt->instate = XML_PARSER_DTD;
8137#ifdef DEBUG_PUSH
8138 xmlGenericError(xmlGenericErrorContext,
8139 "PP: entering DTD\n");
8140#endif
8141 break;
8142 case XML_PARSER_PROLOG:
8143 SKIP_BLANKS;
8144 if (ctxt->input->buf == NULL)
8145 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8146 else
8147 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8148 if (avail < 2)
8149 goto done;
8150 cur = ctxt->input->cur[0];
8151 next = ctxt->input->cur[1];
8152 if ((cur == '<') && (next == '?')) {
8153 if ((!terminate) &&
8154 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8155 goto done;
8156#ifdef DEBUG_PUSH
8157 xmlGenericError(xmlGenericErrorContext,
8158 "PP: Parsing PI\n");
8159#endif
8160 xmlParsePI(ctxt);
8161 } else if ((cur == '<') && (next == '!') &&
8162 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8163 if ((!terminate) &&
8164 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8165 goto done;
8166#ifdef DEBUG_PUSH
8167 xmlGenericError(xmlGenericErrorContext,
8168 "PP: Parsing Comment\n");
8169#endif
8170 xmlParseComment(ctxt);
8171 ctxt->instate = XML_PARSER_PROLOG;
8172 } else if ((cur == '<') && (next == '!') &&
8173 (avail < 4)) {
8174 goto done;
8175 } else {
8176 ctxt->instate = XML_PARSER_START_TAG;
8177#ifdef DEBUG_PUSH
8178 xmlGenericError(xmlGenericErrorContext,
8179 "PP: entering START_TAG\n");
8180#endif
8181 }
8182 break;
8183 case XML_PARSER_EPILOG:
8184 SKIP_BLANKS;
8185 if (ctxt->input->buf == NULL)
8186 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8187 else
8188 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8189 if (avail < 2)
8190 goto done;
8191 cur = ctxt->input->cur[0];
8192 next = ctxt->input->cur[1];
8193 if ((cur == '<') && (next == '?')) {
8194 if ((!terminate) &&
8195 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8196 goto done;
8197#ifdef DEBUG_PUSH
8198 xmlGenericError(xmlGenericErrorContext,
8199 "PP: Parsing PI\n");
8200#endif
8201 xmlParsePI(ctxt);
8202 ctxt->instate = XML_PARSER_EPILOG;
8203 } else if ((cur == '<') && (next == '!') &&
8204 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8205 if ((!terminate) &&
8206 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8207 goto done;
8208#ifdef DEBUG_PUSH
8209 xmlGenericError(xmlGenericErrorContext,
8210 "PP: Parsing Comment\n");
8211#endif
8212 xmlParseComment(ctxt);
8213 ctxt->instate = XML_PARSER_EPILOG;
8214 } else if ((cur == '<') && (next == '!') &&
8215 (avail < 4)) {
8216 goto done;
8217 } else {
8218 ctxt->errNo = XML_ERR_DOCUMENT_END;
8219 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8220 ctxt->sax->error(ctxt->userData,
8221 "Extra content at the end of the document\n");
8222 ctxt->wellFormed = 0;
8223 ctxt->disableSAX = 1;
8224 ctxt->instate = XML_PARSER_EOF;
8225#ifdef DEBUG_PUSH
8226 xmlGenericError(xmlGenericErrorContext,
8227 "PP: entering EOF\n");
8228#endif
8229 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8230 (!ctxt->disableSAX))
8231 ctxt->sax->endDocument(ctxt->userData);
8232 goto done;
8233 }
8234 break;
8235 case XML_PARSER_START_TAG: {
8236 xmlChar *name, *oldname;
8237
8238 if ((avail < 2) && (ctxt->inputNr == 1))
8239 goto done;
8240 cur = ctxt->input->cur[0];
8241 if (cur != '<') {
8242 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8243 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8244 ctxt->sax->error(ctxt->userData,
8245 "Start tag expect, '<' not found\n");
8246 ctxt->wellFormed = 0;
8247 ctxt->disableSAX = 1;
8248 ctxt->instate = XML_PARSER_EOF;
8249#ifdef DEBUG_PUSH
8250 xmlGenericError(xmlGenericErrorContext,
8251 "PP: entering EOF\n");
8252#endif
8253 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8254 (!ctxt->disableSAX))
8255 ctxt->sax->endDocument(ctxt->userData);
8256 goto done;
8257 }
8258 if ((!terminate) &&
8259 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8260 goto done;
8261 if (ctxt->spaceNr == 0)
8262 spacePush(ctxt, -1);
8263 else
8264 spacePush(ctxt, *ctxt->space);
8265 name = xmlParseStartTag(ctxt);
8266 if (name == NULL) {
8267 spacePop(ctxt);
8268 ctxt->instate = XML_PARSER_EOF;
8269#ifdef DEBUG_PUSH
8270 xmlGenericError(xmlGenericErrorContext,
8271 "PP: entering EOF\n");
8272#endif
8273 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8274 (!ctxt->disableSAX))
8275 ctxt->sax->endDocument(ctxt->userData);
8276 goto done;
8277 }
8278 namePush(ctxt, xmlStrdup(name));
8279
8280 /*
8281 * [ VC: Root Element Type ]
8282 * The Name in the document type declaration must match
8283 * the element type of the root element.
8284 */
8285 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8286 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8287 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8288
8289 /*
8290 * Check for an Empty Element.
8291 */
8292 if ((RAW == '/') && (NXT(1) == '>')) {
8293 SKIP(2);
8294 if ((ctxt->sax != NULL) &&
8295 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8296 ctxt->sax->endElement(ctxt->userData, name);
8297 xmlFree(name);
8298 oldname = namePop(ctxt);
8299 spacePop(ctxt);
8300 if (oldname != NULL) {
8301#ifdef DEBUG_STACK
8302 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8303#endif
8304 xmlFree(oldname);
8305 }
8306 if (ctxt->name == NULL) {
8307 ctxt->instate = XML_PARSER_EPILOG;
8308#ifdef DEBUG_PUSH
8309 xmlGenericError(xmlGenericErrorContext,
8310 "PP: entering EPILOG\n");
8311#endif
8312 } else {
8313 ctxt->instate = XML_PARSER_CONTENT;
8314#ifdef DEBUG_PUSH
8315 xmlGenericError(xmlGenericErrorContext,
8316 "PP: entering CONTENT\n");
8317#endif
8318 }
8319 break;
8320 }
8321 if (RAW == '>') {
8322 NEXT;
8323 } else {
8324 ctxt->errNo = XML_ERR_GT_REQUIRED;
8325 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8326 ctxt->sax->error(ctxt->userData,
8327 "Couldn't find end of Start Tag %s\n",
8328 name);
8329 ctxt->wellFormed = 0;
8330 ctxt->disableSAX = 1;
8331
8332 /*
8333 * end of parsing of this node.
8334 */
8335 nodePop(ctxt);
8336 oldname = namePop(ctxt);
8337 spacePop(ctxt);
8338 if (oldname != NULL) {
8339#ifdef DEBUG_STACK
8340 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8341#endif
8342 xmlFree(oldname);
8343 }
8344 }
8345 xmlFree(name);
8346 ctxt->instate = XML_PARSER_CONTENT;
8347#ifdef DEBUG_PUSH
8348 xmlGenericError(xmlGenericErrorContext,
8349 "PP: entering CONTENT\n");
8350#endif
8351 break;
8352 }
8353 case XML_PARSER_CONTENT: {
8354 const xmlChar *test;
8355 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00008356 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00008357
8358 /*
8359 * Handle preparsed entities and charRef
8360 */
8361 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008362 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008363
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008364 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008365 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8366 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008367 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008368 ctxt->token = 0;
8369 }
8370 if ((avail < 2) && (ctxt->inputNr == 1))
8371 goto done;
8372 cur = ctxt->input->cur[0];
8373 next = ctxt->input->cur[1];
8374
8375 test = CUR_PTR;
8376 cons = ctxt->input->consumed;
8377 tok = ctxt->token;
8378 if ((cur == '<') && (next == '?')) {
8379 if ((!terminate) &&
8380 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8381 goto done;
8382#ifdef DEBUG_PUSH
8383 xmlGenericError(xmlGenericErrorContext,
8384 "PP: Parsing PI\n");
8385#endif
8386 xmlParsePI(ctxt);
8387 } else if ((cur == '<') && (next == '!') &&
8388 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8389 if ((!terminate) &&
8390 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8391 goto done;
8392#ifdef DEBUG_PUSH
8393 xmlGenericError(xmlGenericErrorContext,
8394 "PP: Parsing Comment\n");
8395#endif
8396 xmlParseComment(ctxt);
8397 ctxt->instate = XML_PARSER_CONTENT;
8398 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8399 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8400 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8401 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8402 (ctxt->input->cur[8] == '[')) {
8403 SKIP(9);
8404 ctxt->instate = XML_PARSER_CDATA_SECTION;
8405#ifdef DEBUG_PUSH
8406 xmlGenericError(xmlGenericErrorContext,
8407 "PP: entering CDATA_SECTION\n");
8408#endif
8409 break;
8410 } else if ((cur == '<') && (next == '!') &&
8411 (avail < 9)) {
8412 goto done;
8413 } else if ((cur == '<') && (next == '/')) {
8414 ctxt->instate = XML_PARSER_END_TAG;
8415#ifdef DEBUG_PUSH
8416 xmlGenericError(xmlGenericErrorContext,
8417 "PP: entering END_TAG\n");
8418#endif
8419 break;
8420 } else if (cur == '<') {
8421 ctxt->instate = XML_PARSER_START_TAG;
8422#ifdef DEBUG_PUSH
8423 xmlGenericError(xmlGenericErrorContext,
8424 "PP: entering START_TAG\n");
8425#endif
8426 break;
8427 } else if (cur == '&') {
8428 if ((!terminate) &&
8429 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8430 goto done;
8431#ifdef DEBUG_PUSH
8432 xmlGenericError(xmlGenericErrorContext,
8433 "PP: Parsing Reference\n");
8434#endif
8435 xmlParseReference(ctxt);
8436 } else {
8437 /* TODO Avoid the extra copy, handle directly !!! */
8438 /*
8439 * Goal of the following test is:
8440 * - minimize calls to the SAX 'character' callback
8441 * when they are mergeable
8442 * - handle an problem for isBlank when we only parse
8443 * a sequence of blank chars and the next one is
8444 * not available to check against '<' presence.
8445 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008446 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008447 * of the parser.
8448 */
8449 if ((ctxt->inputNr == 1) &&
8450 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8451 if ((!terminate) &&
8452 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8453 goto done;
8454 }
8455 ctxt->checkIndex = 0;
8456#ifdef DEBUG_PUSH
8457 xmlGenericError(xmlGenericErrorContext,
8458 "PP: Parsing char data\n");
8459#endif
8460 xmlParseCharData(ctxt, 0);
8461 }
8462 /*
8463 * Pop-up of finished entities.
8464 */
8465 while ((RAW == 0) && (ctxt->inputNr > 1))
8466 xmlPopInput(ctxt);
8467 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8468 (tok == ctxt->token)) {
8469 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8470 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8471 ctxt->sax->error(ctxt->userData,
8472 "detected an error in element content\n");
8473 ctxt->wellFormed = 0;
8474 ctxt->disableSAX = 1;
8475 ctxt->instate = XML_PARSER_EOF;
8476 break;
8477 }
8478 break;
8479 }
8480 case XML_PARSER_CDATA_SECTION: {
8481 /*
8482 * The Push mode need to have the SAX callback for
8483 * cdataBlock merge back contiguous callbacks.
8484 */
8485 int base;
8486
8487 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8488 if (base < 0) {
8489 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8490 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8491 if (ctxt->sax->cdataBlock != NULL)
8492 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8493 XML_PARSER_BIG_BUFFER_SIZE);
8494 }
8495 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8496 ctxt->checkIndex = 0;
8497 }
8498 goto done;
8499 } else {
8500 if ((ctxt->sax != NULL) && (base > 0) &&
8501 (!ctxt->disableSAX)) {
8502 if (ctxt->sax->cdataBlock != NULL)
8503 ctxt->sax->cdataBlock(ctxt->userData,
8504 ctxt->input->cur, base);
8505 }
8506 SKIP(base + 3);
8507 ctxt->checkIndex = 0;
8508 ctxt->instate = XML_PARSER_CONTENT;
8509#ifdef DEBUG_PUSH
8510 xmlGenericError(xmlGenericErrorContext,
8511 "PP: entering CONTENT\n");
8512#endif
8513 }
8514 break;
8515 }
8516 case XML_PARSER_END_TAG:
8517 if (avail < 2)
8518 goto done;
8519 if ((!terminate) &&
8520 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8521 goto done;
8522 xmlParseEndTag(ctxt);
8523 if (ctxt->name == NULL) {
8524 ctxt->instate = XML_PARSER_EPILOG;
8525#ifdef DEBUG_PUSH
8526 xmlGenericError(xmlGenericErrorContext,
8527 "PP: entering EPILOG\n");
8528#endif
8529 } else {
8530 ctxt->instate = XML_PARSER_CONTENT;
8531#ifdef DEBUG_PUSH
8532 xmlGenericError(xmlGenericErrorContext,
8533 "PP: entering CONTENT\n");
8534#endif
8535 }
8536 break;
8537 case XML_PARSER_DTD: {
8538 /*
8539 * Sorry but progressive parsing of the internal subset
8540 * is not expected to be supported. We first check that
8541 * the full content of the internal subset is available and
8542 * the parsing is launched only at that point.
8543 * Internal subset ends up with "']' S? '>'" in an unescaped
8544 * section and not in a ']]>' sequence which are conditional
8545 * sections (whoever argued to keep that crap in XML deserve
8546 * a place in hell !).
8547 */
8548 int base, i;
8549 xmlChar *buf;
8550 xmlChar quote = 0;
8551
8552 base = ctxt->input->cur - ctxt->input->base;
8553 if (base < 0) return(0);
8554 if (ctxt->checkIndex > base)
8555 base = ctxt->checkIndex;
8556 buf = ctxt->input->buf->buffer->content;
8557 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8558 base++) {
8559 if (quote != 0) {
8560 if (buf[base] == quote)
8561 quote = 0;
8562 continue;
8563 }
8564 if (buf[base] == '"') {
8565 quote = '"';
8566 continue;
8567 }
8568 if (buf[base] == '\'') {
8569 quote = '\'';
8570 continue;
8571 }
8572 if (buf[base] == ']') {
8573 if ((unsigned int) base +1 >=
8574 ctxt->input->buf->buffer->use)
8575 break;
8576 if (buf[base + 1] == ']') {
8577 /* conditional crap, skip both ']' ! */
8578 base++;
8579 continue;
8580 }
8581 for (i = 0;
8582 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8583 i++) {
8584 if (buf[base + i] == '>')
8585 goto found_end_int_subset;
8586 }
8587 break;
8588 }
8589 }
8590 /*
8591 * We didn't found the end of the Internal subset
8592 */
8593 if (quote == 0)
8594 ctxt->checkIndex = base;
8595#ifdef DEBUG_PUSH
8596 if (next == 0)
8597 xmlGenericError(xmlGenericErrorContext,
8598 "PP: lookup of int subset end filed\n");
8599#endif
8600 goto done;
8601
8602found_end_int_subset:
8603 xmlParseInternalSubset(ctxt);
8604 ctxt->inSubset = 2;
8605 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8606 (ctxt->sax->externalSubset != NULL))
8607 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8608 ctxt->extSubSystem, ctxt->extSubURI);
8609 ctxt->inSubset = 0;
8610 ctxt->instate = XML_PARSER_PROLOG;
8611 ctxt->checkIndex = 0;
8612#ifdef DEBUG_PUSH
8613 xmlGenericError(xmlGenericErrorContext,
8614 "PP: entering PROLOG\n");
8615#endif
8616 break;
8617 }
8618 case XML_PARSER_COMMENT:
8619 xmlGenericError(xmlGenericErrorContext,
8620 "PP: internal error, state == COMMENT\n");
8621 ctxt->instate = XML_PARSER_CONTENT;
8622#ifdef DEBUG_PUSH
8623 xmlGenericError(xmlGenericErrorContext,
8624 "PP: entering CONTENT\n");
8625#endif
8626 break;
8627 case XML_PARSER_PI:
8628 xmlGenericError(xmlGenericErrorContext,
8629 "PP: internal error, state == PI\n");
8630 ctxt->instate = XML_PARSER_CONTENT;
8631#ifdef DEBUG_PUSH
8632 xmlGenericError(xmlGenericErrorContext,
8633 "PP: entering CONTENT\n");
8634#endif
8635 break;
8636 case XML_PARSER_ENTITY_DECL:
8637 xmlGenericError(xmlGenericErrorContext,
8638 "PP: internal error, state == ENTITY_DECL\n");
8639 ctxt->instate = XML_PARSER_DTD;
8640#ifdef DEBUG_PUSH
8641 xmlGenericError(xmlGenericErrorContext,
8642 "PP: entering DTD\n");
8643#endif
8644 break;
8645 case XML_PARSER_ENTITY_VALUE:
8646 xmlGenericError(xmlGenericErrorContext,
8647 "PP: internal error, state == ENTITY_VALUE\n");
8648 ctxt->instate = XML_PARSER_CONTENT;
8649#ifdef DEBUG_PUSH
8650 xmlGenericError(xmlGenericErrorContext,
8651 "PP: entering DTD\n");
8652#endif
8653 break;
8654 case XML_PARSER_ATTRIBUTE_VALUE:
8655 xmlGenericError(xmlGenericErrorContext,
8656 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8657 ctxt->instate = XML_PARSER_START_TAG;
8658#ifdef DEBUG_PUSH
8659 xmlGenericError(xmlGenericErrorContext,
8660 "PP: entering START_TAG\n");
8661#endif
8662 break;
8663 case XML_PARSER_SYSTEM_LITERAL:
8664 xmlGenericError(xmlGenericErrorContext,
8665 "PP: internal error, state == SYSTEM_LITERAL\n");
8666 ctxt->instate = XML_PARSER_START_TAG;
8667#ifdef DEBUG_PUSH
8668 xmlGenericError(xmlGenericErrorContext,
8669 "PP: entering START_TAG\n");
8670#endif
8671 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00008672 case XML_PARSER_PUBLIC_LITERAL:
8673 xmlGenericError(xmlGenericErrorContext,
8674 "PP: internal error, state == PUBLIC_LITERAL\n");
8675 ctxt->instate = XML_PARSER_START_TAG;
8676#ifdef DEBUG_PUSH
8677 xmlGenericError(xmlGenericErrorContext,
8678 "PP: entering START_TAG\n");
8679#endif
8680 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008681 }
8682 }
8683done:
8684#ifdef DEBUG_PUSH
8685 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8686#endif
8687 return(ret);
8688}
8689
8690/**
Owen Taylor3473f882001-02-23 17:55:21 +00008691 * xmlParseChunk:
8692 * @ctxt: an XML parser context
8693 * @chunk: an char array
8694 * @size: the size in byte of the chunk
8695 * @terminate: last chunk indicator
8696 *
8697 * Parse a Chunk of memory
8698 *
8699 * Returns zero if no error, the xmlParserErrors otherwise.
8700 */
8701int
8702xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8703 int terminate) {
8704 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8705 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8706 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8707 int cur = ctxt->input->cur - ctxt->input->base;
8708
8709 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8710 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8711 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008712 ctxt->input->end =
8713 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008714#ifdef DEBUG_PUSH
8715 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8716#endif
8717
8718 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8719 xmlParseTryOrFinish(ctxt, terminate);
8720 } else if (ctxt->instate != XML_PARSER_EOF) {
8721 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8722 xmlParserInputBufferPtr in = ctxt->input->buf;
8723 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8724 (in->raw != NULL)) {
8725 int nbchars;
8726
8727 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8728 if (nbchars < 0) {
8729 xmlGenericError(xmlGenericErrorContext,
8730 "xmlParseChunk: encoder error\n");
8731 return(XML_ERR_INVALID_ENCODING);
8732 }
8733 }
8734 }
8735 }
8736 xmlParseTryOrFinish(ctxt, terminate);
8737 if (terminate) {
8738 /*
8739 * Check for termination
8740 */
8741 if ((ctxt->instate != XML_PARSER_EOF) &&
8742 (ctxt->instate != XML_PARSER_EPILOG)) {
8743 ctxt->errNo = XML_ERR_DOCUMENT_END;
8744 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8745 ctxt->sax->error(ctxt->userData,
8746 "Extra content at the end of the document\n");
8747 ctxt->wellFormed = 0;
8748 ctxt->disableSAX = 1;
8749 }
8750 if (ctxt->instate != XML_PARSER_EOF) {
8751 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8752 (!ctxt->disableSAX))
8753 ctxt->sax->endDocument(ctxt->userData);
8754 }
8755 ctxt->instate = XML_PARSER_EOF;
8756 }
8757 return((xmlParserErrors) ctxt->errNo);
8758}
8759
8760/************************************************************************
8761 * *
8762 * I/O front end functions to the parser *
8763 * *
8764 ************************************************************************/
8765
8766/**
8767 * xmlStopParser:
8768 * @ctxt: an XML parser context
8769 *
8770 * Blocks further parser processing
8771 */
8772void
8773xmlStopParser(xmlParserCtxtPtr ctxt) {
8774 ctxt->instate = XML_PARSER_EOF;
8775 if (ctxt->input != NULL)
8776 ctxt->input->cur = BAD_CAST"";
8777}
8778
8779/**
8780 * xmlCreatePushParserCtxt:
8781 * @sax: a SAX handler
8782 * @user_data: The user data returned on SAX callbacks
8783 * @chunk: a pointer to an array of chars
8784 * @size: number of chars in the array
8785 * @filename: an optional file name or URI
8786 *
8787 * Create a parser context for using the XML parser in push mode
8788 * To allow content encoding detection, @size should be >= 4
8789 * The value of @filename is used for fetching external entities
8790 * and error/warning reports.
8791 *
8792 * Returns the new parser context or NULL
8793 */
8794xmlParserCtxtPtr
8795xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8796 const char *chunk, int size, const char *filename) {
8797 xmlParserCtxtPtr ctxt;
8798 xmlParserInputPtr inputStream;
8799 xmlParserInputBufferPtr buf;
8800 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8801
8802 /*
8803 * plug some encoding conversion routines
8804 */
8805 if ((chunk != NULL) && (size >= 4))
8806 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8807
8808 buf = xmlAllocParserInputBuffer(enc);
8809 if (buf == NULL) return(NULL);
8810
8811 ctxt = xmlNewParserCtxt();
8812 if (ctxt == NULL) {
8813 xmlFree(buf);
8814 return(NULL);
8815 }
8816 if (sax != NULL) {
8817 if (ctxt->sax != &xmlDefaultSAXHandler)
8818 xmlFree(ctxt->sax);
8819 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8820 if (ctxt->sax == NULL) {
8821 xmlFree(buf);
8822 xmlFree(ctxt);
8823 return(NULL);
8824 }
8825 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8826 if (user_data != NULL)
8827 ctxt->userData = user_data;
8828 }
8829 if (filename == NULL) {
8830 ctxt->directory = NULL;
8831 } else {
8832 ctxt->directory = xmlParserGetDirectory(filename);
8833 }
8834
8835 inputStream = xmlNewInputStream(ctxt);
8836 if (inputStream == NULL) {
8837 xmlFreeParserCtxt(ctxt);
8838 return(NULL);
8839 }
8840
8841 if (filename == NULL)
8842 inputStream->filename = NULL;
8843 else
8844 inputStream->filename = xmlMemStrdup(filename);
8845 inputStream->buf = buf;
8846 inputStream->base = inputStream->buf->buffer->content;
8847 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008848 inputStream->end =
8849 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008850
8851 inputPush(ctxt, inputStream);
8852
8853 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8854 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00008855 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8856 int cur = ctxt->input->cur - ctxt->input->base;
8857
Owen Taylor3473f882001-02-23 17:55:21 +00008858 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00008859
8860 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8861 ctxt->input->cur = ctxt->input->base + cur;
8862 ctxt->input->end =
8863 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008864#ifdef DEBUG_PUSH
8865 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8866#endif
8867 }
8868
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008869 if (enc != XML_CHAR_ENCODING_NONE) {
8870 xmlSwitchEncoding(ctxt, enc);
8871 }
8872
Owen Taylor3473f882001-02-23 17:55:21 +00008873 return(ctxt);
8874}
8875
8876/**
8877 * xmlCreateIOParserCtxt:
8878 * @sax: a SAX handler
8879 * @user_data: The user data returned on SAX callbacks
8880 * @ioread: an I/O read function
8881 * @ioclose: an I/O close function
8882 * @ioctx: an I/O handler
8883 * @enc: the charset encoding if known
8884 *
8885 * Create a parser context for using the XML parser with an existing
8886 * I/O stream
8887 *
8888 * Returns the new parser context or NULL
8889 */
8890xmlParserCtxtPtr
8891xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8892 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8893 void *ioctx, xmlCharEncoding enc) {
8894 xmlParserCtxtPtr ctxt;
8895 xmlParserInputPtr inputStream;
8896 xmlParserInputBufferPtr buf;
8897
8898 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8899 if (buf == NULL) return(NULL);
8900
8901 ctxt = xmlNewParserCtxt();
8902 if (ctxt == NULL) {
8903 xmlFree(buf);
8904 return(NULL);
8905 }
8906 if (sax != NULL) {
8907 if (ctxt->sax != &xmlDefaultSAXHandler)
8908 xmlFree(ctxt->sax);
8909 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8910 if (ctxt->sax == NULL) {
8911 xmlFree(buf);
8912 xmlFree(ctxt);
8913 return(NULL);
8914 }
8915 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8916 if (user_data != NULL)
8917 ctxt->userData = user_data;
8918 }
8919
8920 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8921 if (inputStream == NULL) {
8922 xmlFreeParserCtxt(ctxt);
8923 return(NULL);
8924 }
8925 inputPush(ctxt, inputStream);
8926
8927 return(ctxt);
8928}
8929
8930/************************************************************************
8931 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008932 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00008933 * *
8934 ************************************************************************/
8935
8936/**
8937 * xmlIOParseDTD:
8938 * @sax: the SAX handler block or NULL
8939 * @input: an Input Buffer
8940 * @enc: the charset encoding if known
8941 *
8942 * Load and parse a DTD
8943 *
8944 * Returns the resulting xmlDtdPtr or NULL in case of error.
8945 * @input will be freed at parsing end.
8946 */
8947
8948xmlDtdPtr
8949xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8950 xmlCharEncoding enc) {
8951 xmlDtdPtr ret = NULL;
8952 xmlParserCtxtPtr ctxt;
8953 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00008954 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00008955
8956 if (input == NULL)
8957 return(NULL);
8958
8959 ctxt = xmlNewParserCtxt();
8960 if (ctxt == NULL) {
8961 return(NULL);
8962 }
8963
8964 /*
8965 * Set-up the SAX context
8966 */
8967 if (sax != NULL) {
8968 if (ctxt->sax != NULL)
8969 xmlFree(ctxt->sax);
8970 ctxt->sax = sax;
8971 ctxt->userData = NULL;
8972 }
8973
8974 /*
8975 * generate a parser input from the I/O handler
8976 */
8977
8978 pinput = xmlNewIOInputStream(ctxt, input, enc);
8979 if (pinput == NULL) {
8980 if (sax != NULL) ctxt->sax = NULL;
8981 xmlFreeParserCtxt(ctxt);
8982 return(NULL);
8983 }
8984
8985 /*
8986 * plug some encoding conversion routines here.
8987 */
8988 xmlPushInput(ctxt, pinput);
8989
8990 pinput->filename = NULL;
8991 pinput->line = 1;
8992 pinput->col = 1;
8993 pinput->base = ctxt->input->cur;
8994 pinput->cur = ctxt->input->cur;
8995 pinput->free = NULL;
8996
8997 /*
8998 * let's parse that entity knowing it's an external subset.
8999 */
9000 ctxt->inSubset = 2;
9001 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9002 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9003 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009004
9005 if (enc == XML_CHAR_ENCODING_NONE) {
9006 /*
9007 * Get the 4 first bytes and decode the charset
9008 * if enc != XML_CHAR_ENCODING_NONE
9009 * plug some encoding conversion routines.
9010 */
9011 start[0] = RAW;
9012 start[1] = NXT(1);
9013 start[2] = NXT(2);
9014 start[3] = NXT(3);
9015 enc = xmlDetectCharEncoding(start, 4);
9016 if (enc != XML_CHAR_ENCODING_NONE) {
9017 xmlSwitchEncoding(ctxt, enc);
9018 }
9019 }
9020
Owen Taylor3473f882001-02-23 17:55:21 +00009021 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9022
9023 if (ctxt->myDoc != NULL) {
9024 if (ctxt->wellFormed) {
9025 ret = ctxt->myDoc->extSubset;
9026 ctxt->myDoc->extSubset = NULL;
9027 } else {
9028 ret = NULL;
9029 }
9030 xmlFreeDoc(ctxt->myDoc);
9031 ctxt->myDoc = NULL;
9032 }
9033 if (sax != NULL) ctxt->sax = NULL;
9034 xmlFreeParserCtxt(ctxt);
9035
9036 return(ret);
9037}
9038
9039/**
9040 * xmlSAXParseDTD:
9041 * @sax: the SAX handler block
9042 * @ExternalID: a NAME* containing the External ID of the DTD
9043 * @SystemID: a NAME* containing the URL to the DTD
9044 *
9045 * Load and parse an external subset.
9046 *
9047 * Returns the resulting xmlDtdPtr or NULL in case of error.
9048 */
9049
9050xmlDtdPtr
9051xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9052 const xmlChar *SystemID) {
9053 xmlDtdPtr ret = NULL;
9054 xmlParserCtxtPtr ctxt;
9055 xmlParserInputPtr input = NULL;
9056 xmlCharEncoding enc;
9057
9058 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9059
9060 ctxt = xmlNewParserCtxt();
9061 if (ctxt == NULL) {
9062 return(NULL);
9063 }
9064
9065 /*
9066 * Set-up the SAX context
9067 */
9068 if (sax != NULL) {
9069 if (ctxt->sax != NULL)
9070 xmlFree(ctxt->sax);
9071 ctxt->sax = sax;
9072 ctxt->userData = NULL;
9073 }
9074
9075 /*
9076 * Ask the Entity resolver to load the damn thing
9077 */
9078
9079 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9080 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9081 if (input == NULL) {
9082 if (sax != NULL) ctxt->sax = NULL;
9083 xmlFreeParserCtxt(ctxt);
9084 return(NULL);
9085 }
9086
9087 /*
9088 * plug some encoding conversion routines here.
9089 */
9090 xmlPushInput(ctxt, input);
9091 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9092 xmlSwitchEncoding(ctxt, enc);
9093
9094 if (input->filename == NULL)
9095 input->filename = (char *) xmlStrdup(SystemID);
9096 input->line = 1;
9097 input->col = 1;
9098 input->base = ctxt->input->cur;
9099 input->cur = ctxt->input->cur;
9100 input->free = NULL;
9101
9102 /*
9103 * let's parse that entity knowing it's an external subset.
9104 */
9105 ctxt->inSubset = 2;
9106 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9107 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9108 ExternalID, SystemID);
9109 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9110
9111 if (ctxt->myDoc != NULL) {
9112 if (ctxt->wellFormed) {
9113 ret = ctxt->myDoc->extSubset;
9114 ctxt->myDoc->extSubset = NULL;
9115 } else {
9116 ret = NULL;
9117 }
9118 xmlFreeDoc(ctxt->myDoc);
9119 ctxt->myDoc = NULL;
9120 }
9121 if (sax != NULL) ctxt->sax = NULL;
9122 xmlFreeParserCtxt(ctxt);
9123
9124 return(ret);
9125}
9126
9127/**
9128 * xmlParseDTD:
9129 * @ExternalID: a NAME* containing the External ID of the DTD
9130 * @SystemID: a NAME* containing the URL to the DTD
9131 *
9132 * Load and parse an external subset.
9133 *
9134 * Returns the resulting xmlDtdPtr or NULL in case of error.
9135 */
9136
9137xmlDtdPtr
9138xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9139 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9140}
9141
9142/************************************************************************
9143 * *
9144 * Front ends when parsing an Entity *
9145 * *
9146 ************************************************************************/
9147
9148/**
Owen Taylor3473f882001-02-23 17:55:21 +00009149 * xmlParseCtxtExternalEntity:
9150 * @ctx: the existing parsing context
9151 * @URL: the URL for the entity to load
9152 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009153 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009154 *
9155 * Parse an external general entity within an existing parsing context
9156 * An external general parsed entity is well-formed if it matches the
9157 * production labeled extParsedEnt.
9158 *
9159 * [78] extParsedEnt ::= TextDecl? content
9160 *
9161 * Returns 0 if the entity is well formed, -1 in case of args problem and
9162 * the parser error code otherwise
9163 */
9164
9165int
9166xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009167 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009168 xmlParserCtxtPtr ctxt;
9169 xmlDocPtr newDoc;
9170 xmlSAXHandlerPtr oldsax = NULL;
9171 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009172 xmlChar start[4];
9173 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009174
9175 if (ctx->depth > 40) {
9176 return(XML_ERR_ENTITY_LOOP);
9177 }
9178
Daniel Veillardcda96922001-08-21 10:56:31 +00009179 if (lst != NULL)
9180 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009181 if ((URL == NULL) && (ID == NULL))
9182 return(-1);
9183 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9184 return(-1);
9185
9186
9187 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9188 if (ctxt == NULL) return(-1);
9189 ctxt->userData = ctxt;
9190 oldsax = ctxt->sax;
9191 ctxt->sax = ctx->sax;
9192 newDoc = xmlNewDoc(BAD_CAST "1.0");
9193 if (newDoc == NULL) {
9194 xmlFreeParserCtxt(ctxt);
9195 return(-1);
9196 }
9197 if (ctx->myDoc != NULL) {
9198 newDoc->intSubset = ctx->myDoc->intSubset;
9199 newDoc->extSubset = ctx->myDoc->extSubset;
9200 }
9201 if (ctx->myDoc->URL != NULL) {
9202 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9203 }
9204 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9205 if (newDoc->children == NULL) {
9206 ctxt->sax = oldsax;
9207 xmlFreeParserCtxt(ctxt);
9208 newDoc->intSubset = NULL;
9209 newDoc->extSubset = NULL;
9210 xmlFreeDoc(newDoc);
9211 return(-1);
9212 }
9213 nodePush(ctxt, newDoc->children);
9214 if (ctx->myDoc == NULL) {
9215 ctxt->myDoc = newDoc;
9216 } else {
9217 ctxt->myDoc = ctx->myDoc;
9218 newDoc->children->doc = ctx->myDoc;
9219 }
9220
Daniel Veillard87a764e2001-06-20 17:41:10 +00009221 /*
9222 * Get the 4 first bytes and decode the charset
9223 * if enc != XML_CHAR_ENCODING_NONE
9224 * plug some encoding conversion routines.
9225 */
9226 GROW
9227 start[0] = RAW;
9228 start[1] = NXT(1);
9229 start[2] = NXT(2);
9230 start[3] = NXT(3);
9231 enc = xmlDetectCharEncoding(start, 4);
9232 if (enc != XML_CHAR_ENCODING_NONE) {
9233 xmlSwitchEncoding(ctxt, enc);
9234 }
9235
Owen Taylor3473f882001-02-23 17:55:21 +00009236 /*
9237 * Parse a possible text declaration first
9238 */
Owen Taylor3473f882001-02-23 17:55:21 +00009239 if ((RAW == '<') && (NXT(1) == '?') &&
9240 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9241 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9242 xmlParseTextDecl(ctxt);
9243 }
9244
9245 /*
9246 * Doing validity checking on chunk doesn't make sense
9247 */
9248 ctxt->instate = XML_PARSER_CONTENT;
9249 ctxt->validate = ctx->validate;
9250 ctxt->loadsubset = ctx->loadsubset;
9251 ctxt->depth = ctx->depth + 1;
9252 ctxt->replaceEntities = ctx->replaceEntities;
9253 if (ctxt->validate) {
9254 ctxt->vctxt.error = ctx->vctxt.error;
9255 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009256 } else {
9257 ctxt->vctxt.error = NULL;
9258 ctxt->vctxt.warning = NULL;
9259 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009260 ctxt->vctxt.nodeTab = NULL;
9261 ctxt->vctxt.nodeNr = 0;
9262 ctxt->vctxt.nodeMax = 0;
9263 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009264
9265 xmlParseContent(ctxt);
9266
9267 if ((RAW == '<') && (NXT(1) == '/')) {
9268 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9269 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9270 ctxt->sax->error(ctxt->userData,
9271 "chunk is not well balanced\n");
9272 ctxt->wellFormed = 0;
9273 ctxt->disableSAX = 1;
9274 } else if (RAW != 0) {
9275 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9276 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9277 ctxt->sax->error(ctxt->userData,
9278 "extra content at the end of well balanced chunk\n");
9279 ctxt->wellFormed = 0;
9280 ctxt->disableSAX = 1;
9281 }
9282 if (ctxt->node != newDoc->children) {
9283 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9284 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9285 ctxt->sax->error(ctxt->userData,
9286 "chunk is not well balanced\n");
9287 ctxt->wellFormed = 0;
9288 ctxt->disableSAX = 1;
9289 }
9290
9291 if (!ctxt->wellFormed) {
9292 if (ctxt->errNo == 0)
9293 ret = 1;
9294 else
9295 ret = ctxt->errNo;
9296 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009297 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009298 xmlNodePtr cur;
9299
9300 /*
9301 * Return the newly created nodeset after unlinking it from
9302 * they pseudo parent.
9303 */
9304 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009305 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009306 while (cur != NULL) {
9307 cur->parent = NULL;
9308 cur = cur->next;
9309 }
9310 newDoc->children->children = NULL;
9311 }
9312 ret = 0;
9313 }
9314 ctxt->sax = oldsax;
9315 xmlFreeParserCtxt(ctxt);
9316 newDoc->intSubset = NULL;
9317 newDoc->extSubset = NULL;
9318 xmlFreeDoc(newDoc);
9319
9320 return(ret);
9321}
9322
9323/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009324 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009325 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009326 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009327 * @sax: the SAX handler bloc (possibly NULL)
9328 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9329 * @depth: Used for loop detection, use 0
9330 * @URL: the URL for the entity to load
9331 * @ID: the System ID for the entity to load
9332 * @list: the return value for the set of parsed nodes
9333 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009334 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009335 *
9336 * Returns 0 if the entity is well formed, -1 in case of args problem and
9337 * the parser error code otherwise
9338 */
9339
Daniel Veillard257d9102001-05-08 10:41:44 +00009340static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009341xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9342 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009343 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009344 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009345 xmlParserCtxtPtr ctxt;
9346 xmlDocPtr newDoc;
9347 xmlSAXHandlerPtr oldsax = NULL;
9348 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009349 xmlChar start[4];
9350 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009351
9352 if (depth > 40) {
9353 return(XML_ERR_ENTITY_LOOP);
9354 }
9355
9356
9357
9358 if (list != NULL)
9359 *list = NULL;
9360 if ((URL == NULL) && (ID == NULL))
9361 return(-1);
9362 if (doc == NULL) /* @@ relax but check for dereferences */
9363 return(-1);
9364
9365
9366 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9367 if (ctxt == NULL) return(-1);
9368 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009369 if (oldctxt != NULL) {
9370 ctxt->_private = oldctxt->_private;
9371 ctxt->loadsubset = oldctxt->loadsubset;
9372 ctxt->validate = oldctxt->validate;
9373 ctxt->external = oldctxt->external;
9374 } else {
9375 /*
9376 * Doing validity checking on chunk without context
9377 * doesn't make sense
9378 */
9379 ctxt->_private = NULL;
9380 ctxt->validate = 0;
9381 ctxt->external = 2;
9382 ctxt->loadsubset = 0;
9383 }
Owen Taylor3473f882001-02-23 17:55:21 +00009384 if (sax != NULL) {
9385 oldsax = ctxt->sax;
9386 ctxt->sax = sax;
9387 if (user_data != NULL)
9388 ctxt->userData = user_data;
9389 }
9390 newDoc = xmlNewDoc(BAD_CAST "1.0");
9391 if (newDoc == NULL) {
9392 xmlFreeParserCtxt(ctxt);
9393 return(-1);
9394 }
9395 if (doc != NULL) {
9396 newDoc->intSubset = doc->intSubset;
9397 newDoc->extSubset = doc->extSubset;
9398 }
9399 if (doc->URL != NULL) {
9400 newDoc->URL = xmlStrdup(doc->URL);
9401 }
9402 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9403 if (newDoc->children == NULL) {
9404 if (sax != NULL)
9405 ctxt->sax = oldsax;
9406 xmlFreeParserCtxt(ctxt);
9407 newDoc->intSubset = NULL;
9408 newDoc->extSubset = NULL;
9409 xmlFreeDoc(newDoc);
9410 return(-1);
9411 }
9412 nodePush(ctxt, newDoc->children);
9413 if (doc == NULL) {
9414 ctxt->myDoc = newDoc;
9415 } else {
9416 ctxt->myDoc = doc;
9417 newDoc->children->doc = doc;
9418 }
9419
Daniel Veillard87a764e2001-06-20 17:41:10 +00009420 /*
9421 * Get the 4 first bytes and decode the charset
9422 * if enc != XML_CHAR_ENCODING_NONE
9423 * plug some encoding conversion routines.
9424 */
9425 GROW;
9426 start[0] = RAW;
9427 start[1] = NXT(1);
9428 start[2] = NXT(2);
9429 start[3] = NXT(3);
9430 enc = xmlDetectCharEncoding(start, 4);
9431 if (enc != XML_CHAR_ENCODING_NONE) {
9432 xmlSwitchEncoding(ctxt, enc);
9433 }
9434
Owen Taylor3473f882001-02-23 17:55:21 +00009435 /*
9436 * Parse a possible text declaration first
9437 */
Owen Taylor3473f882001-02-23 17:55:21 +00009438 if ((RAW == '<') && (NXT(1) == '?') &&
9439 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9440 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9441 xmlParseTextDecl(ctxt);
9442 }
9443
Owen Taylor3473f882001-02-23 17:55:21 +00009444 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009445 ctxt->depth = depth;
9446
9447 xmlParseContent(ctxt);
9448
9449 if ((RAW == '<') && (NXT(1) == '/')) {
9450 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9451 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9452 ctxt->sax->error(ctxt->userData,
9453 "chunk is not well balanced\n");
9454 ctxt->wellFormed = 0;
9455 ctxt->disableSAX = 1;
9456 } else if (RAW != 0) {
9457 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9458 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9459 ctxt->sax->error(ctxt->userData,
9460 "extra content at the end of well balanced chunk\n");
9461 ctxt->wellFormed = 0;
9462 ctxt->disableSAX = 1;
9463 }
9464 if (ctxt->node != newDoc->children) {
9465 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9467 ctxt->sax->error(ctxt->userData,
9468 "chunk is not well balanced\n");
9469 ctxt->wellFormed = 0;
9470 ctxt->disableSAX = 1;
9471 }
9472
9473 if (!ctxt->wellFormed) {
9474 if (ctxt->errNo == 0)
9475 ret = 1;
9476 else
9477 ret = ctxt->errNo;
9478 } else {
9479 if (list != NULL) {
9480 xmlNodePtr cur;
9481
9482 /*
9483 * Return the newly created nodeset after unlinking it from
9484 * they pseudo parent.
9485 */
9486 cur = newDoc->children->children;
9487 *list = cur;
9488 while (cur != NULL) {
9489 cur->parent = NULL;
9490 cur = cur->next;
9491 }
9492 newDoc->children->children = NULL;
9493 }
9494 ret = 0;
9495 }
9496 if (sax != NULL)
9497 ctxt->sax = oldsax;
9498 xmlFreeParserCtxt(ctxt);
9499 newDoc->intSubset = NULL;
9500 newDoc->extSubset = NULL;
9501 xmlFreeDoc(newDoc);
9502
9503 return(ret);
9504}
9505
9506/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009507 * xmlParseExternalEntity:
9508 * @doc: the document the chunk pertains to
9509 * @sax: the SAX handler bloc (possibly NULL)
9510 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9511 * @depth: Used for loop detection, use 0
9512 * @URL: the URL for the entity to load
9513 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009514 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009515 *
9516 * Parse an external general entity
9517 * An external general parsed entity is well-formed if it matches the
9518 * production labeled extParsedEnt.
9519 *
9520 * [78] extParsedEnt ::= TextDecl? content
9521 *
9522 * Returns 0 if the entity is well formed, -1 in case of args problem and
9523 * the parser error code otherwise
9524 */
9525
9526int
9527xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009528 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009529 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009530 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009531}
9532
9533/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009534 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009535 * @doc: the document the chunk pertains to
9536 * @sax: the SAX handler bloc (possibly NULL)
9537 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9538 * @depth: Used for loop detection, use 0
9539 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009540 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009541 *
9542 * Parse a well-balanced chunk of an XML document
9543 * called by the parser
9544 * The allowed sequence for the Well Balanced Chunk is the one defined by
9545 * the content production in the XML grammar:
9546 *
9547 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9548 *
9549 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9550 * the parser error code otherwise
9551 */
9552
9553int
9554xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009555 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009556 xmlParserCtxtPtr ctxt;
9557 xmlDocPtr newDoc;
9558 xmlSAXHandlerPtr oldsax = NULL;
9559 int size;
9560 int ret = 0;
9561
9562 if (depth > 40) {
9563 return(XML_ERR_ENTITY_LOOP);
9564 }
9565
9566
Daniel Veillardcda96922001-08-21 10:56:31 +00009567 if (lst != NULL)
9568 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009569 if (string == NULL)
9570 return(-1);
9571
9572 size = xmlStrlen(string);
9573
9574 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9575 if (ctxt == NULL) return(-1);
9576 ctxt->userData = ctxt;
9577 if (sax != NULL) {
9578 oldsax = ctxt->sax;
9579 ctxt->sax = sax;
9580 if (user_data != NULL)
9581 ctxt->userData = user_data;
9582 }
9583 newDoc = xmlNewDoc(BAD_CAST "1.0");
9584 if (newDoc == NULL) {
9585 xmlFreeParserCtxt(ctxt);
9586 return(-1);
9587 }
9588 if (doc != NULL) {
9589 newDoc->intSubset = doc->intSubset;
9590 newDoc->extSubset = doc->extSubset;
9591 }
9592 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9593 if (newDoc->children == NULL) {
9594 if (sax != NULL)
9595 ctxt->sax = oldsax;
9596 xmlFreeParserCtxt(ctxt);
9597 newDoc->intSubset = NULL;
9598 newDoc->extSubset = NULL;
9599 xmlFreeDoc(newDoc);
9600 return(-1);
9601 }
9602 nodePush(ctxt, newDoc->children);
9603 if (doc == NULL) {
9604 ctxt->myDoc = newDoc;
9605 } else {
9606 ctxt->myDoc = doc;
9607 newDoc->children->doc = doc;
9608 }
9609 ctxt->instate = XML_PARSER_CONTENT;
9610 ctxt->depth = depth;
9611
9612 /*
9613 * Doing validity checking on chunk doesn't make sense
9614 */
9615 ctxt->validate = 0;
9616 ctxt->loadsubset = 0;
9617
9618 xmlParseContent(ctxt);
9619
9620 if ((RAW == '<') && (NXT(1) == '/')) {
9621 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9622 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9623 ctxt->sax->error(ctxt->userData,
9624 "chunk is not well balanced\n");
9625 ctxt->wellFormed = 0;
9626 ctxt->disableSAX = 1;
9627 } else if (RAW != 0) {
9628 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9629 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9630 ctxt->sax->error(ctxt->userData,
9631 "extra content at the end of well balanced chunk\n");
9632 ctxt->wellFormed = 0;
9633 ctxt->disableSAX = 1;
9634 }
9635 if (ctxt->node != newDoc->children) {
9636 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9637 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9638 ctxt->sax->error(ctxt->userData,
9639 "chunk is not well balanced\n");
9640 ctxt->wellFormed = 0;
9641 ctxt->disableSAX = 1;
9642 }
9643
9644 if (!ctxt->wellFormed) {
9645 if (ctxt->errNo == 0)
9646 ret = 1;
9647 else
9648 ret = ctxt->errNo;
9649 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009650 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009651 xmlNodePtr cur;
9652
9653 /*
9654 * Return the newly created nodeset after unlinking it from
9655 * they pseudo parent.
9656 */
9657 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009658 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009659 while (cur != NULL) {
9660 cur->parent = NULL;
9661 cur = cur->next;
9662 }
9663 newDoc->children->children = NULL;
9664 }
9665 ret = 0;
9666 }
9667 if (sax != NULL)
9668 ctxt->sax = oldsax;
9669 xmlFreeParserCtxt(ctxt);
9670 newDoc->intSubset = NULL;
9671 newDoc->extSubset = NULL;
9672 xmlFreeDoc(newDoc);
9673
9674 return(ret);
9675}
9676
9677/**
9678 * xmlSAXParseEntity:
9679 * @sax: the SAX handler block
9680 * @filename: the filename
9681 *
9682 * parse an XML external entity out of context and build a tree.
9683 * It use the given SAX function block to handle the parsing callback.
9684 * If sax is NULL, fallback to the default DOM tree building routines.
9685 *
9686 * [78] extParsedEnt ::= TextDecl? content
9687 *
9688 * This correspond to a "Well Balanced" chunk
9689 *
9690 * Returns the resulting document tree
9691 */
9692
9693xmlDocPtr
9694xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9695 xmlDocPtr ret;
9696 xmlParserCtxtPtr ctxt;
9697 char *directory = NULL;
9698
9699 ctxt = xmlCreateFileParserCtxt(filename);
9700 if (ctxt == NULL) {
9701 return(NULL);
9702 }
9703 if (sax != NULL) {
9704 if (ctxt->sax != NULL)
9705 xmlFree(ctxt->sax);
9706 ctxt->sax = sax;
9707 ctxt->userData = NULL;
9708 }
9709
9710 if ((ctxt->directory == NULL) && (directory == NULL))
9711 directory = xmlParserGetDirectory(filename);
9712
9713 xmlParseExtParsedEnt(ctxt);
9714
9715 if (ctxt->wellFormed)
9716 ret = ctxt->myDoc;
9717 else {
9718 ret = NULL;
9719 xmlFreeDoc(ctxt->myDoc);
9720 ctxt->myDoc = NULL;
9721 }
9722 if (sax != NULL)
9723 ctxt->sax = NULL;
9724 xmlFreeParserCtxt(ctxt);
9725
9726 return(ret);
9727}
9728
9729/**
9730 * xmlParseEntity:
9731 * @filename: the filename
9732 *
9733 * parse an XML external entity out of context and build a tree.
9734 *
9735 * [78] extParsedEnt ::= TextDecl? content
9736 *
9737 * This correspond to a "Well Balanced" chunk
9738 *
9739 * Returns the resulting document tree
9740 */
9741
9742xmlDocPtr
9743xmlParseEntity(const char *filename) {
9744 return(xmlSAXParseEntity(NULL, filename));
9745}
9746
9747/**
9748 * xmlCreateEntityParserCtxt:
9749 * @URL: the entity URL
9750 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009751 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +00009752 *
9753 * Create a parser context for an external entity
9754 * Automatic support for ZLIB/Compress compressed document is provided
9755 * by default if found at compile-time.
9756 *
9757 * Returns the new parser context or NULL
9758 */
9759xmlParserCtxtPtr
9760xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9761 const xmlChar *base) {
9762 xmlParserCtxtPtr ctxt;
9763 xmlParserInputPtr inputStream;
9764 char *directory = NULL;
9765 xmlChar *uri;
9766
9767 ctxt = xmlNewParserCtxt();
9768 if (ctxt == NULL) {
9769 return(NULL);
9770 }
9771
9772 uri = xmlBuildURI(URL, base);
9773
9774 if (uri == NULL) {
9775 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9776 if (inputStream == NULL) {
9777 xmlFreeParserCtxt(ctxt);
9778 return(NULL);
9779 }
9780
9781 inputPush(ctxt, inputStream);
9782
9783 if ((ctxt->directory == NULL) && (directory == NULL))
9784 directory = xmlParserGetDirectory((char *)URL);
9785 if ((ctxt->directory == NULL) && (directory != NULL))
9786 ctxt->directory = directory;
9787 } else {
9788 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9789 if (inputStream == NULL) {
9790 xmlFree(uri);
9791 xmlFreeParserCtxt(ctxt);
9792 return(NULL);
9793 }
9794
9795 inputPush(ctxt, inputStream);
9796
9797 if ((ctxt->directory == NULL) && (directory == NULL))
9798 directory = xmlParserGetDirectory((char *)uri);
9799 if ((ctxt->directory == NULL) && (directory != NULL))
9800 ctxt->directory = directory;
9801 xmlFree(uri);
9802 }
9803
9804 return(ctxt);
9805}
9806
9807/************************************************************************
9808 * *
9809 * Front ends when parsing from a file *
9810 * *
9811 ************************************************************************/
9812
9813/**
9814 * xmlCreateFileParserCtxt:
9815 * @filename: the filename
9816 *
9817 * Create a parser context for a file content.
9818 * Automatic support for ZLIB/Compress compressed document is provided
9819 * by default if found at compile-time.
9820 *
9821 * Returns the new parser context or NULL
9822 */
9823xmlParserCtxtPtr
9824xmlCreateFileParserCtxt(const char *filename)
9825{
9826 xmlParserCtxtPtr ctxt;
9827 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +00009828 char *directory = NULL;
9829
Owen Taylor3473f882001-02-23 17:55:21 +00009830 ctxt = xmlNewParserCtxt();
9831 if (ctxt == NULL) {
9832 if (xmlDefaultSAXHandler.error != NULL) {
9833 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9834 }
9835 return(NULL);
9836 }
9837
Daniel Veillard9f7b84b2001-08-23 15:31:19 +00009838 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009839 if (inputStream == NULL) {
9840 xmlFreeParserCtxt(ctxt);
9841 return(NULL);
9842 }
9843
Owen Taylor3473f882001-02-23 17:55:21 +00009844 inputPush(ctxt, inputStream);
9845 if ((ctxt->directory == NULL) && (directory == NULL))
9846 directory = xmlParserGetDirectory(filename);
9847 if ((ctxt->directory == NULL) && (directory != NULL))
9848 ctxt->directory = directory;
9849
9850 return(ctxt);
9851}
9852
9853/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009854 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +00009855 * @sax: the SAX handler block
9856 * @filename: the filename
9857 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9858 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +00009859 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +00009860 *
9861 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9862 * compressed document is provided by default if found at compile-time.
9863 * It use the given SAX function block to handle the parsing callback.
9864 * If sax is NULL, fallback to the default DOM tree building routines.
9865 *
Daniel Veillarda293c322001-10-02 13:54:14 +00009866 * User data (void *) is stored within the parser context, so it is
9867 * available nearly everywhere in libxml.
9868 *
Owen Taylor3473f882001-02-23 17:55:21 +00009869 * Returns the resulting document tree
9870 */
9871
9872xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +00009873xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
9874 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +00009875 xmlDocPtr ret;
9876 xmlParserCtxtPtr ctxt;
9877 char *directory = NULL;
9878
Daniel Veillard635ef722001-10-29 11:48:19 +00009879 xmlInitParser();
9880
Owen Taylor3473f882001-02-23 17:55:21 +00009881 ctxt = xmlCreateFileParserCtxt(filename);
9882 if (ctxt == NULL) {
9883 return(NULL);
9884 }
9885 if (sax != NULL) {
9886 if (ctxt->sax != NULL)
9887 xmlFree(ctxt->sax);
9888 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009889 }
Daniel Veillarda293c322001-10-02 13:54:14 +00009890 if (data!=NULL) {
9891 ctxt->_private=data;
9892 }
Owen Taylor3473f882001-02-23 17:55:21 +00009893
9894 if ((ctxt->directory == NULL) && (directory == NULL))
9895 directory = xmlParserGetDirectory(filename);
9896 if ((ctxt->directory == NULL) && (directory != NULL))
9897 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9898
9899 xmlParseDocument(ctxt);
9900
9901 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9902 else {
9903 ret = NULL;
9904 xmlFreeDoc(ctxt->myDoc);
9905 ctxt->myDoc = NULL;
9906 }
9907 if (sax != NULL)
9908 ctxt->sax = NULL;
9909 xmlFreeParserCtxt(ctxt);
9910
9911 return(ret);
9912}
9913
9914/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009915 * xmlSAXParseFile:
9916 * @sax: the SAX handler block
9917 * @filename: the filename
9918 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9919 * documents
9920 *
9921 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9922 * compressed document is provided by default if found at compile-time.
9923 * It use the given SAX function block to handle the parsing callback.
9924 * If sax is NULL, fallback to the default DOM tree building routines.
9925 *
9926 * Returns the resulting document tree
9927 */
9928
9929xmlDocPtr
9930xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9931 int recovery) {
9932 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
9933}
9934
9935/**
Owen Taylor3473f882001-02-23 17:55:21 +00009936 * xmlRecoverDoc:
9937 * @cur: a pointer to an array of xmlChar
9938 *
9939 * parse an XML in-memory document and build a tree.
9940 * In the case the document is not Well Formed, a tree is built anyway
9941 *
9942 * Returns the resulting document tree
9943 */
9944
9945xmlDocPtr
9946xmlRecoverDoc(xmlChar *cur) {
9947 return(xmlSAXParseDoc(NULL, cur, 1));
9948}
9949
9950/**
9951 * xmlParseFile:
9952 * @filename: the filename
9953 *
9954 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9955 * compressed document is provided by default if found at compile-time.
9956 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +00009957 * Returns the resulting document tree if the file was wellformed,
9958 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +00009959 */
9960
9961xmlDocPtr
9962xmlParseFile(const char *filename) {
9963 return(xmlSAXParseFile(NULL, filename, 0));
9964}
9965
9966/**
9967 * xmlRecoverFile:
9968 * @filename: the filename
9969 *
9970 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9971 * compressed document is provided by default if found at compile-time.
9972 * In the case the document is not Well Formed, a tree is built anyway
9973 *
9974 * Returns the resulting document tree
9975 */
9976
9977xmlDocPtr
9978xmlRecoverFile(const char *filename) {
9979 return(xmlSAXParseFile(NULL, filename, 1));
9980}
9981
9982
9983/**
9984 * xmlSetupParserForBuffer:
9985 * @ctxt: an XML parser context
9986 * @buffer: a xmlChar * buffer
9987 * @filename: a file name
9988 *
9989 * Setup the parser context to parse a new buffer; Clears any prior
9990 * contents from the parser context. The buffer parameter must not be
9991 * NULL, but the filename parameter can be
9992 */
9993void
9994xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9995 const char* filename)
9996{
9997 xmlParserInputPtr input;
9998
9999 input = xmlNewInputStream(ctxt);
10000 if (input == NULL) {
10001 perror("malloc");
10002 xmlFree(ctxt);
10003 return;
10004 }
10005
10006 xmlClearParserCtxt(ctxt);
10007 if (filename != NULL)
10008 input->filename = xmlMemStrdup(filename);
10009 input->base = buffer;
10010 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010011 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010012 inputPush(ctxt, input);
10013}
10014
10015/**
10016 * xmlSAXUserParseFile:
10017 * @sax: a SAX handler
10018 * @user_data: The user data returned on SAX callbacks
10019 * @filename: a file name
10020 *
10021 * parse an XML file and call the given SAX handler routines.
10022 * Automatic support for ZLIB/Compress compressed document is provided
10023 *
10024 * Returns 0 in case of success or a error number otherwise
10025 */
10026int
10027xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10028 const char *filename) {
10029 int ret = 0;
10030 xmlParserCtxtPtr ctxt;
10031
10032 ctxt = xmlCreateFileParserCtxt(filename);
10033 if (ctxt == NULL) return -1;
10034 if (ctxt->sax != &xmlDefaultSAXHandler)
10035 xmlFree(ctxt->sax);
10036 ctxt->sax = sax;
10037 if (user_data != NULL)
10038 ctxt->userData = user_data;
10039
10040 xmlParseDocument(ctxt);
10041
10042 if (ctxt->wellFormed)
10043 ret = 0;
10044 else {
10045 if (ctxt->errNo != 0)
10046 ret = ctxt->errNo;
10047 else
10048 ret = -1;
10049 }
10050 if (sax != NULL)
10051 ctxt->sax = NULL;
10052 xmlFreeParserCtxt(ctxt);
10053
10054 return ret;
10055}
10056
10057/************************************************************************
10058 * *
10059 * Front ends when parsing from memory *
10060 * *
10061 ************************************************************************/
10062
10063/**
10064 * xmlCreateMemoryParserCtxt:
10065 * @buffer: a pointer to a char array
10066 * @size: the size of the array
10067 *
10068 * Create a parser context for an XML in-memory document.
10069 *
10070 * Returns the new parser context or NULL
10071 */
10072xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010073xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010074 xmlParserCtxtPtr ctxt;
10075 xmlParserInputPtr input;
10076 xmlParserInputBufferPtr buf;
10077
10078 if (buffer == NULL)
10079 return(NULL);
10080 if (size <= 0)
10081 return(NULL);
10082
10083 ctxt = xmlNewParserCtxt();
10084 if (ctxt == NULL)
10085 return(NULL);
10086
10087 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
10088 if (buf == NULL) return(NULL);
10089
10090 input = xmlNewInputStream(ctxt);
10091 if (input == NULL) {
10092 xmlFreeParserCtxt(ctxt);
10093 return(NULL);
10094 }
10095
10096 input->filename = NULL;
10097 input->buf = buf;
10098 input->base = input->buf->buffer->content;
10099 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010100 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010101
10102 inputPush(ctxt, input);
10103 return(ctxt);
10104}
10105
10106/**
10107 * xmlSAXParseMemory:
10108 * @sax: the SAX handler block
10109 * @buffer: an pointer to a char array
10110 * @size: the size of the array
10111 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10112 * documents
10113 *
10114 * parse an XML in-memory block and use the given SAX function block
10115 * to handle the parsing callback. If sax is NULL, fallback to the default
10116 * DOM tree building routines.
10117 *
10118 * Returns the resulting document tree
10119 */
10120xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010121xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10122 int size, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000010123 xmlDocPtr ret;
10124 xmlParserCtxtPtr ctxt;
10125
10126 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10127 if (ctxt == NULL) return(NULL);
10128 if (sax != NULL) {
10129 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010130 }
10131
10132 xmlParseDocument(ctxt);
10133
10134 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10135 else {
10136 ret = NULL;
10137 xmlFreeDoc(ctxt->myDoc);
10138 ctxt->myDoc = NULL;
10139 }
10140 if (sax != NULL)
10141 ctxt->sax = NULL;
10142 xmlFreeParserCtxt(ctxt);
10143
10144 return(ret);
10145}
10146
10147/**
10148 * xmlParseMemory:
10149 * @buffer: an pointer to a char array
10150 * @size: the size of the array
10151 *
10152 * parse an XML in-memory block and build a tree.
10153 *
10154 * Returns the resulting document tree
10155 */
10156
Daniel Veillard50822cb2001-07-26 20:05:51 +000010157xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010158 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10159}
10160
10161/**
10162 * xmlRecoverMemory:
10163 * @buffer: an pointer to a char array
10164 * @size: the size of the array
10165 *
10166 * parse an XML in-memory block and build a tree.
10167 * In the case the document is not Well Formed, a tree is built anyway
10168 *
10169 * Returns the resulting document tree
10170 */
10171
Daniel Veillard50822cb2001-07-26 20:05:51 +000010172xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010173 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10174}
10175
10176/**
10177 * xmlSAXUserParseMemory:
10178 * @sax: a SAX handler
10179 * @user_data: The user data returned on SAX callbacks
10180 * @buffer: an in-memory XML document input
10181 * @size: the length of the XML document in bytes
10182 *
10183 * A better SAX parsing routine.
10184 * parse an XML in-memory buffer and call the given SAX handler routines.
10185 *
10186 * Returns 0 in case of success or a error number otherwise
10187 */
10188int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010189 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010190 int ret = 0;
10191 xmlParserCtxtPtr ctxt;
10192 xmlSAXHandlerPtr oldsax = NULL;
10193
10194 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10195 if (ctxt == NULL) return -1;
10196 if (sax != NULL) {
10197 oldsax = ctxt->sax;
10198 ctxt->sax = sax;
10199 }
Daniel Veillard30211a02001-04-26 09:33:18 +000010200 if (user_data != NULL)
10201 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010202
10203 xmlParseDocument(ctxt);
10204
10205 if (ctxt->wellFormed)
10206 ret = 0;
10207 else {
10208 if (ctxt->errNo != 0)
10209 ret = ctxt->errNo;
10210 else
10211 ret = -1;
10212 }
10213 if (sax != NULL) {
10214 ctxt->sax = oldsax;
10215 }
10216 xmlFreeParserCtxt(ctxt);
10217
10218 return ret;
10219}
10220
10221/**
10222 * xmlCreateDocParserCtxt:
10223 * @cur: a pointer to an array of xmlChar
10224 *
10225 * Creates a parser context for an XML in-memory document.
10226 *
10227 * Returns the new parser context or NULL
10228 */
10229xmlParserCtxtPtr
10230xmlCreateDocParserCtxt(xmlChar *cur) {
10231 int len;
10232
10233 if (cur == NULL)
10234 return(NULL);
10235 len = xmlStrlen(cur);
10236 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10237}
10238
10239/**
10240 * xmlSAXParseDoc:
10241 * @sax: the SAX handler block
10242 * @cur: a pointer to an array of xmlChar
10243 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10244 * documents
10245 *
10246 * parse an XML in-memory document and build a tree.
10247 * It use the given SAX function block to handle the parsing callback.
10248 * If sax is NULL, fallback to the default DOM tree building routines.
10249 *
10250 * Returns the resulting document tree
10251 */
10252
10253xmlDocPtr
10254xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10255 xmlDocPtr ret;
10256 xmlParserCtxtPtr ctxt;
10257
10258 if (cur == NULL) return(NULL);
10259
10260
10261 ctxt = xmlCreateDocParserCtxt(cur);
10262 if (ctxt == NULL) return(NULL);
10263 if (sax != NULL) {
10264 ctxt->sax = sax;
10265 ctxt->userData = NULL;
10266 }
10267
10268 xmlParseDocument(ctxt);
10269 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10270 else {
10271 ret = NULL;
10272 xmlFreeDoc(ctxt->myDoc);
10273 ctxt->myDoc = NULL;
10274 }
10275 if (sax != NULL)
10276 ctxt->sax = NULL;
10277 xmlFreeParserCtxt(ctxt);
10278
10279 return(ret);
10280}
10281
10282/**
10283 * xmlParseDoc:
10284 * @cur: a pointer to an array of xmlChar
10285 *
10286 * parse an XML in-memory document and build a tree.
10287 *
10288 * Returns the resulting document tree
10289 */
10290
10291xmlDocPtr
10292xmlParseDoc(xmlChar *cur) {
10293 return(xmlSAXParseDoc(NULL, cur, 0));
10294}
10295
Daniel Veillard8107a222002-01-13 14:10:10 +000010296/************************************************************************
10297 * *
10298 * Specific function to keep track of entities references *
10299 * and used by the XSLT debugger *
10300 * *
10301 ************************************************************************/
10302
10303static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10304
10305/**
10306 * xmlAddEntityReference:
10307 * @ent : A valid entity
10308 * @firstNode : A valid first node for children of entity
10309 * @lastNode : A valid last node of children entity
10310 *
10311 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10312 */
10313static void
10314xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10315 xmlNodePtr lastNode)
10316{
10317 if (xmlEntityRefFunc != NULL) {
10318 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10319 }
10320}
10321
10322
10323/**
10324 * xmlSetEntityReferenceFunc:
10325 * @func : A valid function
10326 *
10327 * Set the function to call call back when a xml reference has been made
10328 */
10329void
10330xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10331{
10332 xmlEntityRefFunc = func;
10333}
Owen Taylor3473f882001-02-23 17:55:21 +000010334
10335/************************************************************************
10336 * *
10337 * Miscellaneous *
10338 * *
10339 ************************************************************************/
10340
10341#ifdef LIBXML_XPATH_ENABLED
10342#include <libxml/xpath.h>
10343#endif
10344
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010345extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000010346static int xmlParserInitialized = 0;
10347
10348/**
10349 * xmlInitParser:
10350 *
10351 * Initialization function for the XML parser.
10352 * This is not reentrant. Call once before processing in case of
10353 * use in multithreaded programs.
10354 */
10355
10356void
10357xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010358 if (xmlParserInitialized != 0)
10359 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010360
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010361 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10362 (xmlGenericError == NULL))
10363 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010364 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010365 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010366 xmlInitCharEncodingHandlers();
10367 xmlInitializePredefinedEntities();
10368 xmlDefaultSAXHandlerInit();
10369 xmlRegisterDefaultInputCallbacks();
10370 xmlRegisterDefaultOutputCallbacks();
10371#ifdef LIBXML_HTML_ENABLED
10372 htmlInitAutoClose();
10373 htmlDefaultSAXHandlerInit();
10374#endif
10375#ifdef LIBXML_XPATH_ENABLED
10376 xmlXPathInit();
10377#endif
10378 xmlParserInitialized = 1;
10379}
10380
10381/**
10382 * xmlCleanupParser:
10383 *
10384 * Cleanup function for the XML parser. It tries to reclaim all
10385 * parsing related global memory allocated for the parser processing.
10386 * It doesn't deallocate any document related memory. Calling this
10387 * function should not prevent reusing the parser.
10388 */
10389
10390void
10391xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000010392 xmlCleanupCharEncodingHandlers();
10393 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010394#ifdef LIBXML_CATALOG_ENABLED
10395 xmlCatalogCleanup();
10396#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000010397 xmlCleanupThreads();
10398 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010399}