blob: 9e34792181e6ad7508c1afc8f6066c3c52ab7ef9 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
/*
 * XML_DIR_SEP:
 *
 * Directory separator character: a backslash on native Win32 builds
 * (Cygwin excluded), a forward slash everywhere else.
 */
#if defined(WIN32) && !defined (__CYGWIN__)
#define XML_DIR_SEP '\\'
#else
#define XML_DIR_SEP '/'
#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
/**
 * MAX_DEPTH:
 *
 * arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. nodePush() refuses to stack more element nodes
 * once this limit is exceeded.
 */
#define MAX_DEPTH 1024

/* Enables the sections of this file guarded by "#ifdef SAX2". */
#define SAX2 1

/*
 * NOTE(review): presumably the sizes of scratch buffers used by parsing
 * routines located further down in this file -- confirm at the use sites.
 */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/*
 * NOTE(review): marker string, looks like it tags documents built in
 * SAX compatibility mode -- confirm at the use sites.
 */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
94
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The table is terminated by a NULL entry.
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
103
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */

/*
 * Forward declarations. The matching definitions are not located in
 * this part of the file.
 */
xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);

static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
                              xmlSAXHandlerPtr sax,
                              void *user_data, int depth, const xmlChar *URL,
                              const xmlChar *ID, xmlNodePtr *list);

/* Only declared when legacy support is compiled in. */
#ifdef LIBXML_LEGACY_ENABLED
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
#endif /* LIBXML_LEGACY_ENABLED */

static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
        const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000123
124/************************************************************************
125 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000126 * Some factorized error routines *
127 * *
128 ************************************************************************/
129
130/**
131 * xmlErrMemory:
132 * @ctxt: an XML parser context
133 * @extra: extra informations
134 *
135 * Handle a redefinition of attribute error
136 */
137static void
138xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
139{
140 if (ctxt != NULL) {
141 ctxt->errNo = XML_ERR_NO_MEMORY;
142 ctxt->instate = XML_PARSER_EOF;
143 ctxt->disableSAX = 1;
144 }
145 if ((ctxt != NULL) && (ctxt->sax != NULL)
146 && (ctxt->sax->error != NULL)) {
147 if (extra)
148 ctxt->sax->error(ctxt->userData,
149 "Memory allocation failed : %s\n", extra);
150 else
151 ctxt->sax->error(ctxt->userData,
152 "Memory allocation failed !\n");
153 } else {
154 if (extra)
155 xmlGenericError(xmlGenericErrorContext,
156 "Memory allocation failed : %s\n", extra);
157 else
158 xmlGenericError(xmlGenericErrorContext,
159 "Memory allocation failed !\n");
160 }
161}
162
163/**
164 * xmlErrAttributeDup:
165 * @ctxt: an XML parser context
166 * @prefix: the attribute prefix
167 * @localname: the attribute localname
168 *
169 * Handle a redefinition of attribute error
170 */
171static void
172xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
173 const xmlChar * localname)
174{
175 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
176 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
177 if (prefix == NULL)
178 ctxt->sax->error(ctxt->userData,
179 "Attribute %s redefined\n", localname);
180 else
181 ctxt->sax->error(ctxt->userData,
182 "Attribute %s:%s redefined\n", prefix,
183 localname);
184 }
185 ctxt->wellFormed = 0;
186 if (ctxt->recovery == 0)
187 ctxt->disableSAX = 1;
188}
189
190/**
191 * xmlFatalErr:
192 * @ctxt: an XML parser context
193 * @error: the error number
194 * @extra: extra information string
195 *
196 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
197 */
198static void
199xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char * info)
200{
201 const char *errmsg;
202
203 if (ctxt == NULL) {
204 xmlGenericError(xmlGenericErrorContext,
205 "xmlFatalErr: no context !\n");
206 return;
207 }
208 if ((ctxt->sax == NULL) || (ctxt->sax->error == NULL))
209 return;
210 switch (error) {
211 case XML_ERR_INVALID_HEX_CHARREF:
212 errmsg = "CharRef: invalid hexadecimal value\n";
213 break;
214 case XML_ERR_INVALID_DEC_CHARREF:
215 errmsg = "CharRef: invalid decimal value\n";
216 break;
217 case XML_ERR_INVALID_CHARREF:
218 errmsg = "CharRef: invalid value\n";
219 break;
220 case XML_ERR_INTERNAL_ERROR:
221 errmsg = "internal error";
222 break;
223 case XML_ERR_PEREF_AT_EOF:
224 errmsg = "PEReference at end of document\n";
225 break;
226 case XML_ERR_PEREF_IN_PROLOG:
227 errmsg = "PEReference in prolog\n";
228 break;
229 case XML_ERR_PEREF_IN_EPILOG:
230 errmsg = "PEReference in epilog\n";
231 break;
232 case XML_ERR_PEREF_NO_NAME:
233 errmsg = "PEReference: no name\n";
234 break;
235 case XML_ERR_PEREF_SEMICOL_MISSING:
236 errmsg = "PEReference: expecting ';'\n";
237 break;
238 case XML_ERR_ENTITY_LOOP:
239 errmsg = "Detected an entity reference loop\n";
240 break;
241 case XML_ERR_ENTITY_NOT_STARTED:
242 errmsg = "EntityValue: \" or ' expected\n";
243 break;
244 case XML_ERR_ENTITY_PE_INTERNAL:
245 errmsg = "PEReferences forbidden in internal subset\n";
246 break;
247 case XML_ERR_ENTITY_NOT_FINISHED:
248 errmsg = "EntityValue: \" or ' expected\n";
249 break;
250 case XML_ERR_ATTRIBUTE_NOT_STARTED:
251 errmsg = "AttValue: \" or ' expected\n";
252 break;
253 case XML_ERR_LT_IN_ATTRIBUTE:
254 errmsg = "Unescaped '<' not allowed in attributes values\n";
255 break;
256 case XML_ERR_LITERAL_NOT_STARTED:
257 errmsg = "SystemLiteral \" or ' expected\n";
258 break;
259 case XML_ERR_LITERAL_NOT_FINISHED:
260 errmsg = "Unfinished System or Public ID \" or ' expected\n";
261 break;
262 case XML_ERR_MISPLACED_CDATA_END:
263 errmsg = "Sequence ']]>' not allowed in content\n";
264 break;
265 case XML_ERR_URI_REQUIRED:
266 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
267 break;
268 case XML_ERR_PUBID_REQUIRED:
269 errmsg = "PUBLIC, the Public Identifier is missing\n";
270 break;
271 case XML_ERR_HYPHEN_IN_COMMENT:
272 errmsg = "Comment must not contain '--' (double-hyphen)\n";
273 break;
274 case XML_ERR_PI_NOT_STARTED:
275 errmsg = "xmlParsePI : no target name\n";
276 break;
277 case XML_ERR_RESERVED_XML_NAME:
278 errmsg = "Invalid PI name\n";
279 break;
280 case XML_ERR_NOTATION_NOT_STARTED:
281 errmsg = "NOTATION: Name expected here\n";
282 break;
283 case XML_ERR_NOTATION_NOT_FINISHED:
284 errmsg = "'>' required to close NOTATION declaration\n";
285 break;
286 case XML_ERR_VALUE_REQUIRED:
287 errmsg = "Entity value required\n";
288 break;
289 case XML_ERR_URI_FRAGMENT:
290 errmsg = "Fragment not allowed";
291 break;
292 case XML_ERR_ATTLIST_NOT_STARTED:
293 errmsg = "'(' required to start ATTLIST enumeration\n";
294 break;
295 case XML_ERR_NMTOKEN_REQUIRED:
296 errmsg = "NmToken expected in ATTLIST enumeration\n";
297 break;
298 case XML_ERR_ATTLIST_NOT_FINISHED:
299 errmsg = "')' required to finish ATTLIST enumeration\n";
300 break;
301 case XML_ERR_MIXED_NOT_STARTED:
302 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
303 break;
304 case XML_ERR_PCDATA_REQUIRED:
305 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
306 break;
307 case XML_ERR_ELEMCONTENT_NOT_STARTED:
308 errmsg = "ContentDecl : Name or '(' expected\n";
309 break;
310 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
311 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
312 break;
313 case XML_ERR_PEREF_IN_INT_SUBSET:
314 errmsg = "PEReference: forbidden within markup decl in internal subset\n";
315 break;
316 case XML_ERR_GT_REQUIRED:
317 errmsg = "expected '>'\n";
318 break;
319 case XML_ERR_CONDSEC_INVALID:
320 errmsg = "XML conditional section '[' expected\n";
321 break;
322 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
323 errmsg = "Content error in the external subset\n";
324 break;
325 case XML_ERR_CONDSEC_INVALID_KEYWORD:
326 errmsg = "conditional section INCLUDE or IGNORE keyword expected\n";
327 break;
328 case XML_ERR_CONDSEC_NOT_FINISHED:
329 errmsg = "XML conditional section not closed\n";
330 break;
331 case XML_ERR_XMLDECL_NOT_STARTED:
332 errmsg = "Text declaration '<?xml' required\n";
333 break;
334 case XML_ERR_XMLDECL_NOT_FINISHED:
335 errmsg = "parsing XML declaration: '?>' expected\n";
336 break;
337 case XML_ERR_EXT_ENTITY_STANDALONE:
338 errmsg = "external parsed entities cannot be standalone\n";
339 break;
340 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
341 errmsg = "EntityRef: expecting ';'\n";
342 break;
343 case XML_ERR_DOCTYPE_NOT_FINISHED:
344 errmsg = "DOCTYPE improperly terminated\n";
345 break;
346 case XML_ERR_LTSLASH_REQUIRED:
347 errmsg = "EndTag: '</' not found\n";
348 break;
349 case XML_ERR_EQUAL_REQUIRED:
350 errmsg = "expected '='\n";
351 break;
352 case XML_ERR_STRING_NOT_CLOSED:
353 errmsg = "String not closed expecting \" or '\n";
354 break;
355 case XML_ERR_STRING_NOT_STARTED:
356 errmsg = "String not started expecting ' or \"\n";
357 break;
358 case XML_ERR_ENCODING_NAME:
359 errmsg = "Invalid XML encoding name\n";
360 break;
361 case XML_ERR_STANDALONE_VALUE:
362 errmsg = "standalone accepts only 'yes' or 'no'\n";
363 break;
364 case XML_ERR_DOCUMENT_EMPTY:
365 errmsg = "Document is empty\n";
366 break;
367 case XML_ERR_DOCUMENT_END:
368 errmsg = "Extra content at the end of the document\n";
369 break;
370 case XML_ERR_NOT_WELL_BALANCED:
371 errmsg = "chunk is not well balanced\n";
372 break;
373 case XML_ERR_EXTRA_CONTENT:
374 errmsg = "extra content at the end of well balanced chunk\n";
375 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000376 case XML_ERR_VERSION_MISSING:
377 errmsg = "Malformed declaration expecting version\n";
378 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000379#if 0
380 case :
381 errmsg = "\n";
382 break;
383#endif
384 default:
385 errmsg = "Unregistered error message\n";
386 }
387 ctxt->errNo = error;
388 if (info == NULL) {
389 ctxt->sax->error(ctxt->userData, errmsg);
390 } else {
391 ctxt->sax->error(ctxt->userData, "%s: %s", errmsg, info);
392 }
393 ctxt->wellFormed = 0;
394 if (ctxt->recovery == 0)
395 ctxt->disableSAX = 1;
396}
397
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000398/**
399 * xmlFatalErrMsg:
400 * @ctxt: an XML parser context
401 * @error: the error number
402 * @msg: the error message
403 *
404 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
405 */
406static void
407xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *msg)
408{
409 if (ctxt == NULL) {
410 xmlGenericError(xmlGenericErrorContext,
411 "xmlFatalErr: no context !\n");
412 return;
413 }
414 ctxt->errNo = error;
415 if ((ctxt->sax == NULL) || (ctxt->sax->error == NULL))
416 return;
417 ctxt->sax->error(ctxt->userData, msg);
418 ctxt->wellFormed = 0;
419 if (ctxt->recovery == 0)
420 ctxt->disableSAX = 1;
421}
422
423/**
424 * xmlFatalErrMsgInt:
425 * @ctxt: an XML parser context
426 * @error: the error number
427 * @msg: the error message
428 * @val: an integer value
429 *
430 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
431 */
432static void
433xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
434 const char *msg, int val)
435{
436 if (ctxt == NULL) {
437 xmlGenericError(xmlGenericErrorContext,
438 "xmlFatalErr: no context !\n");
439 return;
440 }
441 ctxt->errNo = error;
442 if ((ctxt->sax == NULL) || (ctxt->sax->error == NULL))
443 return;
444 ctxt->sax->error(ctxt->userData, msg, val);
445 ctxt->wellFormed = 0;
446 if (ctxt->recovery == 0)
447 ctxt->disableSAX = 1;
448}
449
450/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000451 * xmlFatalErrMsgStr:
452 * @ctxt: an XML parser context
453 * @error: the error number
454 * @msg: the error message
455 * @val: a string value
456 *
457 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
458 */
459static void
460xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
461 const char *msg, const xmlChar *val)
462{
463 if (ctxt == NULL) {
464 xmlGenericError(xmlGenericErrorContext,
465 "xmlFatalErr: no context !\n");
466 return;
467 }
468 ctxt->errNo = error;
469 if ((ctxt->sax == NULL) || (ctxt->sax->error == NULL))
470 return;
471 ctxt->sax->error(ctxt->userData, msg, val);
472 ctxt->wellFormed = 0;
473 if (ctxt->recovery == 0)
474 ctxt->disableSAX = 1;
475}
476
477/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000478 * xmlNsErr:
479 * @ctxt: an XML parser context
480 * @error: the error number
481 * @msg: the message
482 * @info1: extra information string
483 * @info2: extra information string
484 *
485 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
486 */
487static void
488xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
489 const char *msg,
490 const xmlChar *info1, const xmlChar *info2, const xmlChar *info3)
491{
492 if (ctxt == NULL)
493 return;
494 if ((ctxt->sax == NULL) || (ctxt->sax->error == NULL))
495 return;
496
497 ctxt->errNo = error;
498 if (info1 == NULL) {
499 ctxt->sax->error(ctxt->userData, msg);
500 } else if (info2 == NULL) {
501 ctxt->sax->error(ctxt->userData, msg, info1);
502 } else if (info3 == NULL) {
503 ctxt->sax->error(ctxt->userData, msg, info1, info2);
504 } else {
505 ctxt->sax->error(ctxt->userData, msg, info1, info2, info3);
506 }
507 ctxt->nsWellFormed = 0;
508}
509
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000510/************************************************************************
511 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000512 * SAX2 defaulted attributes handling *
513 * *
514 ************************************************************************/
515
516/**
517 * xmlDetectSAX2:
518 * @ctxt: an XML parser context
519 *
520 * Do the SAX2 detection and specific intialization
521 */
522static void
523xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
524 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000525#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000526 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
527 ((ctxt->sax->startElementNs != NULL) ||
528 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000529#else
530 ctxt->sax2 = 1;
531#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000532
533 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
534 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
535 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
536}
537
Daniel Veillarde57ec792003-09-10 10:50:59 +0000538typedef struct _xmlDefAttrs xmlDefAttrs;
539typedef xmlDefAttrs *xmlDefAttrsPtr;
540struct _xmlDefAttrs {
541 int nbAttrs; /* number of defaulted attributes on that element */
542 int maxAttrs; /* the size of the array */
543 const xmlChar *values[4]; /* array of localname/prefix/values */
544};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000545
546/**
547 * xmlAddDefAttrs:
548 * @ctxt: an XML parser context
549 * @fullname: the element fullname
550 * @fullattr: the attribute fullname
551 * @value: the attribute value
552 *
553 * Add a defaulted attribute for an element
554 */
555static void
556xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
557 const xmlChar *fullname,
558 const xmlChar *fullattr,
559 const xmlChar *value) {
560 xmlDefAttrsPtr defaults;
561 int len;
562 const xmlChar *name;
563 const xmlChar *prefix;
564
565 if (ctxt->attsDefault == NULL) {
566 ctxt->attsDefault = xmlHashCreate(10);
567 if (ctxt->attsDefault == NULL)
568 goto mem_error;
569 }
570
571 /*
572 * plit the element name into prefix:localname , the string found
573 * are within the DTD and hen not associated to namespace names.
574 */
575 name = xmlSplitQName3(fullname, &len);
576 if (name == NULL) {
577 name = xmlDictLookup(ctxt->dict, fullname, -1);
578 prefix = NULL;
579 } else {
580 name = xmlDictLookup(ctxt->dict, name, -1);
581 prefix = xmlDictLookup(ctxt->dict, fullname, len);
582 }
583
584 /*
585 * make sure there is some storage
586 */
587 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
588 if (defaults == NULL) {
589 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
590 12 * sizeof(const xmlChar *));
591 if (defaults == NULL)
592 goto mem_error;
593 defaults->maxAttrs = 4;
594 defaults->nbAttrs = 0;
595 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
596 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
597 defaults = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
598 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
599 if (defaults == NULL)
600 goto mem_error;
601 defaults->maxAttrs *= 2;
602 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
603 }
604
605 /*
606 * plit the element name into prefix:localname , the string found
607 * are within the DTD and hen not associated to namespace names.
608 */
609 name = xmlSplitQName3(fullattr, &len);
610 if (name == NULL) {
611 name = xmlDictLookup(ctxt->dict, fullattr, -1);
612 prefix = NULL;
613 } else {
614 name = xmlDictLookup(ctxt->dict, name, -1);
615 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
616 }
617
618 defaults->values[4 * defaults->nbAttrs] = name;
619 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
620 /* intern the string and precompute the end */
621 len = xmlStrlen(value);
622 value = xmlDictLookup(ctxt->dict, value, len);
623 defaults->values[4 * defaults->nbAttrs + 2] = value;
624 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
625 defaults->nbAttrs++;
626
627 return;
628
629mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000630 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000631 return;
632}
633
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000634/**
635 * xmlAddSpecialAttr:
636 * @ctxt: an XML parser context
637 * @fullname: the element fullname
638 * @fullattr: the attribute fullname
639 * @type: the attribute type
640 *
641 * Register that this attribute is not CDATA
642 */
643static void
644xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
645 const xmlChar *fullname,
646 const xmlChar *fullattr,
647 int type)
648{
649 if (ctxt->attsSpecial == NULL) {
650 ctxt->attsSpecial = xmlHashCreate(10);
651 if (ctxt->attsSpecial == NULL)
652 goto mem_error;
653 }
654
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000655 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
656 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000657 return;
658
659mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000660 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000661 return;
662}
663
Daniel Veillard4432df22003-09-28 18:58:27 +0000664/**
665 * xmlCheckLanguageID:
666 * @lang: pointer to the string value
667 *
668 * Checks that the value conforms to the LanguageID production:
669 *
670 * NOTE: this is somewhat deprecated, those productions were removed from
671 * the XML Second edition.
672 *
673 * [33] LanguageID ::= Langcode ('-' Subcode)*
674 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
675 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
676 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
677 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
678 * [38] Subcode ::= ([a-z] | [A-Z])+
679 *
680 * Returns 1 if correct 0 otherwise
681 **/
682int
683xmlCheckLanguageID(const xmlChar * lang)
684{
685 const xmlChar *cur = lang;
686
687 if (cur == NULL)
688 return (0);
689 if (((cur[0] == 'i') && (cur[1] == '-')) ||
690 ((cur[0] == 'I') && (cur[1] == '-'))) {
691 /*
692 * IANA code
693 */
694 cur += 2;
695 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
696 ((cur[0] >= 'a') && (cur[0] <= 'z')))
697 cur++;
698 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
699 ((cur[0] == 'X') && (cur[1] == '-'))) {
700 /*
701 * User code
702 */
703 cur += 2;
704 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
705 ((cur[0] >= 'a') && (cur[0] <= 'z')))
706 cur++;
707 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
708 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
709 /*
710 * ISO639
711 */
712 cur++;
713 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
714 ((cur[0] >= 'a') && (cur[0] <= 'z')))
715 cur++;
716 else
717 return (0);
718 } else
719 return (0);
720 while (cur[0] != 0) { /* non input consuming */
721 if (cur[0] != '-')
722 return (0);
723 cur++;
724 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
725 ((cur[0] >= 'a') && (cur[0] <= 'z')))
726 cur++;
727 else
728 return (0);
729 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
730 ((cur[0] >= 'a') && (cur[0] <= 'z')))
731 cur++;
732 }
733 return (1);
734}
735
Owen Taylor3473f882001-02-23 17:55:21 +0000736/************************************************************************
737 * *
738 * Parser stacks related functions and macros *
739 * *
740 ************************************************************************/
741
/* Forward declaration, the definition is not located in this part of the file. */
xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
                                     const xmlChar ** str);
744
Daniel Veillard0fb18932003-09-07 09:14:37 +0000745#ifdef SAX2
746/**
747 * nsPush:
748 * @ctxt: an XML parser context
749 * @prefix: the namespace prefix or NULL
750 * @URL: the namespace name
751 *
752 * Pushes a new parser namespace on top of the ns stack
753 *
William M. Brack7b9154b2003-09-27 19:23:50 +0000754 * Returns -1 in case of error, -2 if the namespace should be discarded
755 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +0000756 */
757static int
758nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
759{
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000760 if (ctxt->options & XML_PARSE_NSCLEAN) {
761 int i;
762 for (i = 0;i < ctxt->nsNr;i += 2) {
763 if (ctxt->nsTab[i] == prefix) {
764 /* in scope */
765 if (ctxt->nsTab[i + 1] == URL)
766 return(-2);
767 /* out of scope keep it */
768 break;
769 }
770 }
771 }
Daniel Veillard0fb18932003-09-07 09:14:37 +0000772 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
773 ctxt->nsMax = 10;
774 ctxt->nsNr = 0;
775 ctxt->nsTab = (const xmlChar **)
776 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
777 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000778 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000779 ctxt->nsMax = 0;
780 return (-1);
781 }
782 } else if (ctxt->nsNr >= ctxt->nsMax) {
783 ctxt->nsMax *= 2;
784 ctxt->nsTab = (const xmlChar **)
785 xmlRealloc(ctxt->nsTab,
786 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
787 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000788 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000789 ctxt->nsMax /= 2;
790 return (-1);
791 }
792 }
793 ctxt->nsTab[ctxt->nsNr++] = prefix;
794 ctxt->nsTab[ctxt->nsNr++] = URL;
795 return (ctxt->nsNr);
796}
797/**
798 * nsPop:
799 * @ctxt: an XML parser context
800 * @nr: the number to pop
801 *
802 * Pops the top @nr parser prefix/namespace from the ns stack
803 *
804 * Returns the number of namespaces removed
805 */
806static int
807nsPop(xmlParserCtxtPtr ctxt, int nr)
808{
809 int i;
810
811 if (ctxt->nsTab == NULL) return(0);
812 if (ctxt->nsNr < nr) {
813 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
814 nr = ctxt->nsNr;
815 }
816 if (ctxt->nsNr <= 0)
817 return (0);
818
819 for (i = 0;i < nr;i++) {
820 ctxt->nsNr--;
821 ctxt->nsTab[ctxt->nsNr] = NULL;
822 }
823 return(nr);
824}
825#endif
826
827static int
828xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
829 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000830 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000831 int maxatts;
832
833 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000834 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000835 atts = (const xmlChar **)
836 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000837 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000838 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000839 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
840 if (attallocs == NULL) goto mem_error;
841 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000842 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000843 } else if (nr + 5 > ctxt->maxatts) {
844 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000845 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
846 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000847 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000848 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000849 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
850 (maxatts / 5) * sizeof(int));
851 if (attallocs == NULL) goto mem_error;
852 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000853 ctxt->maxatts = maxatts;
854 }
855 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000856mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000857 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000858 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000859}
860
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000861/**
862 * inputPush:
863 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000864 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000865 *
866 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000867 *
868 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000869 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000870extern int
871inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
872{
873 if (ctxt->inputNr >= ctxt->inputMax) {
874 ctxt->inputMax *= 2;
875 ctxt->inputTab =
876 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
877 ctxt->inputMax *
878 sizeof(ctxt->inputTab[0]));
879 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000880 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000881 return (0);
882 }
883 }
884 ctxt->inputTab[ctxt->inputNr] = value;
885 ctxt->input = value;
886 return (ctxt->inputNr++);
887}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000888/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000889 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000890 * @ctxt: an XML parser context
891 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000892 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000893 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000894 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000895 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000896extern xmlParserInputPtr
897inputPop(xmlParserCtxtPtr ctxt)
898{
899 xmlParserInputPtr ret;
900
901 if (ctxt->inputNr <= 0)
902 return (0);
903 ctxt->inputNr--;
904 if (ctxt->inputNr > 0)
905 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
906 else
907 ctxt->input = NULL;
908 ret = ctxt->inputTab[ctxt->inputNr];
909 ctxt->inputTab[ctxt->inputNr] = 0;
910 return (ret);
911}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000912/**
913 * nodePush:
914 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000915 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000916 *
917 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000918 *
919 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000920 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000921extern int
922nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
923{
924 if (ctxt->nodeNr >= ctxt->nodeMax) {
925 ctxt->nodeMax *= 2;
926 ctxt->nodeTab =
927 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
928 ctxt->nodeMax *
929 sizeof(ctxt->nodeTab[0]));
930 if (ctxt->nodeTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000931 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000932 return (0);
933 }
934 }
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000935#ifdef MAX_DEPTH
936 if (ctxt->nodeNr > MAX_DEPTH) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000937 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000938 "Excessive depth in document: change MAX_DEPTH = %d\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000939 MAX_DEPTH);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000940 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000941 return(0);
942 }
943#endif
Daniel Veillard1c732d22002-11-30 11:22:59 +0000944 ctxt->nodeTab[ctxt->nodeNr] = value;
945 ctxt->node = value;
946 return (ctxt->nodeNr++);
947}
948/**
949 * nodePop:
950 * @ctxt: an XML parser context
951 *
952 * Pops the top element node from the node stack
953 *
954 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +0000955 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000956extern xmlNodePtr
957nodePop(xmlParserCtxtPtr ctxt)
958{
959 xmlNodePtr ret;
960
961 if (ctxt->nodeNr <= 0)
962 return (0);
963 ctxt->nodeNr--;
964 if (ctxt->nodeNr > 0)
965 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
966 else
967 ctxt->node = NULL;
968 ret = ctxt->nodeTab[ctxt->nodeNr];
969 ctxt->nodeTab[ctxt->nodeNr] = 0;
970 return (ret);
971}
972/**
Daniel Veillarde57ec792003-09-10 10:50:59 +0000973 * nameNsPush:
974 * @ctxt: an XML parser context
975 * @value: the element name
976 * @prefix: the element prefix
977 * @URI: the element namespace name
978 *
979 * Pushes a new element name/prefix/URL on top of the name stack
980 *
981 * Returns -1 in case of error, the index in the stack otherwise
982 */
983static int
984nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
985 const xmlChar *prefix, const xmlChar *URI, int nsNr)
986{
987 if (ctxt->nameNr >= ctxt->nameMax) {
988 const xmlChar * *tmp;
989 void **tmp2;
990 ctxt->nameMax *= 2;
991 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
992 ctxt->nameMax *
993 sizeof(ctxt->nameTab[0]));
994 if (tmp == NULL) {
995 ctxt->nameMax /= 2;
996 goto mem_error;
997 }
998 ctxt->nameTab = tmp;
999 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1000 ctxt->nameMax * 3 *
1001 sizeof(ctxt->pushTab[0]));
1002 if (tmp2 == NULL) {
1003 ctxt->nameMax /= 2;
1004 goto mem_error;
1005 }
1006 ctxt->pushTab = tmp2;
1007 }
1008 ctxt->nameTab[ctxt->nameNr] = value;
1009 ctxt->name = value;
1010 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1011 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001012 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001013 return (ctxt->nameNr++);
1014mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001015 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001016 return (-1);
1017}
1018/**
1019 * nameNsPop:
1020 * @ctxt: an XML parser context
1021 *
1022 * Pops the top element/prefix/URI name from the name stack
1023 *
1024 * Returns the name just removed
1025 */
1026static const xmlChar *
1027nameNsPop(xmlParserCtxtPtr ctxt)
1028{
1029 const xmlChar *ret;
1030
1031 if (ctxt->nameNr <= 0)
1032 return (0);
1033 ctxt->nameNr--;
1034 if (ctxt->nameNr > 0)
1035 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1036 else
1037 ctxt->name = NULL;
1038 ret = ctxt->nameTab[ctxt->nameNr];
1039 ctxt->nameTab[ctxt->nameNr] = NULL;
1040 return (ret);
1041}
1042
1043/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001044 * namePush:
1045 * @ctxt: an XML parser context
1046 * @value: the element name
1047 *
1048 * Pushes a new element name on top of the name stack
1049 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001050 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001051 */
1052extern int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001053namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001054{
1055 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001056 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001057 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001058 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001059 ctxt->nameMax *
1060 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001061 if (tmp == NULL) {
1062 ctxt->nameMax /= 2;
1063 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001064 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001065 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001066 }
1067 ctxt->nameTab[ctxt->nameNr] = value;
1068 ctxt->name = value;
1069 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001070mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001071 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001072 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001073}
1074/**
1075 * namePop:
1076 * @ctxt: an XML parser context
1077 *
1078 * Pops the top element name from the name stack
1079 *
1080 * Returns the name just removed
1081 */
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001082extern const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001083namePop(xmlParserCtxtPtr ctxt)
1084{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001085 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001086
1087 if (ctxt->nameNr <= 0)
1088 return (0);
1089 ctxt->nameNr--;
1090 if (ctxt->nameNr > 0)
1091 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1092 else
1093 ctxt->name = NULL;
1094 ret = ctxt->nameTab[ctxt->nameNr];
1095 ctxt->nameTab[ctxt->nameNr] = 0;
1096 return (ret);
1097}
Owen Taylor3473f882001-02-23 17:55:21 +00001098
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001099static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001100 if (ctxt->spaceNr >= ctxt->spaceMax) {
1101 ctxt->spaceMax *= 2;
1102 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1103 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1104 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001105 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001106 return(0);
1107 }
1108 }
1109 ctxt->spaceTab[ctxt->spaceNr] = val;
1110 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1111 return(ctxt->spaceNr++);
1112}
1113
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001114static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001115 int ret;
1116 if (ctxt->spaceNr <= 0) return(0);
1117 ctxt->spaceNr--;
1118 if (ctxt->spaceNr > 0)
1119 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1120 else
1121 ctxt->space = NULL;
1122 ret = ctxt->spaceTab[ctxt->spaceNr];
1123 ctxt->spaceTab[ctxt->spaceNr] = -1;
1124 return(ret);
1125}
1126
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. They all expect a variable named ctxt (the parser
 * context) to be in scope at the point of use.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *           (as defined below, RAW and CUR expand to the same expression)
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * SKIP(val): advance the character counter, the input cursor and the
 * column by val in one go (no line accounting, hence the "no newline"
 * restriction above). Afterwards a '%' at the new position triggers
 * xmlParserHandlePEReference(), and a NUL that cannot be resolved by
 * growing the input triggers xmlPopInput().
 * NOTE: val is evaluated three times, do not pass an expression with
 * side effects.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1174
/*
 * SHRINK: call xmlSHRINK(ctxt) when the parser is not in progressive mode
 * (ctxt->progressive == 0), more than 2 * INPUT_CHUNK xmlChars lie between
 * the buffer base and the cursor, and fewer than 2 * INPUT_CHUNK remain
 * between the cursor and the buffer end.
 *
 * NOTE: the expansion is a bare "if (...) call;" which already carries its
 * terminating semicolon and is not wrapped in do { } while (0); using it
 * as the body of an unbraced if/else changes how a following else binds.
 */
#define SHRINK if ((ctxt->progressive == 0) &&				\
                   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
                   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlSHRINK (ctxt);
1179
1180static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1181 xmlParserInputShrink(ctxt->input);
1182 if ((*ctxt->input->cur == 0) &&
1183 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1184 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001185 }
Owen Taylor3473f882001-02-23 17:55:21 +00001186
/*
 * GROW: call xmlGROW(ctxt) when the parser is not in progressive mode
 * (ctxt->progressive == 0) and fewer than INPUT_CHUNK xmlChars remain
 * between the cursor and the end of the current input buffer.
 *
 * NOTE: the expansion is a bare "if (...) call;" which already carries its
 * terminating semicolon and is not wrapped in do { } while (0); using it
 * as the body of an unbraced if/else changes how a following else binds.
 */
#define GROW if ((ctxt->progressive == 0) &&				\
                 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
	xmlGROW (ctxt);
1190
1191static void xmlGROW (xmlParserCtxtPtr ctxt) {
1192 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1193 if ((*ctxt->input->cur == 0) &&
1194 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1195 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001196}
Owen Taylor3473f882001-02-23 17:55:21 +00001197
/* SKIP_BLANKS: evaluates to the return value of xmlSkipBlankChars(ctxt) */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: advance by one character through xmlNextChar(ctxt) */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance column, cursor and character counter by exactly one
 * xmlChar, then try to grow the input if the new position holds a NUL.
 * There is no line accounting and no parameter-entity handling here.
 * NOTE: expands to a plain { } block (no do/while wrapper), so
 * "if (x) NEXT1; else ..." does not compile as one might expect.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l xmlChars long.
 * Line/column are updated from the character under the cursor before
 * moving (a '\n' starts a new line at column 1, anything else advances
 * the column by one); a '%' at the new position triggers
 * xmlParserHandlePEReference(). ctxt->nbChars is not updated here.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/*
 * CUR_CHAR / CUR_SCHAR: the address of l is passed to the helper, so l
 * must be a modifiable lvalue.
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i. A one
 * xmlChar long character (l == 1) is stored directly, anything else goes
 * through xmlCopyCharMultiByte(); i is advanced past what was written.
 * NOTE: expands to an unbraced if/else without trailing semicolon, so it
 * must be used as a full statement and not as the body of another
 * unbraced if.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +00001224
1225/**
1226 * xmlSkipBlankChars:
1227 * @ctxt: the XML parser context
1228 *
1229 * skip all blanks character found at that point in the input streams.
1230 * It pops up finished entities in the process if allowable at that point.
1231 *
1232 * Returns the number of space chars skipped
1233 */
1234
1235int
1236xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001237 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001238
1239 /*
1240 * It's Okay to use CUR/NEXT here since all the blanks are on
1241 * the ASCII range.
1242 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001243 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1244 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001245 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001246 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001247 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001248 cur = ctxt->input->cur;
1249 while (IS_BLANK(*cur)) {
1250 if (*cur == '\n') {
1251 ctxt->input->line++; ctxt->input->col = 1;
1252 }
1253 cur++;
1254 res++;
1255 if (*cur == 0) {
1256 ctxt->input->cur = cur;
1257 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1258 cur = ctxt->input->cur;
1259 }
1260 }
1261 ctxt->input->cur = cur;
1262 } else {
1263 int cur;
1264 do {
1265 cur = CUR;
1266 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
1267 NEXT;
1268 cur = CUR;
1269 res++;
1270 }
1271 while ((cur == 0) && (ctxt->inputNr > 1) &&
1272 (ctxt->instate != XML_PARSER_COMMENT)) {
1273 xmlPopInput(ctxt);
1274 cur = CUR;
1275 }
1276 /*
1277 * Need to handle support of entities branching here
1278 */
1279 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1280 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1281 }
Owen Taylor3473f882001-02-23 17:55:21 +00001282 return(res);
1283}
1284
1285/************************************************************************
1286 * *
1287 * Commodity functions to handle entities *
1288 * *
1289 ************************************************************************/
1290
1291/**
1292 * xmlPopInput:
1293 * @ctxt: an XML parser context
1294 *
1295 * xmlPopInput: the current input pointed by ctxt->input came to an end
1296 * pop it and return the next char.
1297 *
1298 * Returns the current xmlChar in the parser context
1299 */
1300xmlChar
1301xmlPopInput(xmlParserCtxtPtr ctxt) {
1302 if (ctxt->inputNr == 1) return(0); /* End of main Input */
1303 if (xmlParserDebugEntities)
1304 xmlGenericError(xmlGenericErrorContext,
1305 "Popping input %d\n", ctxt->inputNr);
1306 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001307 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001308 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1309 return(xmlPopInput(ctxt));
1310 return(CUR);
1311}
1312
1313/**
1314 * xmlPushInput:
1315 * @ctxt: an XML parser context
1316 * @input: an XML parser input fragment (entity, XML fragment ...).
1317 *
1318 * xmlPushInput: switch to a new input stream which is stacked on top
1319 * of the previous one(s).
1320 */
1321void
1322xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1323 if (input == NULL) return;
1324
1325 if (xmlParserDebugEntities) {
1326 if ((ctxt->input != NULL) && (ctxt->input->filename))
1327 xmlGenericError(xmlGenericErrorContext,
1328 "%s(%d): ", ctxt->input->filename,
1329 ctxt->input->line);
1330 xmlGenericError(xmlGenericErrorContext,
1331 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1332 }
1333 inputPush(ctxt, input);
1334 GROW;
1335}
1336
1337/**
1338 * xmlParseCharRef:
1339 * @ctxt: an XML parser context
1340 *
1341 * parse Reference declarations
1342 *
1343 * [66] CharRef ::= '&#' [0-9]+ ';' |
1344 * '&#x' [0-9a-fA-F]+ ';'
1345 *
1346 * [ WFC: Legal Character ]
1347 * Characters referred to using character references must match the
1348 * production for Char.
1349 *
1350 * Returns the value parsed (as an int), 0 in case of error
1351 */
1352int
1353xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001354 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001355 int count = 0;
1356
Owen Taylor3473f882001-02-23 17:55:21 +00001357 /*
1358 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1359 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001360 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001361 (NXT(2) == 'x')) {
1362 SKIP(3);
1363 GROW;
1364 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001365 if (count++ > 20) {
1366 count = 0;
1367 GROW;
1368 }
1369 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001370 val = val * 16 + (CUR - '0');
1371 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1372 val = val * 16 + (CUR - 'a') + 10;
1373 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1374 val = val * 16 + (CUR - 'A') + 10;
1375 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001376 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001377 val = 0;
1378 break;
1379 }
1380 NEXT;
1381 count++;
1382 }
1383 if (RAW == ';') {
1384 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001385 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001386 ctxt->nbChars ++;
1387 ctxt->input->cur++;
1388 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001389 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001390 SKIP(2);
1391 GROW;
1392 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001393 if (count++ > 20) {
1394 count = 0;
1395 GROW;
1396 }
1397 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001398 val = val * 10 + (CUR - '0');
1399 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001400 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001401 val = 0;
1402 break;
1403 }
1404 NEXT;
1405 count++;
1406 }
1407 if (RAW == ';') {
1408 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001409 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001410 ctxt->nbChars ++;
1411 ctxt->input->cur++;
1412 }
1413 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001414 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001415 }
1416
1417 /*
1418 * [ WFC: Legal Character ]
1419 * Characters referred to using character references must match the
1420 * production for Char.
1421 */
1422 if (IS_CHAR(val)) {
1423 return(val);
1424 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001425 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1426 "xmlParseCharRef: invalid xmlChar value %d\n",
1427 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001428 }
1429 return(0);
1430}
1431
1432/**
1433 * xmlParseStringCharRef:
1434 * @ctxt: an XML parser context
1435 * @str: a pointer to an index in the string
1436 *
1437 * parse Reference declarations, variant parsing from a string rather
1438 * than an an input flow.
1439 *
1440 * [66] CharRef ::= '&#' [0-9]+ ';' |
1441 * '&#x' [0-9a-fA-F]+ ';'
1442 *
1443 * [ WFC: Legal Character ]
1444 * Characters referred to using character references must match the
1445 * production for Char.
1446 *
1447 * Returns the value parsed (as an int), 0 in case of error, str will be
1448 * updated to the current value of the index
1449 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001450static int
Owen Taylor3473f882001-02-23 17:55:21 +00001451xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1452 const xmlChar *ptr;
1453 xmlChar cur;
1454 int val = 0;
1455
1456 if ((str == NULL) || (*str == NULL)) return(0);
1457 ptr = *str;
1458 cur = *ptr;
1459 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1460 ptr += 3;
1461 cur = *ptr;
1462 while (cur != ';') { /* Non input consuming loop */
1463 if ((cur >= '0') && (cur <= '9'))
1464 val = val * 16 + (cur - '0');
1465 else if ((cur >= 'a') && (cur <= 'f'))
1466 val = val * 16 + (cur - 'a') + 10;
1467 else if ((cur >= 'A') && (cur <= 'F'))
1468 val = val * 16 + (cur - 'A') + 10;
1469 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001470 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001471 val = 0;
1472 break;
1473 }
1474 ptr++;
1475 cur = *ptr;
1476 }
1477 if (cur == ';')
1478 ptr++;
1479 } else if ((cur == '&') && (ptr[1] == '#')){
1480 ptr += 2;
1481 cur = *ptr;
1482 while (cur != ';') { /* Non input consuming loops */
1483 if ((cur >= '0') && (cur <= '9'))
1484 val = val * 10 + (cur - '0');
1485 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001486 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001487 val = 0;
1488 break;
1489 }
1490 ptr++;
1491 cur = *ptr;
1492 }
1493 if (cur == ';')
1494 ptr++;
1495 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001496 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001497 return(0);
1498 }
1499 *str = ptr;
1500
1501 /*
1502 * [ WFC: Legal Character ]
1503 * Characters referred to using character references must match the
1504 * production for Char.
1505 */
1506 if (IS_CHAR(val)) {
1507 return(val);
1508 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001509 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1510 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1511 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001512 }
1513 return(0);
1514}
1515
1516/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001517 * xmlNewBlanksWrapperInputStream:
1518 * @ctxt: an XML parser context
1519 * @entity: an Entity pointer
1520 *
1521 * Create a new input stream for wrapping
1522 * blanks around a PEReference
1523 *
1524 * Returns the new input stream or NULL
1525 */
1526
1527static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1528
Daniel Veillardf4862f02002-09-10 11:13:43 +00001529static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001530xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1531 xmlParserInputPtr input;
1532 xmlChar *buffer;
1533 size_t length;
1534 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001535 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1536 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001537 return(NULL);
1538 }
1539 if (xmlParserDebugEntities)
1540 xmlGenericError(xmlGenericErrorContext,
1541 "new blanks wrapper for entity: %s\n", entity->name);
1542 input = xmlNewInputStream(ctxt);
1543 if (input == NULL) {
1544 return(NULL);
1545 }
1546 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001547 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001548 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001549 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001550 return(NULL);
1551 }
1552 buffer [0] = ' ';
1553 buffer [1] = '%';
1554 buffer [length-3] = ';';
1555 buffer [length-2] = ' ';
1556 buffer [length-1] = 0;
1557 memcpy(buffer + 2, entity->name, length - 5);
1558 input->free = deallocblankswrapper;
1559 input->base = buffer;
1560 input->cur = buffer;
1561 input->length = length;
1562 input->end = &buffer[length];
1563 return(input);
1564}
1565
/**
 * xmlParserHandlePEReference:
 * @ctxt:  the parser context
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * [ WFC: No Recursion ]
 * A parsed entity must not contain a recursive
 * reference to itself, either directly or indirectly.
 *
 * [ WFC: Entity Declared ]
 * In a document without any DTD, a document with only an internal DTD
 * subset which contains no parameter entity references, or a document
 * with "standalone='yes'", ...  ... The declaration of a parameter
 * entity must precede any reference to it...
 *
 * [ VC: Entity Declared ]
 * In a document with an external subset or external parameter entities
 * with "standalone='no'", ...  ... The declaration of a parameter entity
 * must precede any reference to it...
 *
 * [ WFC: In DTD ]
 * Parameter-entity references may only appear in the DTD.
 * NOTE: misleading but this is handled.
 *
 * A PEReference may have been detected in the current input stream
 * the handling is done accordingly to
 *      http://www.w3.org/TR/REC-xml#entproc
 * i.e.
 *   - Included in literal in entity values
 *   - Included as Parameter Entity reference within DTDs
 *
 * Nothing happens unless the current character is '%'. Depending on the
 * parser state the reference is then ignored, reported as an error, or
 * (in the DTD state only) parsed and expanded by pushing a new input.
 */
void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
    const xmlChar *name;
    xmlEntityPtr entity = NULL;
    xmlParserInputPtr input;

    if (RAW != '%') return;
    /*
     * Only XML_PARSER_DTD breaks out of this switch; every other listed
     * state returns, with or without raising an error.
     */
    switch(ctxt->instate) {
        case XML_PARSER_CDATA_SECTION:
            return;
        case XML_PARSER_COMMENT:
            return;
        case XML_PARSER_START_TAG:
            return;
        case XML_PARSER_END_TAG:
            return;
        case XML_PARSER_EOF:
            xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
            return;
        case XML_PARSER_PROLOG:
        case XML_PARSER_START:
        case XML_PARSER_MISC:
            xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
            return;
        case XML_PARSER_ENTITY_DECL:
        case XML_PARSER_CONTENT:
        case XML_PARSER_ATTRIBUTE_VALUE:
        case XML_PARSER_PI:
        case XML_PARSER_SYSTEM_LITERAL:
        case XML_PARSER_PUBLIC_LITERAL:
            /* we just ignore it there */
            return;
        case XML_PARSER_EPILOG:
            xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
            return;
        case XML_PARSER_ENTITY_VALUE:
            /*
             * NOTE: in the case of entity values, we don't do the
             *       substitution here since we need the literal
             *       entity value to be able to save the internal
             *       subset of the document.
             *       This will be handled by xmlStringDecodeEntities
             */
            return;
        case XML_PARSER_DTD:
            /*
             * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
             * In the internal DTD subset, parameter-entity references
             * can occur only where markup declarations can occur, not
             * within markup declarations.
             * In that case this is handled in xmlParseMarkupDecl
             */
            if ((ctxt->external == 0) && (ctxt->inputNr == 1))
                return;
            /* a '%' followed by a blank or the end of buffer is no PERef */
            if (IS_BLANK(NXT(1)) || NXT(1) == 0)
                return;
            break;
        case XML_PARSER_IGNORE:
            return;
    }

    /* step over the '%' and read the entity name */
    NEXT;
    name = xmlParseName(ctxt);
    /* NOTE(review): name may be NULL here and is passed to a %s format */
    if (xmlParserDebugEntities)
        xmlGenericError(xmlGenericErrorContext,
                "PEReference: %s\n", name);
    if (name == NULL) {
        xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
    } else {
        if (RAW == ';') {
            NEXT;
            /* entity lookup is delegated to the SAX handler, if any */
            if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
                entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
            if (entity == NULL) {

                /*
                 * [ WFC: Entity Declared ]
                 * In a document without any DTD, a document with only an
                 * internal DTD subset which contains no parameter entity
                 * references, or a document with "standalone='yes'", ...
                 * ... The declaration of a parameter entity must precede
                 * any reference to it...
                 */
                if ((ctxt->standalone == 1) ||
                    ((ctxt->hasExternalSubset == 0) &&
                     (ctxt->hasPErefs == 0))) {
                    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
                         "PEReference: %%%s; not found\n", name);
                } else {
                    /*
                     * [ VC: Entity Declared ]
                     * In a document with an external subset or external
                     * parameter entities with "standalone='no'", ...
                     * ... The declaration of a parameter entity must precede
                     * any reference to it...
                     */
                    /*
                     * Reported as a validity error when validating,
                     * otherwise as a SAX warning; either way the document
                     * is flagged as not valid.
                     */
                    if ((!ctxt->disableSAX) &&
                        (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
                        ctxt->vctxt.error(ctxt->vctxt.userData,
                             "PEReference: %%%s; not found\n", name);
                    } else if ((!ctxt->disableSAX) &&
                        (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
                        ctxt->sax->warning(ctxt->userData,
                         "PEReference: %%%s; not found\n", name);
                    ctxt->valid = 0;
                }
            } else if (ctxt->input->free != deallocblankswrapper) {
                    /*
                     * The current input is not a blanks wrapper yet: push
                     * one (its content is " %name; "), presumably so that
                     * the reference is met again, surrounded by blanks,
                     * while reading the wrapper.
                     */
                    input = xmlNewBlanksWrapperInputStream(ctxt, entity);
                    xmlPushInput(ctxt, input);
            } else {
                /* we are reading a blanks wrapper: push the entity itself */
                if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
                    (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
                    xmlChar start[4];
                    xmlCharEncoding enc;

                    /*
                     * handle the extra spaces added before and after
                     * c.f. http://www.w3.org/TR/REC-xml#as-PE
                     * this is done independently.
                     */
                    input = xmlNewEntityInputStream(ctxt, entity);
                    xmlPushInput(ctxt, input);

                    /*
                     * Get the 4 first bytes and decode the charset
                     * if enc != XML_CHAR_ENCODING_NONE
                     * plug some encoding conversion routines.
                     */
                    /* no ';' needed: the GROW macro carries its own */
                    GROW
                    if (entity->length >= 4) {
                        start[0] = RAW;
                        start[1] = NXT(1);
                        start[2] = NXT(2);
                        start[3] = NXT(3);
                        enc = xmlDetectCharEncoding(start, 4);
                        if (enc != XML_CHAR_ENCODING_NONE) {
                            xmlSwitchEncoding(ctxt, enc);
                        }
                    }

                    /* an external entity may start with a text declaration */
                    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
                        (RAW == '<') && (NXT(1) == '?') &&
                        (NXT(2) == 'x') && (NXT(3) == 'm') &&
                        (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
                        xmlParseTextDecl(ctxt);
                    }
                } else {
                    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
                             "PEReference: %s is not a parameter entity\n",
                                      name);
                }
            }
        } else {
            xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
        }
    }
}
1755
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer) doubles the allocation of <buffer>, whose current
 * size (in xmlChars) must be held in a variable named <buffer>_size.
 * On allocation failure control jumps to a label named mem_error that
 * the enclosing function must provide; in that case <buffer> and
 * <buffer>_size are left untouched, so the old block is still valid and
 * can be released by the error path instead of being leaked.
 * NOTE: expands to a plain { } block, not a do { } while (0) statement.
 */
#define growBuffer(buffer) {						\
    xmlChar *growBuffer_tmp = (xmlChar *)				\
        xmlRealloc(buffer, buffer##_size * 2 * sizeof(xmlChar));	\
    if (growBuffer_tmp == NULL) goto mem_error;				\
    buffer = growBuffer_tmp;						\
    buffer##_size *= 2;							\
}
1765
1766/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00001767 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00001768 * @ctxt: the parser context
1769 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001770 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001771 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1772 * @end: an end marker xmlChar, 0 if none
1773 * @end2: an end marker xmlChar, 0 if none
1774 * @end3: an end marker xmlChar, 0 if none
1775 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001776 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001777 *
1778 * [67] Reference ::= EntityRef | CharRef
1779 *
1780 * [69] PEReference ::= '%' Name ';'
1781 *
1782 * Returns A newly allocated string with the substitution done. The caller
1783 * must deallocate it !
1784 */
1785xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001786xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1787 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001788 xmlChar *buffer = NULL;
1789 int buffer_size = 0;
1790
1791 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001792 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001793 xmlEntityPtr ent;
1794 int c,l;
1795 int nbchars = 0;
1796
Daniel Veillarde57ec792003-09-10 10:50:59 +00001797 if ((str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001798 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001799 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001800
1801 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001802 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001803 return(NULL);
1804 }
1805
1806 /*
1807 * allocate a translation buffer.
1808 */
1809 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001810 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001811 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001812
1813 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001814 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001815 * we are operating on already parsed values.
1816 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001817 if (str < last)
1818 c = CUR_SCHAR(str, l);
1819 else
1820 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001821 while ((c != 0) && (c != end) && /* non input consuming loop */
1822 (c != end2) && (c != end3)) {
1823
1824 if (c == 0) break;
1825 if ((c == '&') && (str[1] == '#')) {
1826 int val = xmlParseStringCharRef(ctxt, &str);
1827 if (val != 0) {
1828 COPY_BUF(0,buffer,nbchars,val);
1829 }
1830 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1831 if (xmlParserDebugEntities)
1832 xmlGenericError(xmlGenericErrorContext,
1833 "String decoding Entity Reference: %.30s\n",
1834 str);
1835 ent = xmlParseStringEntityRef(ctxt, &str);
1836 if ((ent != NULL) &&
1837 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1838 if (ent->content != NULL) {
1839 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1840 } else {
1841 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1842 ctxt->sax->error(ctxt->userData,
1843 "internal error entity has no content\n");
1844 }
1845 } else if ((ent != NULL) && (ent->content != NULL)) {
1846 xmlChar *rep;
1847
1848 ctxt->depth++;
1849 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1850 0, 0, 0);
1851 ctxt->depth--;
1852 if (rep != NULL) {
1853 current = rep;
1854 while (*current != 0) { /* non input consuming loop */
1855 buffer[nbchars++] = *current++;
1856 if (nbchars >
1857 buffer_size - XML_PARSER_BUFFER_SIZE) {
1858 growBuffer(buffer);
1859 }
1860 }
1861 xmlFree(rep);
1862 }
1863 } else if (ent != NULL) {
1864 int i = xmlStrlen(ent->name);
1865 const xmlChar *cur = ent->name;
1866
1867 buffer[nbchars++] = '&';
1868 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1869 growBuffer(buffer);
1870 }
1871 for (;i > 0;i--)
1872 buffer[nbchars++] = *cur++;
1873 buffer[nbchars++] = ';';
1874 }
1875 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1876 if (xmlParserDebugEntities)
1877 xmlGenericError(xmlGenericErrorContext,
1878 "String decoding PE Reference: %.30s\n", str);
1879 ent = xmlParseStringPEReference(ctxt, &str);
1880 if (ent != NULL) {
1881 xmlChar *rep;
1882
1883 ctxt->depth++;
1884 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1885 0, 0, 0);
1886 ctxt->depth--;
1887 if (rep != NULL) {
1888 current = rep;
1889 while (*current != 0) { /* non input consuming loop */
1890 buffer[nbchars++] = *current++;
1891 if (nbchars >
1892 buffer_size - XML_PARSER_BUFFER_SIZE) {
1893 growBuffer(buffer);
1894 }
1895 }
1896 xmlFree(rep);
1897 }
1898 }
1899 } else {
1900 COPY_BUF(l,buffer,nbchars,c);
1901 str += l;
1902 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1903 growBuffer(buffer);
1904 }
1905 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001906 if (str < last)
1907 c = CUR_SCHAR(str, l);
1908 else
1909 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001910 }
1911 buffer[nbchars++] = 0;
1912 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001913
1914mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001915 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001916 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001917}
1918
Daniel Veillarde57ec792003-09-10 10:50:59 +00001919/**
1920 * xmlStringDecodeEntities:
1921 * @ctxt: the parser context
1922 * @str: the input string
1923 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1924 * @end: an end marker xmlChar, 0 if none
1925 * @end2: an end marker xmlChar, 0 if none
1926 * @end3: an end marker xmlChar, 0 if none
1927 *
1928 * Takes a entity string content and process to do the adequate substitutions.
1929 *
1930 * [67] Reference ::= EntityRef | CharRef
1931 *
1932 * [69] PEReference ::= '%' Name ';'
1933 *
1934 * Returns A newly allocated string with the substitution done. The caller
1935 * must deallocate it !
1936 */
1937xmlChar *
1938xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1939 xmlChar end, xmlChar end2, xmlChar end3) {
1940 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
1941 end, end2, end3));
1942}
Owen Taylor3473f882001-02-23 17:55:21 +00001943
1944/************************************************************************
1945 * *
1946 * Commodity functions to handle xmlChars *
1947 * *
1948 ************************************************************************/
1949
1950/**
1951 * xmlStrndup:
1952 * @cur: the input xmlChar *
1953 * @len: the len of @cur
1954 *
1955 * a strndup for array of xmlChar's
1956 *
1957 * Returns a new xmlChar * or NULL
1958 */
1959xmlChar *
1960xmlStrndup(const xmlChar *cur, int len) {
1961 xmlChar *ret;
1962
1963 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001964 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001965 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001966 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001967 return(NULL);
1968 }
1969 memcpy(ret, cur, len * sizeof(xmlChar));
1970 ret[len] = 0;
1971 return(ret);
1972}
1973
1974/**
1975 * xmlStrdup:
1976 * @cur: the input xmlChar *
1977 *
1978 * a strdup for array of xmlChar's. Since they are supposed to be
1979 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1980 * a termination mark of '0'.
1981 *
1982 * Returns a new xmlChar * or NULL
1983 */
1984xmlChar *
1985xmlStrdup(const xmlChar *cur) {
1986 const xmlChar *p = cur;
1987
1988 if (cur == NULL) return(NULL);
1989 while (*p != 0) p++; /* non input consuming */
1990 return(xmlStrndup(cur, p - cur));
1991}
1992
1993/**
1994 * xmlCharStrndup:
1995 * @cur: the input char *
1996 * @len: the len of @cur
1997 *
1998 * a strndup for char's to xmlChar's
1999 *
2000 * Returns a new xmlChar * or NULL
2001 */
2002
2003xmlChar *
2004xmlCharStrndup(const char *cur, int len) {
2005 int i;
2006 xmlChar *ret;
2007
2008 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002009 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002010 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002011 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002012 return(NULL);
2013 }
2014 for (i = 0;i < len;i++)
2015 ret[i] = (xmlChar) cur[i];
2016 ret[len] = 0;
2017 return(ret);
2018}
2019
2020/**
2021 * xmlCharStrdup:
2022 * @cur: the input char *
Owen Taylor3473f882001-02-23 17:55:21 +00002023 *
2024 * a strdup for char's to xmlChar's
2025 *
2026 * Returns a new xmlChar * or NULL
2027 */
2028
2029xmlChar *
2030xmlCharStrdup(const char *cur) {
2031 const char *p = cur;
2032
2033 if (cur == NULL) return(NULL);
2034 while (*p != '\0') p++; /* non input consuming */
2035 return(xmlCharStrndup(cur, p - cur));
2036}
2037
2038/**
2039 * xmlStrcmp:
2040 * @str1: the first xmlChar *
2041 * @str2: the second xmlChar *
2042 *
2043 * a strcmp for xmlChar's
2044 *
2045 * Returns the integer result of the comparison
2046 */
2047
2048int
2049xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
2050 register int tmp;
2051
2052 if (str1 == str2) return(0);
2053 if (str1 == NULL) return(-1);
2054 if (str2 == NULL) return(1);
2055 do {
2056 tmp = *str1++ - *str2;
2057 if (tmp != 0) return(tmp);
2058 } while (*str2++ != 0);
2059 return 0;
2060}
2061
2062/**
2063 * xmlStrEqual:
2064 * @str1: the first xmlChar *
2065 * @str2: the second xmlChar *
2066 *
2067 * Check if both string are equal of have same content
2068 * Should be a bit more readable and faster than xmlStrEqual()
2069 *
2070 * Returns 1 if they are equal, 0 if they are different
2071 */
2072
2073int
2074xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
2075 if (str1 == str2) return(1);
2076 if (str1 == NULL) return(0);
2077 if (str2 == NULL) return(0);
2078 do {
2079 if (*str1++ != *str2) return(0);
2080 } while (*str2++);
2081 return(1);
2082}
2083
2084/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00002085 * xmlStrQEqual:
2086 * @pref: the prefix of the QName
2087 * @name: the localname of the QName
2088 * @str: the second xmlChar *
2089 *
2090 * Check if a QName is Equal to a given string
2091 *
2092 * Returns 1 if they are equal, 0 if they are different
2093 */
2094
2095int
2096xmlStrQEqual(const xmlChar *pref, const xmlChar *name, const xmlChar *str) {
2097 if (pref == NULL) return(xmlStrEqual(name, str));
2098 if (name == NULL) return(0);
2099 if (str == NULL) return(0);
2100
2101 do {
2102 if (*pref++ != *str) return(0);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002103 } while ((*str++) && (*pref));
Daniel Veillarde57ec792003-09-10 10:50:59 +00002104 if (*str++ != ':') return(0);
2105 do {
2106 if (*name++ != *str) return(0);
2107 } while (*str++);
2108 return(1);
2109}
2110
2111/**
Owen Taylor3473f882001-02-23 17:55:21 +00002112 * xmlStrncmp:
2113 * @str1: the first xmlChar *
2114 * @str2: the second xmlChar *
2115 * @len: the max comparison length
2116 *
2117 * a strncmp for xmlChar's
2118 *
2119 * Returns the integer result of the comparison
2120 */
2121
2122int
2123xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
2124 register int tmp;
2125
2126 if (len <= 0) return(0);
2127 if (str1 == str2) return(0);
2128 if (str1 == NULL) return(-1);
2129 if (str2 == NULL) return(1);
2130 do {
2131 tmp = *str1++ - *str2;
2132 if (tmp != 0 || --len == 0) return(tmp);
2133 } while (*str2++ != 0);
2134 return 0;
2135}
2136
Daniel Veillardb44025c2001-10-11 22:55:55 +00002137static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00002138 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
2139 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
2140 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
2141 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
2142 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
2143 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
2144 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
2145 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
2146 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
2147 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
2148 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
2149 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
2150 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
2151 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
2152 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
2153 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
2154 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
2155 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
2156 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
2157 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
2158 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
2159 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
2160 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
2161 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
2162 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
2163 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
2164 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
2165 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
2166 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
2167 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
2168 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
2169 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
2170};
2171
2172/**
2173 * xmlStrcasecmp:
2174 * @str1: the first xmlChar *
2175 * @str2: the second xmlChar *
2176 *
2177 * a strcasecmp for xmlChar's
2178 *
2179 * Returns the integer result of the comparison
2180 */
2181
2182int
2183xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
2184 register int tmp;
2185
2186 if (str1 == str2) return(0);
2187 if (str1 == NULL) return(-1);
2188 if (str2 == NULL) return(1);
2189 do {
2190 tmp = casemap[*str1++] - casemap[*str2];
2191 if (tmp != 0) return(tmp);
2192 } while (*str2++ != 0);
2193 return 0;
2194}
2195
2196/**
2197 * xmlStrncasecmp:
2198 * @str1: the first xmlChar *
2199 * @str2: the second xmlChar *
2200 * @len: the max comparison length
2201 *
2202 * a strncasecmp for xmlChar's
2203 *
2204 * Returns the integer result of the comparison
2205 */
2206
2207int
2208xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
2209 register int tmp;
2210
2211 if (len <= 0) return(0);
2212 if (str1 == str2) return(0);
2213 if (str1 == NULL) return(-1);
2214 if (str2 == NULL) return(1);
2215 do {
2216 tmp = casemap[*str1++] - casemap[*str2];
2217 if (tmp != 0 || --len == 0) return(tmp);
2218 } while (*str2++ != 0);
2219 return 0;
2220}
2221
2222/**
2223 * xmlStrchr:
2224 * @str: the xmlChar * array
2225 * @val: the xmlChar to search
2226 *
2227 * a strchr for xmlChar's
2228 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002229 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002230 */
2231
2232const xmlChar *
2233xmlStrchr(const xmlChar *str, xmlChar val) {
2234 if (str == NULL) return(NULL);
2235 while (*str != 0) { /* non input consuming */
2236 if (*str == val) return((xmlChar *) str);
2237 str++;
2238 }
2239 return(NULL);
2240}
2241
2242/**
2243 * xmlStrstr:
2244 * @str: the xmlChar * array (haystack)
2245 * @val: the xmlChar to search (needle)
2246 *
2247 * a strstr for xmlChar's
2248 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002249 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002250 */
2251
2252const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00002253xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00002254 int n;
2255
2256 if (str == NULL) return(NULL);
2257 if (val == NULL) return(NULL);
2258 n = xmlStrlen(val);
2259
2260 if (n == 0) return(str);
2261 while (*str != 0) { /* non input consuming */
2262 if (*str == *val) {
2263 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
2264 }
2265 str++;
2266 }
2267 return(NULL);
2268}
2269
2270/**
2271 * xmlStrcasestr:
2272 * @str: the xmlChar * array (haystack)
2273 * @val: the xmlChar to search (needle)
2274 *
2275 * a case-ignoring strstr for xmlChar's
2276 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002277 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002278 */
2279
2280const xmlChar *
2281xmlStrcasestr(const xmlChar *str, xmlChar *val) {
2282 int n;
2283
2284 if (str == NULL) return(NULL);
2285 if (val == NULL) return(NULL);
2286 n = xmlStrlen(val);
2287
2288 if (n == 0) return(str);
2289 while (*str != 0) { /* non input consuming */
2290 if (casemap[*str] == casemap[*val])
2291 if (!xmlStrncasecmp(str, val, n)) return(str);
2292 str++;
2293 }
2294 return(NULL);
2295}
2296
2297/**
2298 * xmlStrsub:
2299 * @str: the xmlChar * array (haystack)
2300 * @start: the index of the first char (zero based)
2301 * @len: the length of the substring
2302 *
2303 * Extract a substring of a given string
2304 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002305 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002306 */
2307
2308xmlChar *
2309xmlStrsub(const xmlChar *str, int start, int len) {
2310 int i;
2311
2312 if (str == NULL) return(NULL);
2313 if (start < 0) return(NULL);
2314 if (len < 0) return(NULL);
2315
2316 for (i = 0;i < start;i++) {
2317 if (*str == 0) return(NULL);
2318 str++;
2319 }
2320 if (*str == 0) return(NULL);
2321 return(xmlStrndup(str, len));
2322}
2323
2324/**
2325 * xmlStrlen:
2326 * @str: the xmlChar * array
2327 *
2328 * length of a xmlChar's string
2329 *
2330 * Returns the number of xmlChar contained in the ARRAY.
2331 */
2332
2333int
2334xmlStrlen(const xmlChar *str) {
2335 int len = 0;
2336
2337 if (str == NULL) return(0);
2338 while (*str != 0) { /* non input consuming */
2339 str++;
2340 len++;
2341 }
2342 return(len);
2343}
2344
2345/**
2346 * xmlStrncat:
2347 * @cur: the original xmlChar * array
2348 * @add: the xmlChar * array added
2349 * @len: the length of @add
2350 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002351 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00002352 * first bytes of @add.
2353 *
2354 * Returns a new xmlChar *, the original @cur is reallocated if needed
2355 * and should not be freed
2356 */
2357
2358xmlChar *
2359xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
2360 int size;
2361 xmlChar *ret;
2362
2363 if ((add == NULL) || (len == 0))
2364 return(cur);
2365 if (cur == NULL)
2366 return(xmlStrndup(add, len));
2367
2368 size = xmlStrlen(cur);
2369 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
2370 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002371 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002372 return(cur);
2373 }
2374 memcpy(&ret[size], add, len * sizeof(xmlChar));
2375 ret[size + len] = 0;
2376 return(ret);
2377}
2378
2379/**
2380 * xmlStrcat:
2381 * @cur: the original xmlChar * array
2382 * @add: the xmlChar * array added
2383 *
2384 * a strcat for array of xmlChar's. Since they are supposed to be
2385 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2386 * a termination mark of '0'.
2387 *
2388 * Returns a new xmlChar * containing the concatenated string.
2389 */
2390xmlChar *
2391xmlStrcat(xmlChar *cur, const xmlChar *add) {
2392 const xmlChar *p = add;
2393
2394 if (add == NULL) return(cur);
2395 if (cur == NULL)
2396 return(xmlStrdup(add));
2397
2398 while (*p != 0) p++; /* non input consuming */
2399 return(xmlStrncat(cur, add, p - add));
2400}
2401
2402/************************************************************************
2403 * *
2404 * Commodity functions, cleanup needed ? *
2405 * *
2406 ************************************************************************/
2407
2408/**
2409 * areBlanks:
2410 * @ctxt: an XML parser context
2411 * @str: a xmlChar *
2412 * @len: the size of @str
2413 *
2414 * Is this a sequence of blank chars that one can ignore ?
2415 *
2416 * Returns 1 if ignorable 0 otherwise.
2417 */
2418
2419static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
2420 int i, ret;
2421 xmlNodePtr lastChild;
2422
Daniel Veillard05c13a22001-09-09 08:38:09 +00002423 /*
2424 * Don't spend time trying to differentiate them, the same callback is
2425 * used !
2426 */
2427 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002428 return(0);
2429
Owen Taylor3473f882001-02-23 17:55:21 +00002430 /*
2431 * Check for xml:space value.
2432 */
2433 if (*(ctxt->space) == 1)
2434 return(0);
2435
2436 /*
2437 * Check that the string is made of blanks
2438 */
2439 for (i = 0;i < len;i++)
2440 if (!(IS_BLANK(str[i]))) return(0);
2441
2442 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002443 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002444 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002445 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002446 if (ctxt->myDoc != NULL) {
2447 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2448 if (ret == 0) return(1);
2449 if (ret == 1) return(0);
2450 }
2451
2452 /*
2453 * Otherwise, heuristic :-\
2454 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002455 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002456 if ((ctxt->node->children == NULL) &&
2457 (RAW == '<') && (NXT(1) == '/')) return(0);
2458
2459 lastChild = xmlGetLastChild(ctxt->node);
2460 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002461 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2462 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002463 } else if (xmlNodeIsText(lastChild))
2464 return(0);
2465 else if ((ctxt->node->children != NULL) &&
2466 (xmlNodeIsText(ctxt->node->children)))
2467 return(0);
2468 return(1);
2469}
2470
Owen Taylor3473f882001-02-23 17:55:21 +00002471/************************************************************************
2472 * *
2473 * Extra stuff for namespace support *
2474 * Relates to http://www.w3.org/TR/WD-xml-names *
2475 * *
2476 ************************************************************************/
2477
2478/**
2479 * xmlSplitQName:
2480 * @ctxt: an XML parser context
2481 * @name: an XML parser context
2482 * @prefix: a xmlChar **
2483 *
2484 * parse an UTF8 encoded XML qualified name string
2485 *
2486 * [NS 5] QName ::= (Prefix ':')? LocalPart
2487 *
2488 * [NS 6] Prefix ::= NCName
2489 *
2490 * [NS 7] LocalPart ::= NCName
2491 *
2492 * Returns the local part, and prefix is updated
2493 * to get the Prefix if any.
2494 */
2495
2496xmlChar *
2497xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2498 xmlChar buf[XML_MAX_NAMELEN + 5];
2499 xmlChar *buffer = NULL;
2500 int len = 0;
2501 int max = XML_MAX_NAMELEN;
2502 xmlChar *ret = NULL;
2503 const xmlChar *cur = name;
2504 int c;
2505
2506 *prefix = NULL;
2507
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002508 if (cur == NULL) return(NULL);
2509
Owen Taylor3473f882001-02-23 17:55:21 +00002510#ifndef XML_XML_NAMESPACE
2511 /* xml: prefix is not really a namespace */
2512 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2513 (cur[2] == 'l') && (cur[3] == ':'))
2514 return(xmlStrdup(name));
2515#endif
2516
Daniel Veillard597bc482003-07-24 16:08:28 +00002517 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002518 if (cur[0] == ':')
2519 return(xmlStrdup(name));
2520
2521 c = *cur++;
2522 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2523 buf[len++] = c;
2524 c = *cur++;
2525 }
2526 if (len >= max) {
2527 /*
2528 * Okay someone managed to make a huge name, so he's ready to pay
2529 * for the processing speed.
2530 */
2531 max = len * 2;
2532
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002533 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002534 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002535 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002536 return(NULL);
2537 }
2538 memcpy(buffer, buf, len);
2539 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2540 if (len + 10 > max) {
2541 max *= 2;
2542 buffer = (xmlChar *) xmlRealloc(buffer,
2543 max * sizeof(xmlChar));
2544 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002545 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002546 return(NULL);
2547 }
2548 }
2549 buffer[len++] = c;
2550 c = *cur++;
2551 }
2552 buffer[len] = 0;
2553 }
2554
Daniel Veillard597bc482003-07-24 16:08:28 +00002555 /* nasty but well=formed
2556 if ((c == ':') && (*cur == 0)) {
2557 return(xmlStrdup(name));
2558 } */
2559
Owen Taylor3473f882001-02-23 17:55:21 +00002560 if (buffer == NULL)
2561 ret = xmlStrndup(buf, len);
2562 else {
2563 ret = buffer;
2564 buffer = NULL;
2565 max = XML_MAX_NAMELEN;
2566 }
2567
2568
2569 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002570 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002571 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002572 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002573 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002574 }
Owen Taylor3473f882001-02-23 17:55:21 +00002575 len = 0;
2576
Daniel Veillardbb284f42002-10-16 18:02:47 +00002577 /*
2578 * Check that the first character is proper to start
2579 * a new name
2580 */
2581 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2582 ((c >= 0x41) && (c <= 0x5A)) ||
2583 (c == '_') || (c == ':'))) {
2584 int l;
2585 int first = CUR_SCHAR(cur, l);
2586
2587 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002588 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002589 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002590 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002591 }
2592 }
2593 cur++;
2594
Owen Taylor3473f882001-02-23 17:55:21 +00002595 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2596 buf[len++] = c;
2597 c = *cur++;
2598 }
2599 if (len >= max) {
2600 /*
2601 * Okay someone managed to make a huge name, so he's ready to pay
2602 * for the processing speed.
2603 */
2604 max = len * 2;
2605
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002606 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002607 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002608 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002609 return(NULL);
2610 }
2611 memcpy(buffer, buf, len);
2612 while (c != 0) { /* tested bigname2.xml */
2613 if (len + 10 > max) {
2614 max *= 2;
2615 buffer = (xmlChar *) xmlRealloc(buffer,
2616 max * sizeof(xmlChar));
2617 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002618 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002619 return(NULL);
2620 }
2621 }
2622 buffer[len++] = c;
2623 c = *cur++;
2624 }
2625 buffer[len] = 0;
2626 }
2627
2628 if (buffer == NULL)
2629 ret = xmlStrndup(buf, len);
2630 else {
2631 ret = buffer;
2632 }
2633 }
2634
2635 return(ret);
2636}
2637
2638/************************************************************************
2639 * *
2640 * The parser itself *
2641 * Relates to http://www.w3.org/TR/REC-xml *
2642 * *
2643 ************************************************************************/
2644
/*
 * Forward declarations of static helpers. xmlParseNameComplex() is
 * called by xmlParseName() ahead of its definition.
 */
static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
                                          int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002648
Owen Taylor3473f882001-02-23 17:55:21 +00002649/**
2650 * xmlParseName:
2651 * @ctxt: an XML parser context
2652 *
2653 * parse an XML name.
2654 *
2655 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2656 * CombiningChar | Extender
2657 *
2658 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2659 *
2660 * [6] Names ::= Name (S Name)*
2661 *
2662 * Returns the Name parsed or NULL
2663 */
2664
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002665const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002666xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002667 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002668 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002669 int count = 0;
2670
2671 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002672
2673 /*
2674 * Accelerator for simple ASCII names
2675 */
2676 in = ctxt->input->cur;
2677 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2678 ((*in >= 0x41) && (*in <= 0x5A)) ||
2679 (*in == '_') || (*in == ':')) {
2680 in++;
2681 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2682 ((*in >= 0x41) && (*in <= 0x5A)) ||
2683 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002684 (*in == '_') || (*in == '-') ||
2685 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002686 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002687 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002688 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002689 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002690 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002691 ctxt->nbChars += count;
2692 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002693 if (ret == NULL)
2694 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002695 return(ret);
2696 }
2697 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002698 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002699}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002700
Daniel Veillard46de64e2002-05-29 08:21:33 +00002701/**
2702 * xmlParseNameAndCompare:
2703 * @ctxt: an XML parser context
2704 *
2705 * parse an XML name and compares for match
2706 * (specialized for endtag parsing)
2707 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002708 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2709 * and the name for mismatch
2710 */
2711
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002712static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002713xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
2714 const xmlChar *cmp = other;
2715 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002716 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002717
2718 GROW;
2719
2720 in = ctxt->input->cur;
2721 while (*in != 0 && *in == *cmp) {
2722 ++in;
2723 ++cmp;
2724 }
2725 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
2726 /* success */
2727 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002728 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002729 }
2730 /* failure (or end of input buffer), check with full function */
2731 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002732 /* strings coming from the dictionnary direct compare possible */
2733 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002734 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002735 }
2736 return ret;
2737}
2738
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002739static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002740xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002741 int len = 0, l;
2742 int c;
2743 int count = 0;
2744
2745 /*
2746 * Handler for more complex cases
2747 */
2748 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002749 c = CUR_CHAR(l);
2750 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2751 (!IS_LETTER(c) && (c != '_') &&
2752 (c != ':'))) {
2753 return(NULL);
2754 }
2755
2756 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
2757 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2758 (c == '.') || (c == '-') ||
2759 (c == '_') || (c == ':') ||
2760 (IS_COMBINING(c)) ||
2761 (IS_EXTENDER(c)))) {
2762 if (count++ > 100) {
2763 count = 0;
2764 GROW;
2765 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002766 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002767 NEXTL(l);
2768 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002769 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002770 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002771}
2772
2773/**
2774 * xmlParseStringName:
2775 * @ctxt: an XML parser context
2776 * @str: a pointer to the string pointer (IN/OUT)
2777 *
2778 * parse an XML name.
2779 *
2780 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2781 * CombiningChar | Extender
2782 *
2783 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2784 *
2785 * [6] Names ::= Name (S Name)*
2786 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002787 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002788 * is updated to the current location in the string.
2789 */
2790
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002791static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002792xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2793 xmlChar buf[XML_MAX_NAMELEN + 5];
2794 const xmlChar *cur = *str;
2795 int len = 0, l;
2796 int c;
2797
2798 c = CUR_SCHAR(cur, l);
2799 if (!IS_LETTER(c) && (c != '_') &&
2800 (c != ':')) {
2801 return(NULL);
2802 }
2803
2804 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2805 (c == '.') || (c == '-') ||
2806 (c == '_') || (c == ':') ||
2807 (IS_COMBINING(c)) ||
2808 (IS_EXTENDER(c))) {
2809 COPY_BUF(l,buf,len,c);
2810 cur += l;
2811 c = CUR_SCHAR(cur, l);
2812 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2813 /*
2814 * Okay someone managed to make a huge name, so he's ready to pay
2815 * for the processing speed.
2816 */
2817 xmlChar *buffer;
2818 int max = len * 2;
2819
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002820 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002821 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002822 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002823 return(NULL);
2824 }
2825 memcpy(buffer, buf, len);
2826 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2827 (c == '.') || (c == '-') ||
2828 (c == '_') || (c == ':') ||
2829 (IS_COMBINING(c)) ||
2830 (IS_EXTENDER(c))) {
2831 if (len + 10 > max) {
2832 max *= 2;
2833 buffer = (xmlChar *) xmlRealloc(buffer,
2834 max * sizeof(xmlChar));
2835 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002836 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002837 return(NULL);
2838 }
2839 }
2840 COPY_BUF(l,buffer,len,c);
2841 cur += l;
2842 c = CUR_SCHAR(cur, l);
2843 }
2844 buffer[len] = 0;
2845 *str = cur;
2846 return(buffer);
2847 }
2848 }
2849 *str = cur;
2850 return(xmlStrndup(buf, len));
2851}
2852
2853/**
2854 * xmlParseNmtoken:
2855 * @ctxt: an XML parser context
2856 *
2857 * parse an XML Nmtoken.
2858 *
2859 * [7] Nmtoken ::= (NameChar)+
2860 *
2861 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2862 *
2863 * Returns the Nmtoken parsed or NULL
2864 */
2865
2866xmlChar *
2867xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2868 xmlChar buf[XML_MAX_NAMELEN + 5];
2869 int len = 0, l;
2870 int c;
2871 int count = 0;
2872
2873 GROW;
2874 c = CUR_CHAR(l);
2875
2876 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2877 (c == '.') || (c == '-') ||
2878 (c == '_') || (c == ':') ||
2879 (IS_COMBINING(c)) ||
2880 (IS_EXTENDER(c))) {
2881 if (count++ > 100) {
2882 count = 0;
2883 GROW;
2884 }
2885 COPY_BUF(l,buf,len,c);
2886 NEXTL(l);
2887 c = CUR_CHAR(l);
2888 if (len >= XML_MAX_NAMELEN) {
2889 /*
2890 * Okay someone managed to make a huge token, so he's ready to pay
2891 * for the processing speed.
2892 */
2893 xmlChar *buffer;
2894 int max = len * 2;
2895
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002896 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002897 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002898 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002899 return(NULL);
2900 }
2901 memcpy(buffer, buf, len);
2902 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2903 (c == '.') || (c == '-') ||
2904 (c == '_') || (c == ':') ||
2905 (IS_COMBINING(c)) ||
2906 (IS_EXTENDER(c))) {
2907 if (count++ > 100) {
2908 count = 0;
2909 GROW;
2910 }
2911 if (len + 10 > max) {
2912 max *= 2;
2913 buffer = (xmlChar *) xmlRealloc(buffer,
2914 max * sizeof(xmlChar));
2915 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002916 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002917 return(NULL);
2918 }
2919 }
2920 COPY_BUF(l,buffer,len,c);
2921 NEXTL(l);
2922 c = CUR_CHAR(l);
2923 }
2924 buffer[len] = 0;
2925 return(buffer);
2926 }
2927 }
2928 if (len == 0)
2929 return(NULL);
2930 return(xmlStrndup(buf, len));
2931}
2932
2933/**
2934 * xmlParseEntityValue:
2935 * @ctxt: an XML parser context
2936 * @orig: if non-NULL store a copy of the original entity value
2937 *
2938 * parse a value for ENTITY declarations
2939 *
2940 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2941 * "'" ([^%&'] | PEReference | Reference)* "'"
2942 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002943 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002944 */
2945
2946xmlChar *
2947xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2948 xmlChar *buf = NULL;
2949 int len = 0;
2950 int size = XML_PARSER_BUFFER_SIZE;
2951 int c, l;
2952 xmlChar stop;
2953 xmlChar *ret = NULL;
2954 const xmlChar *cur = NULL;
2955 xmlParserInputPtr input;
2956
2957 if (RAW == '"') stop = '"';
2958 else if (RAW == '\'') stop = '\'';
2959 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002960 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002961 return(NULL);
2962 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002963 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002964 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002965 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002966 return(NULL);
2967 }
2968
2969 /*
2970 * The content of the entity definition is copied in a buffer.
2971 */
2972
2973 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2974 input = ctxt->input;
2975 GROW;
2976 NEXT;
2977 c = CUR_CHAR(l);
2978 /*
2979 * NOTE: 4.4.5 Included in Literal
2980 * When a parameter entity reference appears in a literal entity
2981 * value, ... a single or double quote character in the replacement
2982 * text is always treated as a normal data character and will not
2983 * terminate the literal.
2984 * In practice it means we stop the loop only when back at parsing
2985 * the initial entity and the quote is found
2986 */
2987 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2988 (ctxt->input != input))) {
2989 if (len + 5 >= size) {
2990 size *= 2;
2991 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2992 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002993 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002994 return(NULL);
2995 }
2996 }
2997 COPY_BUF(l,buf,len,c);
2998 NEXTL(l);
2999 /*
3000 * Pop-up of finished entities.
3001 */
3002 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3003 xmlPopInput(ctxt);
3004
3005 GROW;
3006 c = CUR_CHAR(l);
3007 if (c == 0) {
3008 GROW;
3009 c = CUR_CHAR(l);
3010 }
3011 }
3012 buf[len] = 0;
3013
3014 /*
3015 * Raise problem w.r.t. '&' and '%' being used in non-entities
3016 * reference constructs. Note Charref will be handled in
3017 * xmlStringDecodeEntities()
3018 */
3019 cur = buf;
3020 while (*cur != 0) { /* non input consuming */
3021 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3022 xmlChar *name;
3023 xmlChar tmp = *cur;
3024
3025 cur++;
3026 name = xmlParseStringName(ctxt, &cur);
3027 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003028 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003029 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003030 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003031 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003032 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3033 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003034 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003035 }
3036 if (name != NULL)
3037 xmlFree(name);
3038 }
3039 cur++;
3040 }
3041
3042 /*
3043 * Then PEReference entities are substituted.
3044 */
3045 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003046 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003047 xmlFree(buf);
3048 } else {
3049 NEXT;
3050 /*
3051 * NOTE: 4.4.7 Bypassed
3052 * When a general entity reference appears in the EntityValue in
3053 * an entity declaration, it is bypassed and left as is.
3054 * so XML_SUBSTITUTE_REF is not set here.
3055 */
3056 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3057 0, 0, 0);
3058 if (orig != NULL)
3059 *orig = buf;
3060 else
3061 xmlFree(buf);
3062 }
3063
3064 return(ret);
3065}
3066
3067/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003068 * xmlParseAttValueComplex:
3069 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003070 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003071 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003072 *
3073 * parse a value for an attribute, this is the fallback function
3074 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003075 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003076 *
3077 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3078 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003079static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003080xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003081 xmlChar limit = 0;
3082 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003083 int len = 0;
3084 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003085 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003086 xmlChar *current = NULL;
3087 xmlEntityPtr ent;
3088
Owen Taylor3473f882001-02-23 17:55:21 +00003089 if (NXT(0) == '"') {
3090 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3091 limit = '"';
3092 NEXT;
3093 } else if (NXT(0) == '\'') {
3094 limit = '\'';
3095 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3096 NEXT;
3097 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003098 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003099 return(NULL);
3100 }
3101
3102 /*
3103 * allocate a translation buffer.
3104 */
3105 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003106 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003107 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003108
3109 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003110 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003111 */
3112 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003113 while ((NXT(0) != limit) && /* checked */
3114 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003115 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003116 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003117 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003118 if (NXT(1) == '#') {
3119 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003120
Owen Taylor3473f882001-02-23 17:55:21 +00003121 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003122 if (ctxt->replaceEntities) {
3123 if (len > buf_size - 10) {
3124 growBuffer(buf);
3125 }
3126 buf[len++] = '&';
3127 } else {
3128 /*
3129 * The reparsing will be done in xmlStringGetNodeList()
3130 * called by the attribute() function in SAX.c
3131 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003132 if (len > buf_size - 10) {
3133 growBuffer(buf);
3134 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003135 buf[len++] = '&';
3136 buf[len++] = '#';
3137 buf[len++] = '3';
3138 buf[len++] = '8';
3139 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003140 }
3141 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003142 if (len > buf_size - 10) {
3143 growBuffer(buf);
3144 }
Owen Taylor3473f882001-02-23 17:55:21 +00003145 len += xmlCopyChar(0, &buf[len], val);
3146 }
3147 } else {
3148 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003149 if ((ent != NULL) &&
3150 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3151 if (len > buf_size - 10) {
3152 growBuffer(buf);
3153 }
3154 if ((ctxt->replaceEntities == 0) &&
3155 (ent->content[0] == '&')) {
3156 buf[len++] = '&';
3157 buf[len++] = '#';
3158 buf[len++] = '3';
3159 buf[len++] = '8';
3160 buf[len++] = ';';
3161 } else {
3162 buf[len++] = ent->content[0];
3163 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003164 } else if ((ent != NULL) &&
3165 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003166 xmlChar *rep;
3167
3168 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3169 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003170 XML_SUBSTITUTE_REF,
3171 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003172 if (rep != NULL) {
3173 current = rep;
3174 while (*current != 0) { /* non input consuming */
3175 buf[len++] = *current++;
3176 if (len > buf_size - 10) {
3177 growBuffer(buf);
3178 }
3179 }
3180 xmlFree(rep);
3181 }
3182 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003183 if (len > buf_size - 10) {
3184 growBuffer(buf);
3185 }
Owen Taylor3473f882001-02-23 17:55:21 +00003186 if (ent->content != NULL)
3187 buf[len++] = ent->content[0];
3188 }
3189 } else if (ent != NULL) {
3190 int i = xmlStrlen(ent->name);
3191 const xmlChar *cur = ent->name;
3192
3193 /*
3194 * This may look absurd but is needed to detect
3195 * entities problems
3196 */
3197 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3198 (ent->content != NULL)) {
3199 xmlChar *rep;
3200 rep = xmlStringDecodeEntities(ctxt, ent->content,
3201 XML_SUBSTITUTE_REF, 0, 0, 0);
3202 if (rep != NULL)
3203 xmlFree(rep);
3204 }
3205
3206 /*
3207 * Just output the reference
3208 */
3209 buf[len++] = '&';
3210 if (len > buf_size - i - 10) {
3211 growBuffer(buf);
3212 }
3213 for (;i > 0;i--)
3214 buf[len++] = *cur++;
3215 buf[len++] = ';';
3216 }
3217 }
3218 } else {
3219 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003220 if ((len != 0) || (!normalize)) {
3221 if ((!normalize) || (!in_space)) {
3222 COPY_BUF(l,buf,len,0x20);
3223 if (len > buf_size - 10) {
3224 growBuffer(buf);
3225 }
3226 }
3227 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003228 }
3229 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003230 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003231 COPY_BUF(l,buf,len,c);
3232 if (len > buf_size - 10) {
3233 growBuffer(buf);
3234 }
3235 }
3236 NEXTL(l);
3237 }
3238 GROW;
3239 c = CUR_CHAR(l);
3240 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003241 if ((in_space) && (normalize)) {
3242 while (buf[len - 1] == 0x20) len--;
3243 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003244 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003245 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003246 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003247 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003248 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3249 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003250 } else
3251 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003252 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003253 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003254
3255mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003256 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003257 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003258}
3259
3260/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003261 * xmlParseAttValue:
3262 * @ctxt: an XML parser context
3263 *
3264 * parse a value for an attribute
3265 * Note: the parser won't do substitution of entities here, this
3266 * will be handled later in xmlStringGetNodeList
3267 *
3268 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3269 * "'" ([^<&'] | Reference)* "'"
3270 *
3271 * 3.3.3 Attribute-Value Normalization:
3272 * Before the value of an attribute is passed to the application or
3273 * checked for validity, the XML processor must normalize it as follows:
3274 * - a character reference is processed by appending the referenced
3275 * character to the attribute value
3276 * - an entity reference is processed by recursively processing the
3277 * replacement text of the entity
3278 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3279 * appending #x20 to the normalized value, except that only a single
3280 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3281 * parsed entity or the literal entity value of an internal parsed entity
3282 * - other characters are processed by appending them to the normalized value
3283 * If the declared value is not CDATA, then the XML processor must further
3284 * process the normalized attribute value by discarding any leading and
3285 * trailing space (#x20) characters, and by replacing sequences of space
3286 * (#x20) characters by a single space (#x20) character.
3287 * All attributes for which no declaration has been read should be treated
3288 * by a non-validating parser as if declared CDATA.
3289 *
3290 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3291 */
3292
3293
3294xmlChar *
3295xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003296 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003297}
3298
3299/**
Owen Taylor3473f882001-02-23 17:55:21 +00003300 * xmlParseSystemLiteral:
3301 * @ctxt: an XML parser context
3302 *
3303 * parse an XML Literal
3304 *
3305 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3306 *
3307 * Returns the SystemLiteral parsed or NULL
3308 */
3309
3310xmlChar *
3311xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3312 xmlChar *buf = NULL;
3313 int len = 0;
3314 int size = XML_PARSER_BUFFER_SIZE;
3315 int cur, l;
3316 xmlChar stop;
3317 int state = ctxt->instate;
3318 int count = 0;
3319
3320 SHRINK;
3321 if (RAW == '"') {
3322 NEXT;
3323 stop = '"';
3324 } else if (RAW == '\'') {
3325 NEXT;
3326 stop = '\'';
3327 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003328 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003329 return(NULL);
3330 }
3331
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003332 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003333 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003334 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003335 return(NULL);
3336 }
3337 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3338 cur = CUR_CHAR(l);
3339 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
3340 if (len + 5 >= size) {
3341 size *= 2;
3342 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3343 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003344 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003345 ctxt->instate = (xmlParserInputState) state;
3346 return(NULL);
3347 }
3348 }
3349 count++;
3350 if (count > 50) {
3351 GROW;
3352 count = 0;
3353 }
3354 COPY_BUF(l,buf,len,cur);
3355 NEXTL(l);
3356 cur = CUR_CHAR(l);
3357 if (cur == 0) {
3358 GROW;
3359 SHRINK;
3360 cur = CUR_CHAR(l);
3361 }
3362 }
3363 buf[len] = 0;
3364 ctxt->instate = (xmlParserInputState) state;
3365 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003366 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003367 } else {
3368 NEXT;
3369 }
3370 return(buf);
3371}
3372
3373/**
3374 * xmlParsePubidLiteral:
3375 * @ctxt: an XML parser context
3376 *
3377 * parse an XML public literal
3378 *
3379 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3380 *
3381 * Returns the PubidLiteral parsed or NULL.
3382 */
3383
3384xmlChar *
3385xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3386 xmlChar *buf = NULL;
3387 int len = 0;
3388 int size = XML_PARSER_BUFFER_SIZE;
3389 xmlChar cur;
3390 xmlChar stop;
3391 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003392 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003393
3394 SHRINK;
3395 if (RAW == '"') {
3396 NEXT;
3397 stop = '"';
3398 } else if (RAW == '\'') {
3399 NEXT;
3400 stop = '\'';
3401 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003402 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003403 return(NULL);
3404 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003405 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003406 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003407 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003408 return(NULL);
3409 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003410 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003411 cur = CUR;
3412 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
3413 if (len + 1 >= size) {
3414 size *= 2;
3415 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3416 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003417 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003418 return(NULL);
3419 }
3420 }
3421 buf[len++] = cur;
3422 count++;
3423 if (count > 50) {
3424 GROW;
3425 count = 0;
3426 }
3427 NEXT;
3428 cur = CUR;
3429 if (cur == 0) {
3430 GROW;
3431 SHRINK;
3432 cur = CUR;
3433 }
3434 }
3435 buf[len] = 0;
3436 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003437 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003438 } else {
3439 NEXT;
3440 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003441 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003442 return(buf);
3443}
3444
Daniel Veillard48b2f892001-02-25 16:11:03 +00003445void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00003446/**
3447 * xmlParseCharData:
3448 * @ctxt: an XML parser context
3449 * @cdata: int indicating whether we are within a CDATA section
3450 *
3451 * parse a CharData section.
3452 * if we are within a CDATA section ']]>' marks an end of section.
3453 *
3454 * The right angle bracket (>) may be represented using the string "&gt;",
3455 * and must, for compatibility, be escaped using "&gt;" or a character
3456 * reference when it appears in the string "]]>" in content, when that
3457 * string is not marking the end of a CDATA section.
3458 *
3459 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3460 */
3461
3462void
3463xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003464 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003465 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003466 int line = ctxt->input->line;
3467 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003468
3469 SHRINK;
3470 GROW;
3471 /*
3472 * Accelerated common case where input don't need to be
3473 * modified before passing it to the handler.
3474 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003475 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003476 in = ctxt->input->cur;
3477 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003478get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00003479 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
3480 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003481 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003482 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003483 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003484 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003485 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003486 ctxt->input->line++;
3487 in++;
3488 }
3489 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003490 }
3491 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003492 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003493 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003494 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003495 return;
3496 }
3497 in++;
3498 goto get_more;
3499 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003500 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003501 if (nbchar > 0) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003502 if ((ctxt->sax->ignorableWhitespace !=
3503 ctxt->sax->characters) &&
3504 (IS_BLANK(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003505 const xmlChar *tmp = ctxt->input->cur;
3506 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003507
Daniel Veillarda7374592001-05-10 14:17:55 +00003508 if (areBlanks(ctxt, tmp, nbchar)) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003509 ctxt->sax->ignorableWhitespace(ctxt->userData,
3510 tmp, nbchar);
3511 } else if (ctxt->sax->characters != NULL)
3512 ctxt->sax->characters(ctxt->userData,
3513 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003514 line = ctxt->input->line;
3515 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003516 } else {
3517 if (ctxt->sax->characters != NULL)
3518 ctxt->sax->characters(ctxt->userData,
3519 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003520 line = ctxt->input->line;
3521 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003522 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003523 }
3524 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003525 if (*in == 0xD) {
3526 in++;
3527 if (*in == 0xA) {
3528 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003529 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003530 ctxt->input->line++;
3531 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003532 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003533 in--;
3534 }
3535 if (*in == '<') {
3536 return;
3537 }
3538 if (*in == '&') {
3539 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003540 }
3541 SHRINK;
3542 GROW;
3543 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003544 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003545 nbchar = 0;
3546 }
Daniel Veillard50582112001-03-26 22:52:16 +00003547 ctxt->input->line = line;
3548 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003549 xmlParseCharDataComplex(ctxt, cdata);
3550}
3551
Daniel Veillard01c13b52002-12-10 15:19:08 +00003552/**
3553 * xmlParseCharDataComplex:
3554 * @ctxt: an XML parser context
3555 * @cdata: int indicating whether we are within a CDATA section
3556 *
3557 * parse a CharData section.this is the fallback function
3558 * of xmlParseCharData() when the parsing requires handling
3559 * of non-ASCII characters.
3560 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003561void
3562xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003563 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3564 int nbchar = 0;
3565 int cur, l;
3566 int count = 0;
3567
3568 SHRINK;
3569 GROW;
3570 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003571 while ((cur != '<') && /* checked */
3572 (cur != '&') &&
3573 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003574 if ((cur == ']') && (NXT(1) == ']') &&
3575 (NXT(2) == '>')) {
3576 if (cdata) break;
3577 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003578 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003579 }
3580 }
3581 COPY_BUF(l,buf,nbchar,cur);
3582 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003583 buf[nbchar] = 0;
3584
Owen Taylor3473f882001-02-23 17:55:21 +00003585 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003586 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003587 */
3588 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3589 if (areBlanks(ctxt, buf, nbchar)) {
3590 if (ctxt->sax->ignorableWhitespace != NULL)
3591 ctxt->sax->ignorableWhitespace(ctxt->userData,
3592 buf, nbchar);
3593 } else {
3594 if (ctxt->sax->characters != NULL)
3595 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3596 }
3597 }
3598 nbchar = 0;
3599 }
3600 count++;
3601 if (count > 50) {
3602 GROW;
3603 count = 0;
3604 }
3605 NEXTL(l);
3606 cur = CUR_CHAR(l);
3607 }
3608 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003609 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003610 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003611 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003612 */
3613 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3614 if (areBlanks(ctxt, buf, nbchar)) {
3615 if (ctxt->sax->ignorableWhitespace != NULL)
3616 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3617 } else {
3618 if (ctxt->sax->characters != NULL)
3619 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3620 }
3621 }
3622 }
3623}
3624
3625/**
3626 * xmlParseExternalID:
3627 * @ctxt: an XML parser context
3628 * @publicID: a xmlChar** receiving PubidLiteral
3629 * @strict: indicate whether we should restrict parsing to only
3630 * production [75], see NOTE below
3631 *
3632 * Parse an External ID or a Public ID
3633 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003634 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003635 * 'PUBLIC' S PubidLiteral S SystemLiteral
3636 *
3637 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3638 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3639 *
3640 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3641 *
3642 * Returns the function returns SystemLiteral and in the second
3643 * case publicID receives PubidLiteral, is strict is off
3644 * it is possible to return NULL and have publicID set.
3645 */
3646
3647xmlChar *
3648xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3649 xmlChar *URI = NULL;
3650
3651 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003652
3653 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003654 if ((RAW == 'S') && (NXT(1) == 'Y') &&
3655 (NXT(2) == 'S') && (NXT(3) == 'T') &&
3656 (NXT(4) == 'E') && (NXT(5) == 'M')) {
3657 SKIP(6);
3658 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003659 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3660 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003661 }
3662 SKIP_BLANKS;
3663 URI = xmlParseSystemLiteral(ctxt);
3664 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003665 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003666 }
3667 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
3668 (NXT(2) == 'B') && (NXT(3) == 'L') &&
3669 (NXT(4) == 'I') && (NXT(5) == 'C')) {
3670 SKIP(6);
3671 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003672 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003673 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003674 }
3675 SKIP_BLANKS;
3676 *publicID = xmlParsePubidLiteral(ctxt);
3677 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003678 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003679 }
3680 if (strict) {
3681 /*
3682 * We don't handle [83] so "S SystemLiteral" is required.
3683 */
3684 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003685 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003686 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003687 }
3688 } else {
3689 /*
3690 * We handle [83] so we return immediately, if
3691 * "S SystemLiteral" is not detected. From a purely parsing
3692 * point of view that's a nice mess.
3693 */
3694 const xmlChar *ptr;
3695 GROW;
3696
3697 ptr = CUR_PTR;
3698 if (!IS_BLANK(*ptr)) return(NULL);
3699
3700 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3701 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3702 }
3703 SKIP_BLANKS;
3704 URI = xmlParseSystemLiteral(ctxt);
3705 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003706 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003707 }
3708 }
3709 return(URI);
3710}
3711
3712/**
3713 * xmlParseComment:
3714 * @ctxt: an XML parser context
3715 *
3716 * Skip an XML (SGML) comment <!-- .... -->
3717 * The spec says that "For compatibility, the string "--" (double-hyphen)
3718 * must not occur within comments. "
3719 *
3720 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3721 */
3722void
3723xmlParseComment(xmlParserCtxtPtr ctxt) {
3724 xmlChar *buf = NULL;
3725 int len;
3726 int size = XML_PARSER_BUFFER_SIZE;
3727 int q, ql;
3728 int r, rl;
3729 int cur, l;
3730 xmlParserInputState state;
3731 xmlParserInputPtr input = ctxt->input;
3732 int count = 0;
3733
3734 /*
3735 * Check that there is a comment right here.
3736 */
3737 if ((RAW != '<') || (NXT(1) != '!') ||
3738 (NXT(2) != '-') || (NXT(3) != '-')) return;
3739
3740 state = ctxt->instate;
3741 ctxt->instate = XML_PARSER_COMMENT;
3742 SHRINK;
3743 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003744 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003745 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003746 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003747 ctxt->instate = state;
3748 return;
3749 }
3750 q = CUR_CHAR(ql);
3751 NEXTL(ql);
3752 r = CUR_CHAR(rl);
3753 NEXTL(rl);
3754 cur = CUR_CHAR(l);
3755 len = 0;
3756 while (IS_CHAR(cur) && /* checked */
3757 ((cur != '>') ||
3758 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003759 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003760 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003761 }
3762 if (len + 5 >= size) {
3763 size *= 2;
3764 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3765 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003766 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003767 ctxt->instate = state;
3768 return;
3769 }
3770 }
3771 COPY_BUF(ql,buf,len,q);
3772 q = r;
3773 ql = rl;
3774 r = cur;
3775 rl = l;
3776
3777 count++;
3778 if (count > 50) {
3779 GROW;
3780 count = 0;
3781 }
3782 NEXTL(l);
3783 cur = CUR_CHAR(l);
3784 if (cur == 0) {
3785 SHRINK;
3786 GROW;
3787 cur = CUR_CHAR(l);
3788 }
3789 }
3790 buf[len] = 0;
3791 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003792 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003793 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003794 xmlFree(buf);
3795 } else {
3796 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003797 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3798 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003799 }
3800 NEXT;
3801 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3802 (!ctxt->disableSAX))
3803 ctxt->sax->comment(ctxt->userData, buf);
3804 xmlFree(buf);
3805 }
3806 ctxt->instate = state;
3807}
3808
3809/**
3810 * xmlParsePITarget:
3811 * @ctxt: an XML parser context
3812 *
3813 * parse the name of a PI
3814 *
3815 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3816 *
3817 * Returns the PITarget name or NULL
3818 */
3819
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003820const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003821xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003822 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003823
3824 name = xmlParseName(ctxt);
3825 if ((name != NULL) &&
3826 ((name[0] == 'x') || (name[0] == 'X')) &&
3827 ((name[1] == 'm') || (name[1] == 'M')) &&
3828 ((name[2] == 'l') || (name[2] == 'L'))) {
3829 int i;
3830 if ((name[0] == 'x') && (name[1] == 'm') &&
3831 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003832 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003833 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003834 return(name);
3835 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003836 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003837 return(name);
3838 }
3839 for (i = 0;;i++) {
3840 if (xmlW3CPIs[i] == NULL) break;
3841 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3842 return(name);
3843 }
3844 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3845 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3846 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003847 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003848 }
3849 }
3850 return(name);
3851}
3852
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must be of the form: catalog S? '=' S? quoted-URL S?
 * A syntax error is reported as a SAX warning (XML_WAR_CATALOG_PI),
 * except for a missing '=' which makes the function return silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *cur = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* pseudo-attribute name */
    for (;IS_BLANK(*cur);cur++) ;
    if (xmlStrncmp(cur, BAD_CAST"catalog", 7))
        goto error;
    cur += 7;

    /* '=' sign, its absence is not reported */
    for (;IS_BLANK(*cur);cur++) ;
    if (*cur != '=')
        return;
    cur++;

    /* opening quote, remembered to find the matching closing one */
    for (;IS_BLANK(*cur);cur++) ;
    quote = *cur;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    cur++;

    /* the URL runs up to the matching quote */
    start = cur;
    for (;(*cur != 0) && (*cur != quote);cur++) ;
    if (*cur == 0)
        goto error;
    URL = xmlStrndup(start, cur - start);
    cur++;

    /* only blanks may follow the closing quote */
    for (;IS_BLANK(*cur);cur++) ;
    if (*cur != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3915
Owen Taylor3473f882001-02-23 17:55:21 +00003916/**
3917 * xmlParsePI:
3918 * @ctxt: an XML parser context
3919 *
3920 * parse an XML Processing Instruction.
3921 *
3922 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3923 *
3924 * The processing is transfered to SAX once parsed.
3925 */
3926
3927void
3928xmlParsePI(xmlParserCtxtPtr ctxt) {
3929 xmlChar *buf = NULL;
3930 int len = 0;
3931 int size = XML_PARSER_BUFFER_SIZE;
3932 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003933 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00003934 xmlParserInputState state;
3935 int count = 0;
3936
3937 if ((RAW == '<') && (NXT(1) == '?')) {
3938 xmlParserInputPtr input = ctxt->input;
3939 state = ctxt->instate;
3940 ctxt->instate = XML_PARSER_PI;
3941 /*
3942 * this is a Processing Instruction.
3943 */
3944 SKIP(2);
3945 SHRINK;
3946
3947 /*
3948 * Parse the target name and check for special support like
3949 * namespace.
3950 */
3951 target = xmlParsePITarget(ctxt);
3952 if (target != NULL) {
3953 if ((RAW == '?') && (NXT(1) == '>')) {
3954 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003955 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3956 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003957 }
3958 SKIP(2);
3959
3960 /*
3961 * SAX: PI detected.
3962 */
3963 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3964 (ctxt->sax->processingInstruction != NULL))
3965 ctxt->sax->processingInstruction(ctxt->userData,
3966 target, NULL);
3967 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00003968 return;
3969 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003970 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003971 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003972 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003973 ctxt->instate = state;
3974 return;
3975 }
3976 cur = CUR;
3977 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003978 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
3979 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003980 }
3981 SKIP_BLANKS;
3982 cur = CUR_CHAR(l);
3983 while (IS_CHAR(cur) && /* checked */
3984 ((cur != '?') || (NXT(1) != '>'))) {
3985 if (len + 5 >= size) {
3986 size *= 2;
3987 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3988 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003989 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003990 ctxt->instate = state;
3991 return;
3992 }
3993 }
3994 count++;
3995 if (count > 50) {
3996 GROW;
3997 count = 0;
3998 }
3999 COPY_BUF(l,buf,len,cur);
4000 NEXTL(l);
4001 cur = CUR_CHAR(l);
4002 if (cur == 0) {
4003 SHRINK;
4004 GROW;
4005 cur = CUR_CHAR(l);
4006 }
4007 }
4008 buf[len] = 0;
4009 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004010 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4011 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004012 } else {
4013 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004014 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4015 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004016 }
4017 SKIP(2);
4018
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004019#ifdef LIBXML_CATALOG_ENABLED
4020 if (((state == XML_PARSER_MISC) ||
4021 (state == XML_PARSER_START)) &&
4022 (xmlStrEqual(target, XML_CATALOG_PI))) {
4023 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4024 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4025 (allow == XML_CATA_ALLOW_ALL))
4026 xmlParseCatalogPI(ctxt, buf);
4027 }
4028#endif
4029
4030
Owen Taylor3473f882001-02-23 17:55:21 +00004031 /*
4032 * SAX: PI detected.
4033 */
4034 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4035 (ctxt->sax->processingInstruction != NULL))
4036 ctxt->sax->processingInstruction(ctxt->userData,
4037 target, buf);
4038 }
4039 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004040 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004041 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004042 }
4043 ctxt->instate = state;
4044 }
4045}
4046
4047/**
4048 * xmlParseNotationDecl:
4049 * @ctxt: an XML parser context
4050 *
4051 * parse a notation declaration
4052 *
4053 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4054 *
4055 * Hence there is actually 3 choices:
4056 * 'PUBLIC' S PubidLiteral
4057 * 'PUBLIC' S PubidLiteral S SystemLiteral
4058 * and 'SYSTEM' S SystemLiteral
4059 *
4060 * See the NOTE on xmlParseExternalID().
4061 */
4062
4063void
4064xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004065 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004066 xmlChar *Pubid;
4067 xmlChar *Systemid;
4068
4069 if ((RAW == '<') && (NXT(1) == '!') &&
4070 (NXT(2) == 'N') && (NXT(3) == 'O') &&
4071 (NXT(4) == 'T') && (NXT(5) == 'A') &&
4072 (NXT(6) == 'T') && (NXT(7) == 'I') &&
4073 (NXT(8) == 'O') && (NXT(9) == 'N')) {
4074 xmlParserInputPtr input = ctxt->input;
4075 SHRINK;
4076 SKIP(10);
4077 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004078 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4079 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004080 return;
4081 }
4082 SKIP_BLANKS;
4083
Daniel Veillard76d66f42001-05-16 21:05:17 +00004084 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004085 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004086 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004087 return;
4088 }
4089 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004090 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004091 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004092 return;
4093 }
4094 SKIP_BLANKS;
4095
4096 /*
4097 * Parse the IDs.
4098 */
4099 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4100 SKIP_BLANKS;
4101
4102 if (RAW == '>') {
4103 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004104 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4105 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004106 }
4107 NEXT;
4108 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4109 (ctxt->sax->notationDecl != NULL))
4110 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4111 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004112 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004113 }
Owen Taylor3473f882001-02-23 17:55:21 +00004114 if (Systemid != NULL) xmlFree(Systemid);
4115 if (Pubid != NULL) xmlFree(Pubid);
4116 }
4117}
4118
4119/**
4120 * xmlParseEntityDecl:
4121 * @ctxt: an XML parser context
4122 *
4123 * parse <!ENTITY declarations
4124 *
4125 * [70] EntityDecl ::= GEDecl | PEDecl
4126 *
4127 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4128 *
4129 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4130 *
4131 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4132 *
4133 * [74] PEDef ::= EntityValue | ExternalID
4134 *
4135 * [76] NDataDecl ::= S 'NDATA' S Name
4136 *
4137 * [ VC: Notation Declared ]
4138 * The Name must match the declared name of a notation.
4139 */
4140
4141void
4142xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004143 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004144 xmlChar *value = NULL;
4145 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004146 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004147 int isParameter = 0;
4148 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004149 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004150
4151 GROW;
4152 if ((RAW == '<') && (NXT(1) == '!') &&
4153 (NXT(2) == 'E') && (NXT(3) == 'N') &&
4154 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4155 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
4156 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004157 SHRINK;
4158 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004159 skipped = SKIP_BLANKS;
4160 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004161 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4162 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004163 }
Owen Taylor3473f882001-02-23 17:55:21 +00004164
4165 if (RAW == '%') {
4166 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004167 skipped = SKIP_BLANKS;
4168 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004169 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4170 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004171 }
Owen Taylor3473f882001-02-23 17:55:21 +00004172 isParameter = 1;
4173 }
4174
Daniel Veillard76d66f42001-05-16 21:05:17 +00004175 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004176 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004177 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4178 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004179 return;
4180 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004181 skipped = SKIP_BLANKS;
4182 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004183 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4184 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004185 }
Owen Taylor3473f882001-02-23 17:55:21 +00004186
Daniel Veillardf5582f12002-06-11 10:08:16 +00004187 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004188 /*
4189 * handle the various case of definitions...
4190 */
4191 if (isParameter) {
4192 if ((RAW == '"') || (RAW == '\'')) {
4193 value = xmlParseEntityValue(ctxt, &orig);
4194 if (value) {
4195 if ((ctxt->sax != NULL) &&
4196 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4197 ctxt->sax->entityDecl(ctxt->userData, name,
4198 XML_INTERNAL_PARAMETER_ENTITY,
4199 NULL, NULL, value);
4200 }
4201 } else {
4202 URI = xmlParseExternalID(ctxt, &literal, 1);
4203 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004204 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004205 }
4206 if (URI) {
4207 xmlURIPtr uri;
4208
4209 uri = xmlParseURI((const char *) URI);
4210 if (uri == NULL) {
4211 ctxt->errNo = XML_ERR_INVALID_URI;
4212 if ((ctxt->sax != NULL) &&
4213 (!ctxt->disableSAX) &&
4214 (ctxt->sax->error != NULL))
4215 ctxt->sax->error(ctxt->userData,
4216 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004217 /*
4218 * This really ought to be a well formedness error
4219 * but the XML Core WG decided otherwise c.f. issue
4220 * E26 of the XML erratas.
4221 */
Owen Taylor3473f882001-02-23 17:55:21 +00004222 } else {
4223 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004224 /*
4225 * Okay this is foolish to block those but not
4226 * invalid URIs.
4227 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004228 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004229 } else {
4230 if ((ctxt->sax != NULL) &&
4231 (!ctxt->disableSAX) &&
4232 (ctxt->sax->entityDecl != NULL))
4233 ctxt->sax->entityDecl(ctxt->userData, name,
4234 XML_EXTERNAL_PARAMETER_ENTITY,
4235 literal, URI, NULL);
4236 }
4237 xmlFreeURI(uri);
4238 }
4239 }
4240 }
4241 } else {
4242 if ((RAW == '"') || (RAW == '\'')) {
4243 value = xmlParseEntityValue(ctxt, &orig);
4244 if ((ctxt->sax != NULL) &&
4245 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4246 ctxt->sax->entityDecl(ctxt->userData, name,
4247 XML_INTERNAL_GENERAL_ENTITY,
4248 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004249 /*
4250 * For expat compatibility in SAX mode.
4251 */
4252 if ((ctxt->myDoc == NULL) ||
4253 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4254 if (ctxt->myDoc == NULL) {
4255 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4256 }
4257 if (ctxt->myDoc->intSubset == NULL)
4258 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4259 BAD_CAST "fake", NULL, NULL);
4260
Daniel Veillard1af9a412003-08-20 22:54:39 +00004261 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4262 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004263 }
Owen Taylor3473f882001-02-23 17:55:21 +00004264 } else {
4265 URI = xmlParseExternalID(ctxt, &literal, 1);
4266 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004267 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004268 }
4269 if (URI) {
4270 xmlURIPtr uri;
4271
4272 uri = xmlParseURI((const char *)URI);
4273 if (uri == NULL) {
4274 ctxt->errNo = XML_ERR_INVALID_URI;
4275 if ((ctxt->sax != NULL) &&
4276 (!ctxt->disableSAX) &&
4277 (ctxt->sax->error != NULL))
4278 ctxt->sax->error(ctxt->userData,
4279 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004280 /*
4281 * This really ought to be a well formedness error
4282 * but the XML Core WG decided otherwise c.f. issue
4283 * E26 of the XML erratas.
4284 */
Owen Taylor3473f882001-02-23 17:55:21 +00004285 } else {
4286 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004287 /*
4288 * Okay this is foolish to block those but not
4289 * invalid URIs.
4290 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004291 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004292 }
4293 xmlFreeURI(uri);
4294 }
4295 }
4296 if ((RAW != '>') && (!IS_BLANK(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004297 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4298 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004299 }
4300 SKIP_BLANKS;
4301 if ((RAW == 'N') && (NXT(1) == 'D') &&
4302 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4303 (NXT(4) == 'A')) {
4304 SKIP(5);
4305 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004306 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4307 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004308 }
4309 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004310 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004311 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4312 (ctxt->sax->unparsedEntityDecl != NULL))
4313 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4314 literal, URI, ndata);
4315 } else {
4316 if ((ctxt->sax != NULL) &&
4317 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4318 ctxt->sax->entityDecl(ctxt->userData, name,
4319 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4320 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004321 /*
4322 * For expat compatibility in SAX mode.
4323 * assuming the entity repalcement was asked for
4324 */
4325 if ((ctxt->replaceEntities != 0) &&
4326 ((ctxt->myDoc == NULL) ||
4327 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4328 if (ctxt->myDoc == NULL) {
4329 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4330 }
4331
4332 if (ctxt->myDoc->intSubset == NULL)
4333 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4334 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004335 xmlSAX2EntityDecl(ctxt, name,
4336 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4337 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004338 }
Owen Taylor3473f882001-02-23 17:55:21 +00004339 }
4340 }
4341 }
4342 SKIP_BLANKS;
4343 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004344 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004345 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004346 } else {
4347 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004348 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4349 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004350 }
4351 NEXT;
4352 }
4353 if (orig != NULL) {
4354 /*
4355 * Ugly mechanism to save the raw entity value.
4356 */
4357 xmlEntityPtr cur = NULL;
4358
4359 if (isParameter) {
4360 if ((ctxt->sax != NULL) &&
4361 (ctxt->sax->getParameterEntity != NULL))
4362 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4363 } else {
4364 if ((ctxt->sax != NULL) &&
4365 (ctxt->sax->getEntity != NULL))
4366 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004367 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004368 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004369 }
Owen Taylor3473f882001-02-23 17:55:21 +00004370 }
4371 if (cur != NULL) {
4372 if (cur->orig != NULL)
4373 xmlFree(orig);
4374 else
4375 cur->orig = orig;
4376 } else
4377 xmlFree(orig);
4378 }
Owen Taylor3473f882001-02-23 17:55:21 +00004379 if (value != NULL) xmlFree(value);
4380 if (URI != NULL) xmlFree(URI);
4381 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004382 }
4383}
4384
4385/**
4386 * xmlParseDefaultDecl:
4387 * @ctxt: an XML parser context
4388 * @value: Receive a possible fixed default value for the attribute
4389 *
4390 * Parse an attribute default declaration
4391 *
4392 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4393 *
4394 * [ VC: Required Attribute ]
4395 * if the default declaration is the keyword #REQUIRED, then the
4396 * attribute must be specified for all elements of the type in the
4397 * attribute-list declaration.
4398 *
4399 * [ VC: Attribute Default Legal ]
4400 * The declared default value must meet the lexical constraints of
4401 * the declared attribute type c.f. xmlValidateAttributeDecl()
4402 *
4403 * [ VC: Fixed Attribute Default ]
4404 * if an attribute has a default value declared with the #FIXED
4405 * keyword, instances of that attribute must match the default value.
4406 *
4407 * [ WFC: No < in Attribute Values ]
4408 * handled in xmlParseAttValue()
4409 *
4410 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4411 * or XML_ATTRIBUTE_FIXED.
4412 */
4413
4414int
4415xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4416 int val;
4417 xmlChar *ret;
4418
4419 *value = NULL;
4420 if ((RAW == '#') && (NXT(1) == 'R') &&
4421 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
4422 (NXT(4) == 'U') && (NXT(5) == 'I') &&
4423 (NXT(6) == 'R') && (NXT(7) == 'E') &&
4424 (NXT(8) == 'D')) {
4425 SKIP(9);
4426 return(XML_ATTRIBUTE_REQUIRED);
4427 }
4428 if ((RAW == '#') && (NXT(1) == 'I') &&
4429 (NXT(2) == 'M') && (NXT(3) == 'P') &&
4430 (NXT(4) == 'L') && (NXT(5) == 'I') &&
4431 (NXT(6) == 'E') && (NXT(7) == 'D')) {
4432 SKIP(8);
4433 return(XML_ATTRIBUTE_IMPLIED);
4434 }
4435 val = XML_ATTRIBUTE_NONE;
4436 if ((RAW == '#') && (NXT(1) == 'F') &&
4437 (NXT(2) == 'I') && (NXT(3) == 'X') &&
4438 (NXT(4) == 'E') && (NXT(5) == 'D')) {
4439 SKIP(6);
4440 val = XML_ATTRIBUTE_FIXED;
4441 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004442 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4443 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004444 }
4445 SKIP_BLANKS;
4446 }
4447 ret = xmlParseAttValue(ctxt);
4448 ctxt->instate = XML_PARSER_DTD;
4449 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004450 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004451 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004452 } else
4453 *value = ret;
4454 return(val);
4455}
4456
4457/**
4458 * xmlParseNotationType:
4459 * @ctxt: an XML parser context
4460 *
4461 * parse an Notation attribute type.
4462 *
4463 * Note: the leading 'NOTATION' S part has already being parsed...
4464 *
4465 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4466 *
4467 * [ VC: Notation Attributes ]
4468 * Values of this type must match one of the notation names included
4469 * in the declaration; all notation names in the declaration must be declared.
4470 *
4471 * Returns: the notation attribute tree built while parsing
4472 */
4473
4474xmlEnumerationPtr
4475xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004476 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004477 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4478
4479 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004480 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004481 return(NULL);
4482 }
4483 SHRINK;
4484 do {
4485 NEXT;
4486 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004487 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004488 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004489 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4490 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004491 return(ret);
4492 }
4493 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004494 if (cur == NULL) return(ret);
4495 if (last == NULL) ret = last = cur;
4496 else {
4497 last->next = cur;
4498 last = cur;
4499 }
4500 SKIP_BLANKS;
4501 } while (RAW == '|');
4502 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004503 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004504 if ((last != NULL) && (last != ret))
4505 xmlFreeEnumeration(last);
4506 return(ret);
4507 }
4508 NEXT;
4509 return(ret);
4510}
4511
4512/**
4513 * xmlParseEnumerationType:
4514 * @ctxt: an XML parser context
4515 *
4516 * parse an Enumeration attribute type.
4517 *
4518 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4519 *
4520 * [ VC: Enumeration ]
4521 * Values of this type must match one of the Nmtoken tokens in
4522 * the declaration
4523 *
4524 * Returns: the enumeration attribute tree built while parsing
4525 */
4526
4527xmlEnumerationPtr
4528xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4529 xmlChar *name;
4530 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4531
4532 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004533 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004534 return(NULL);
4535 }
4536 SHRINK;
4537 do {
4538 NEXT;
4539 SKIP_BLANKS;
4540 name = xmlParseNmtoken(ctxt);
4541 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004542 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004543 return(ret);
4544 }
4545 cur = xmlCreateEnumeration(name);
4546 xmlFree(name);
4547 if (cur == NULL) return(ret);
4548 if (last == NULL) ret = last = cur;
4549 else {
4550 last->next = cur;
4551 last = cur;
4552 }
4553 SKIP_BLANKS;
4554 } while (RAW == '|');
4555 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004556 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004557 return(ret);
4558 }
4559 NEXT;
4560 return(ret);
4561}
4562
4563/**
4564 * xmlParseEnumeratedType:
4565 * @ctxt: an XML parser context
4566 * @tree: the enumeration tree built while parsing
4567 *
4568 * parse an Enumerated attribute type.
4569 *
4570 * [57] EnumeratedType ::= NotationType | Enumeration
4571 *
4572 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4573 *
4574 *
4575 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4576 */
4577
4578int
4579xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4580 if ((RAW == 'N') && (NXT(1) == 'O') &&
4581 (NXT(2) == 'T') && (NXT(3) == 'A') &&
4582 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4583 (NXT(6) == 'O') && (NXT(7) == 'N')) {
4584 SKIP(8);
4585 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004586 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4587 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004588 return(0);
4589 }
4590 SKIP_BLANKS;
4591 *tree = xmlParseNotationType(ctxt);
4592 if (*tree == NULL) return(0);
4593 return(XML_ATTRIBUTE_NOTATION);
4594 }
4595 *tree = xmlParseEnumerationType(ctxt);
4596 if (*tree == NULL) return(0);
4597 return(XML_ATTRIBUTE_ENUMERATION);
4598}
4599
4600/**
4601 * xmlParseAttributeType:
4602 * @ctxt: an XML parser context
4603 * @tree: the enumeration tree built while parsing
4604 *
4605 * parse the Attribute list def for an element
4606 *
4607 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4608 *
4609 * [55] StringType ::= 'CDATA'
4610 *
4611 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4612 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4613 *
4614 * Validity constraints for attribute values syntax are checked in
4615 * xmlValidateAttributeValue()
4616 *
4617 * [ VC: ID ]
4618 * Values of type ID must match the Name production. A name must not
4619 * appear more than once in an XML document as a value of this type;
4620 * i.e., ID values must uniquely identify the elements which bear them.
4621 *
4622 * [ VC: One ID per Element Type ]
4623 * No element type may have more than one ID attribute specified.
4624 *
4625 * [ VC: ID Attribute Default ]
4626 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4627 *
4628 * [ VC: IDREF ]
4629 * Values of type IDREF must match the Name production, and values
4630 * of type IDREFS must match Names; each IDREF Name must match the value
4631 * of an ID attribute on some element in the XML document; i.e. IDREF
4632 * values must match the value of some ID attribute.
4633 *
4634 * [ VC: Entity Name ]
4635 * Values of type ENTITY must match the Name production, values
4636 * of type ENTITIES must match Names; each Entity Name must match the
4637 * name of an unparsed entity declared in the DTD.
4638 *
4639 * [ VC: Name Token ]
4640 * Values of type NMTOKEN must match the Nmtoken production; values
4641 * of type NMTOKENS must match Nmtokens.
4642 *
4643 * Returns the attribute type
4644 */
4645int
4646xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4647 SHRINK;
4648 if ((RAW == 'C') && (NXT(1) == 'D') &&
4649 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4650 (NXT(4) == 'A')) {
4651 SKIP(5);
4652 return(XML_ATTRIBUTE_CDATA);
4653 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4654 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4655 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4656 SKIP(6);
4657 return(XML_ATTRIBUTE_IDREFS);
4658 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4659 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4660 (NXT(4) == 'F')) {
4661 SKIP(5);
4662 return(XML_ATTRIBUTE_IDREF);
4663 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4664 SKIP(2);
4665 return(XML_ATTRIBUTE_ID);
4666 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4667 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4668 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4669 SKIP(6);
4670 return(XML_ATTRIBUTE_ENTITY);
4671 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4672 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4673 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4674 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4675 SKIP(8);
4676 return(XML_ATTRIBUTE_ENTITIES);
4677 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4678 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4679 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4680 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4681 SKIP(8);
4682 return(XML_ATTRIBUTE_NMTOKENS);
4683 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4684 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4685 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4686 (NXT(6) == 'N')) {
4687 SKIP(7);
4688 return(XML_ATTRIBUTE_NMTOKEN);
4689 }
4690 return(xmlParseEnumeratedType(ctxt, tree));
4691}
4692
4693/**
4694 * xmlParseAttributeListDecl:
4695 * @ctxt: an XML parser context
4696 *
4697 * : parse the Attribute list def for an element
4698 *
4699 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4700 *
4701 * [53] AttDef ::= S Name S AttType S DefaultDecl
4702 *
4703 */
4704void
4705xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004706 const xmlChar *elemName;
4707 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004708 xmlEnumerationPtr tree;
4709
4710 if ((RAW == '<') && (NXT(1) == '!') &&
4711 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4712 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4713 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4714 (NXT(8) == 'T')) {
4715 xmlParserInputPtr input = ctxt->input;
4716
4717 SKIP(9);
4718 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004719 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004720 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004721 }
4722 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004723 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004724 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004725 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4726 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004727 return;
4728 }
4729 SKIP_BLANKS;
4730 GROW;
4731 while (RAW != '>') {
4732 const xmlChar *check = CUR_PTR;
4733 int type;
4734 int def;
4735 xmlChar *defaultValue = NULL;
4736
4737 GROW;
4738 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004739 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004740 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004741 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4742 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004743 break;
4744 }
4745 GROW;
4746 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004747 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004748 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004749 if (defaultValue != NULL)
4750 xmlFree(defaultValue);
4751 break;
4752 }
4753 SKIP_BLANKS;
4754
4755 type = xmlParseAttributeType(ctxt, &tree);
4756 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004757 if (defaultValue != NULL)
4758 xmlFree(defaultValue);
4759 break;
4760 }
4761
4762 GROW;
4763 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004764 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4765 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004766 if (defaultValue != NULL)
4767 xmlFree(defaultValue);
4768 if (tree != NULL)
4769 xmlFreeEnumeration(tree);
4770 break;
4771 }
4772 SKIP_BLANKS;
4773
4774 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4775 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004776 if (defaultValue != NULL)
4777 xmlFree(defaultValue);
4778 if (tree != NULL)
4779 xmlFreeEnumeration(tree);
4780 break;
4781 }
4782
4783 GROW;
4784 if (RAW != '>') {
4785 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004786 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004787 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004788 if (defaultValue != NULL)
4789 xmlFree(defaultValue);
4790 if (tree != NULL)
4791 xmlFreeEnumeration(tree);
4792 break;
4793 }
4794 SKIP_BLANKS;
4795 }
4796 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004797 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4798 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004799 if (defaultValue != NULL)
4800 xmlFree(defaultValue);
4801 if (tree != NULL)
4802 xmlFreeEnumeration(tree);
4803 break;
4804 }
4805 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4806 (ctxt->sax->attributeDecl != NULL))
4807 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4808 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004809 else if (tree != NULL)
4810 xmlFreeEnumeration(tree);
4811
4812 if ((ctxt->sax2) && (defaultValue != NULL) &&
4813 (def != XML_ATTRIBUTE_IMPLIED) &&
4814 (def != XML_ATTRIBUTE_REQUIRED)) {
4815 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4816 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004817 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4818 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4819 }
Owen Taylor3473f882001-02-23 17:55:21 +00004820 if (defaultValue != NULL)
4821 xmlFree(defaultValue);
4822 GROW;
4823 }
4824 if (RAW == '>') {
4825 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004826 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4827 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004828 }
4829 NEXT;
4830 }
Owen Taylor3473f882001-02-23 17:55:21 +00004831 }
4832}
4833
4834/**
4835 * xmlParseElementMixedContentDecl:
4836 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004837 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004838 *
4839 * parse the declaration for a Mixed Element content
4840 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4841 *
4842 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4843 * '(' S? '#PCDATA' S? ')'
4844 *
4845 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4846 *
4847 * [ VC: No Duplicate Types ]
4848 * The same name must not appear more than once in a single
4849 * mixed-content declaration.
4850 *
4851 * returns: the list of the xmlElementContentPtr describing the element choices
4852 */
4853xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004854xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004855 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004856 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004857
4858 GROW;
4859 if ((RAW == '#') && (NXT(1) == 'P') &&
4860 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4861 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4862 (NXT(6) == 'A')) {
4863 SKIP(7);
4864 SKIP_BLANKS;
4865 SHRINK;
4866 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004867 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004868 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4869 if (ctxt->vctxt.error != NULL)
4870 ctxt->vctxt.error(ctxt->vctxt.userData,
4871"Element content declaration doesn't start and stop in the same entity\n");
4872 ctxt->valid = 0;
4873 }
Owen Taylor3473f882001-02-23 17:55:21 +00004874 NEXT;
4875 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4876 if (RAW == '*') {
4877 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4878 NEXT;
4879 }
4880 return(ret);
4881 }
4882 if ((RAW == '(') || (RAW == '|')) {
4883 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4884 if (ret == NULL) return(NULL);
4885 }
4886 while (RAW == '|') {
4887 NEXT;
4888 if (elem == NULL) {
4889 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4890 if (ret == NULL) return(NULL);
4891 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004892 if (cur != NULL)
4893 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004894 cur = ret;
4895 } else {
4896 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4897 if (n == NULL) return(NULL);
4898 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004899 if (n->c1 != NULL)
4900 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004901 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004902 if (n != NULL)
4903 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004904 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004905 }
4906 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004907 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004908 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004909 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004910 "xmlParseElementMixedContentDecl : Name expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004911 xmlFreeElementContent(cur);
4912 return(NULL);
4913 }
4914 SKIP_BLANKS;
4915 GROW;
4916 }
4917 if ((RAW == ')') && (NXT(1) == '*')) {
4918 if (elem != NULL) {
4919 cur->c2 = xmlNewElementContent(elem,
4920 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004921 if (cur->c2 != NULL)
4922 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004923 }
4924 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004925 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004926 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4927 if (ctxt->vctxt.error != NULL)
4928 ctxt->vctxt.error(ctxt->vctxt.userData,
4929"Element content declaration doesn't start and stop in the same entity\n");
4930 ctxt->valid = 0;
4931 }
Owen Taylor3473f882001-02-23 17:55:21 +00004932 SKIP(2);
4933 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00004934 xmlFreeElementContent(ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004935 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004936 return(NULL);
4937 }
4938
4939 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004940 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004941 }
4942 return(ret);
4943}
4944
4945/**
4946 * xmlParseElementChildrenContentDecl:
4947 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004948 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004949 *
4950 * parse the declaration for a Mixed Element content
4951 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4952 *
4953 *
4954 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4955 *
4956 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4957 *
4958 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4959 *
4960 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4961 *
4962 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4963 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004964 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004965 * opening or closing parentheses in a choice, seq, or Mixed
4966 * construct is contained in the replacement text for a parameter
4967 * entity, both must be contained in the same replacement text. For
4968 * interoperability, if a parameter-entity reference appears in a
4969 * choice, seq, or Mixed construct, its replacement text should not
4970 * be empty, and neither the first nor last non-blank character of
4971 * the replacement text should be a connector (| or ,).
4972 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004973 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004974 * hierarchy.
4975 */
4976xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004977xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004978 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004979 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00004980 xmlChar type = 0;
4981
4982 SKIP_BLANKS;
4983 GROW;
4984 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004985 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004986
Owen Taylor3473f882001-02-23 17:55:21 +00004987 /* Recurse on first child */
4988 NEXT;
4989 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004990 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004991 SKIP_BLANKS;
4992 GROW;
4993 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004994 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004995 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004996 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004997 return(NULL);
4998 }
4999 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005000 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005001 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005002 return(NULL);
5003 }
Owen Taylor3473f882001-02-23 17:55:21 +00005004 GROW;
5005 if (RAW == '?') {
5006 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5007 NEXT;
5008 } else if (RAW == '*') {
5009 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5010 NEXT;
5011 } else if (RAW == '+') {
5012 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5013 NEXT;
5014 } else {
5015 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5016 }
Owen Taylor3473f882001-02-23 17:55:21 +00005017 GROW;
5018 }
5019 SKIP_BLANKS;
5020 SHRINK;
5021 while (RAW != ')') {
5022 /*
5023 * Each loop we parse one separator and one element.
5024 */
5025 if (RAW == ',') {
5026 if (type == 0) type = CUR;
5027
5028 /*
5029 * Detect "Name | Name , Name" error
5030 */
5031 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005032 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005033 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005034 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005035 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005036 xmlFreeElementContent(last);
5037 if (ret != NULL)
5038 xmlFreeElementContent(ret);
5039 return(NULL);
5040 }
5041 NEXT;
5042
5043 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
5044 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005045 if ((last != NULL) && (last != ret))
5046 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00005047 xmlFreeElementContent(ret);
5048 return(NULL);
5049 }
5050 if (last == NULL) {
5051 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005052 if (ret != NULL)
5053 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005054 ret = cur = op;
5055 } else {
5056 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005057 if (op != NULL)
5058 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005059 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005060 if (last != NULL)
5061 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005062 cur =op;
5063 last = NULL;
5064 }
5065 } else if (RAW == '|') {
5066 if (type == 0) type = CUR;
5067
5068 /*
5069 * Detect "Name , Name | Name" error
5070 */
5071 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005072 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005073 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005074 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005075 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005076 xmlFreeElementContent(last);
5077 if (ret != NULL)
5078 xmlFreeElementContent(ret);
5079 return(NULL);
5080 }
5081 NEXT;
5082
5083 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5084 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005085 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005086 xmlFreeElementContent(last);
5087 if (ret != NULL)
5088 xmlFreeElementContent(ret);
5089 return(NULL);
5090 }
5091 if (last == NULL) {
5092 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005093 if (ret != NULL)
5094 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005095 ret = cur = op;
5096 } else {
5097 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005098 if (op != NULL)
5099 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005100 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005101 if (last != NULL)
5102 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005103 cur =op;
5104 last = NULL;
5105 }
5106 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005107 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005108 if (ret != NULL)
5109 xmlFreeElementContent(ret);
5110 return(NULL);
5111 }
5112 GROW;
5113 SKIP_BLANKS;
5114 GROW;
5115 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005116 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005117 /* Recurse on second child */
5118 NEXT;
5119 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005120 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005121 SKIP_BLANKS;
5122 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005123 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005124 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005125 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005126 if (ret != NULL)
5127 xmlFreeElementContent(ret);
5128 return(NULL);
5129 }
5130 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005131 if (RAW == '?') {
5132 last->ocur = XML_ELEMENT_CONTENT_OPT;
5133 NEXT;
5134 } else if (RAW == '*') {
5135 last->ocur = XML_ELEMENT_CONTENT_MULT;
5136 NEXT;
5137 } else if (RAW == '+') {
5138 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5139 NEXT;
5140 } else {
5141 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5142 }
5143 }
5144 SKIP_BLANKS;
5145 GROW;
5146 }
5147 if ((cur != NULL) && (last != NULL)) {
5148 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005149 if (last != NULL)
5150 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005151 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005152 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005153 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5154 if (ctxt->vctxt.error != NULL)
5155 ctxt->vctxt.error(ctxt->vctxt.userData,
5156"Element content declaration doesn't start and stop in the same entity\n");
5157 ctxt->valid = 0;
5158 }
Owen Taylor3473f882001-02-23 17:55:21 +00005159 NEXT;
5160 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00005161 if (ret != NULL)
5162 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00005163 NEXT;
5164 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005165 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005166 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005167 cur = ret;
5168 /*
5169 * Some normalization:
5170 * (a | b* | c?)* == (a | b | c)*
5171 */
5172 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5173 if ((cur->c1 != NULL) &&
5174 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5175 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5176 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5177 if ((cur->c2 != NULL) &&
5178 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5179 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5180 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5181 cur = cur->c2;
5182 }
5183 }
Owen Taylor3473f882001-02-23 17:55:21 +00005184 NEXT;
5185 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005186 if (ret != NULL) {
5187 int found = 0;
5188
Daniel Veillarde470df72001-04-18 21:41:07 +00005189 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005190 /*
5191 * Some normalization:
5192 * (a | b*)+ == (a | b)*
5193 * (a | b?)+ == (a | b)*
5194 */
5195 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5196 if ((cur->c1 != NULL) &&
5197 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5198 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5199 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5200 found = 1;
5201 }
5202 if ((cur->c2 != NULL) &&
5203 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5204 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5205 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5206 found = 1;
5207 }
5208 cur = cur->c2;
5209 }
5210 if (found)
5211 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5212 }
Owen Taylor3473f882001-02-23 17:55:21 +00005213 NEXT;
5214 }
5215 return(ret);
5216}
5217
5218/**
5219 * xmlParseElementContentDecl:
5220 * @ctxt: an XML parser context
5221 * @name: the name of the element being defined.
5222 * @result: the Element Content pointer will be stored here if any
5223 *
5224 * parse the declaration for an Element content either Mixed or Children,
5225 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5226 *
5227 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5228 *
5229 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5230 */
5231
5232int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005233xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005234 xmlElementContentPtr *result) {
5235
5236 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005237 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005238 int res;
5239
5240 *result = NULL;
5241
5242 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005243 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005244 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005245 return(-1);
5246 }
5247 NEXT;
5248 GROW;
5249 SKIP_BLANKS;
5250 if ((RAW == '#') && (NXT(1) == 'P') &&
5251 (NXT(2) == 'C') && (NXT(3) == 'D') &&
5252 (NXT(4) == 'A') && (NXT(5) == 'T') &&
5253 (NXT(6) == 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005254 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005255 res = XML_ELEMENT_TYPE_MIXED;
5256 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005257 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005258 res = XML_ELEMENT_TYPE_ELEMENT;
5259 }
Owen Taylor3473f882001-02-23 17:55:21 +00005260 SKIP_BLANKS;
5261 *result = tree;
5262 return(res);
5263}
5264
5265/**
5266 * xmlParseElementDecl:
5267 * @ctxt: an XML parser context
5268 *
5269 * parse an Element declaration.
5270 *
5271 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5272 *
5273 * [ VC: Unique Element Type Declaration ]
5274 * No element type may be declared more than once
5275 *
5276 * Returns the type of the element, or -1 in case of error
5277 */
5278int
5279xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005280 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005281 int ret = -1;
5282 xmlElementContentPtr content = NULL;
5283
5284 GROW;
5285 if ((RAW == '<') && (NXT(1) == '!') &&
5286 (NXT(2) == 'E') && (NXT(3) == 'L') &&
5287 (NXT(4) == 'E') && (NXT(5) == 'M') &&
5288 (NXT(6) == 'E') && (NXT(7) == 'N') &&
5289 (NXT(8) == 'T')) {
5290 xmlParserInputPtr input = ctxt->input;
5291
5292 SKIP(9);
5293 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005294 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5295 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005296 }
5297 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005298 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005299 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005300 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5301 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005302 return(-1);
5303 }
5304 while ((RAW == 0) && (ctxt->inputNr > 1))
5305 xmlPopInput(ctxt);
5306 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005307 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5308 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005309 }
5310 SKIP_BLANKS;
5311 if ((RAW == 'E') && (NXT(1) == 'M') &&
5312 (NXT(2) == 'P') && (NXT(3) == 'T') &&
5313 (NXT(4) == 'Y')) {
5314 SKIP(5);
5315 /*
5316 * Element must always be empty.
5317 */
5318 ret = XML_ELEMENT_TYPE_EMPTY;
5319 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5320 (NXT(2) == 'Y')) {
5321 SKIP(3);
5322 /*
5323 * Element is a generic container.
5324 */
5325 ret = XML_ELEMENT_TYPE_ANY;
5326 } else if (RAW == '(') {
5327 ret = xmlParseElementContentDecl(ctxt, name, &content);
5328 } else {
5329 /*
5330 * [ WFC: PEs in Internal Subset ] error handling.
5331 */
5332 if ((RAW == '%') && (ctxt->external == 0) &&
5333 (ctxt->inputNr == 1)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005334 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
5335 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5336 ctxt->sax->error(ctxt->userData,
5337 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005338 } else {
5339 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
5340 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5341 ctxt->sax->error(ctxt->userData,
5342 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5343 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005344 ctxt->wellFormed = 0;
5345 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005346 return(-1);
5347 }
5348
5349 SKIP_BLANKS;
5350 /*
5351 * Pop-up of finished entities.
5352 */
5353 while ((RAW == 0) && (ctxt->inputNr > 1))
5354 xmlPopInput(ctxt);
5355 SKIP_BLANKS;
5356
5357 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005358 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005359 } else {
5360 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005361 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5362 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005363 }
5364
5365 NEXT;
5366 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5367 (ctxt->sax->elementDecl != NULL))
5368 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5369 content);
5370 }
5371 if (content != NULL) {
5372 xmlFreeElementContent(content);
5373 }
Owen Taylor3473f882001-02-23 17:55:21 +00005374 }
5375 return(ret);
5376}
5377
5378/**
Owen Taylor3473f882001-02-23 17:55:21 +00005379 * xmlParseConditionalSections
5380 * @ctxt: an XML parser context
5381 *
5382 * [61] conditionalSect ::= includeSect | ignoreSect
5383 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5384 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5385 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5386 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5387 */
5388
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005389static void
Owen Taylor3473f882001-02-23 17:55:21 +00005390xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5391 SKIP(3);
5392 SKIP_BLANKS;
5393 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
5394 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
5395 (NXT(6) == 'E')) {
5396 SKIP(7);
5397 SKIP_BLANKS;
5398 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005399 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005400 } else {
5401 NEXT;
5402 }
5403 if (xmlParserDebugEntities) {
5404 if ((ctxt->input != NULL) && (ctxt->input->filename))
5405 xmlGenericError(xmlGenericErrorContext,
5406 "%s(%d): ", ctxt->input->filename,
5407 ctxt->input->line);
5408 xmlGenericError(xmlGenericErrorContext,
5409 "Entering INCLUDE Conditional Section\n");
5410 }
5411
5412 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5413 (NXT(2) != '>'))) {
5414 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005415 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005416
5417 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5418 xmlParseConditionalSections(ctxt);
5419 } else if (IS_BLANK(CUR)) {
5420 NEXT;
5421 } else if (RAW == '%') {
5422 xmlParsePEReference(ctxt);
5423 } else
5424 xmlParseMarkupDecl(ctxt);
5425
5426 /*
5427 * Pop-up of finished entities.
5428 */
5429 while ((RAW == 0) && (ctxt->inputNr > 1))
5430 xmlPopInput(ctxt);
5431
Daniel Veillardfdc91562002-07-01 21:52:03 +00005432 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005433 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005434 break;
5435 }
5436 }
5437 if (xmlParserDebugEntities) {
5438 if ((ctxt->input != NULL) && (ctxt->input->filename))
5439 xmlGenericError(xmlGenericErrorContext,
5440 "%s(%d): ", ctxt->input->filename,
5441 ctxt->input->line);
5442 xmlGenericError(xmlGenericErrorContext,
5443 "Leaving INCLUDE Conditional Section\n");
5444 }
5445
5446 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
5447 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
5448 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005449 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005450 int depth = 0;
5451
5452 SKIP(6);
5453 SKIP_BLANKS;
5454 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005455 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005456 } else {
5457 NEXT;
5458 }
5459 if (xmlParserDebugEntities) {
5460 if ((ctxt->input != NULL) && (ctxt->input->filename))
5461 xmlGenericError(xmlGenericErrorContext,
5462 "%s(%d): ", ctxt->input->filename,
5463 ctxt->input->line);
5464 xmlGenericError(xmlGenericErrorContext,
5465 "Entering IGNORE Conditional Section\n");
5466 }
5467
5468 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005469 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005470 * But disable SAX event generating DTD building in the meantime
5471 */
5472 state = ctxt->disableSAX;
5473 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005474 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005475 ctxt->instate = XML_PARSER_IGNORE;
5476
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005477 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005478 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5479 depth++;
5480 SKIP(3);
5481 continue;
5482 }
5483 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5484 if (--depth >= 0) SKIP(3);
5485 continue;
5486 }
5487 NEXT;
5488 continue;
5489 }
5490
5491 ctxt->disableSAX = state;
5492 ctxt->instate = instate;
5493
5494 if (xmlParserDebugEntities) {
5495 if ((ctxt->input != NULL) && (ctxt->input->filename))
5496 xmlGenericError(xmlGenericErrorContext,
5497 "%s(%d): ", ctxt->input->filename,
5498 ctxt->input->line);
5499 xmlGenericError(xmlGenericErrorContext,
5500 "Leaving IGNORE Conditional Section\n");
5501 }
5502
5503 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005504 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005505 }
5506
5507 if (RAW == 0)
5508 SHRINK;
5509
5510 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005511 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005512 } else {
5513 SKIP(3);
5514 }
5515}
5516
5517/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005518 * xmlParseMarkupDecl:
5519 * @ctxt: an XML parser context
5520 *
5521 * parse Markup declarations
5522 *
5523 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5524 * NotationDecl | PI | Comment
5525 *
5526 * [ VC: Proper Declaration/PE Nesting ]
5527 * Parameter-entity replacement text must be properly nested with
5528 * markup declarations. That is to say, if either the first character
5529 * or the last character of a markup declaration (markupdecl above) is
5530 * contained in the replacement text for a parameter-entity reference,
5531 * both must be contained in the same replacement text.
5532 *
5533 * [ WFC: PEs in Internal Subset ]
5534 * In the internal DTD subset, parameter-entity references can occur
5535 * only where markup declarations can occur, not within markup declarations.
5536 * (This does not apply to references that occur in external parameter
5537 * entities or to the external subset.)
5538 */
5539void
5540xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5541 GROW;
5542 xmlParseElementDecl(ctxt);
5543 xmlParseAttributeListDecl(ctxt);
5544 xmlParseEntityDecl(ctxt);
5545 xmlParseNotationDecl(ctxt);
5546 xmlParsePI(ctxt);
5547 xmlParseComment(ctxt);
5548 /*
5549 * This is only for internal subset. On external entities,
5550 * the replacement is done before parsing stage
5551 */
5552 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5553 xmlParsePEReference(ctxt);
5554
5555 /*
5556 * Conditional sections are allowed from entities included
5557 * by PE References in the internal subset.
5558 */
5559 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5560 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5561 xmlParseConditionalSections(ctxt);
5562 }
5563 }
5564
5565 ctxt->instate = XML_PARSER_DTD;
5566}
5567
5568/**
5569 * xmlParseTextDecl:
5570 * @ctxt: an XML parser context
5571 *
5572 * parse an XML declaration header for external entities
5573 *
5574 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5575 *
5576 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5577 */
5578
5579void
5580xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5581 xmlChar *version;
5582
5583 /*
5584 * We know that '<?xml' is here.
5585 */
5586 if ((RAW == '<') && (NXT(1) == '?') &&
5587 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5588 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5589 SKIP(5);
5590 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005591 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005592 return;
5593 }
5594
5595 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005596 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5597 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005598 }
5599 SKIP_BLANKS;
5600
5601 /*
5602 * We may have the VersionInfo here.
5603 */
5604 version = xmlParseVersionInfo(ctxt);
5605 if (version == NULL)
5606 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005607 else {
5608 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005609 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5610 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005611 }
5612 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005613 ctxt->input->version = version;
5614
5615 /*
5616 * We must have the encoding declaration
5617 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005618 xmlParseEncodingDecl(ctxt);
5619 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5620 /*
5621 * The XML REC instructs us to stop parsing right here
5622 */
5623 return;
5624 }
5625
5626 SKIP_BLANKS;
5627 if ((RAW == '?') && (NXT(1) == '>')) {
5628 SKIP(2);
5629 } else if (RAW == '>') {
5630 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005631 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005632 NEXT;
5633 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005634 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005635 MOVETO_ENDTAG(CUR_PTR);
5636 NEXT;
5637 }
5638}
5639
5640/**
Owen Taylor3473f882001-02-23 17:55:21 +00005641 * xmlParseExternalSubset:
5642 * @ctxt: an XML parser context
5643 * @ExternalID: the external identifier
5644 * @SystemID: the system identifier (or URL)
5645 *
5646 * parse Markup declarations from an external subset
5647 *
5648 * [30] extSubset ::= textDecl? extSubsetDecl
5649 *
5650 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5651 */
5652void
5653xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5654 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005655 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005656 GROW;
5657 if ((RAW == '<') && (NXT(1) == '?') &&
5658 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5659 (NXT(4) == 'l')) {
5660 xmlParseTextDecl(ctxt);
5661 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5662 /*
5663 * The XML REC instructs us to stop parsing right here
5664 */
5665 ctxt->instate = XML_PARSER_EOF;
5666 return;
5667 }
5668 }
5669 if (ctxt->myDoc == NULL) {
5670 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5671 }
5672 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5673 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5674
5675 ctxt->instate = XML_PARSER_DTD;
5676 ctxt->external = 1;
5677 while (((RAW == '<') && (NXT(1) == '?')) ||
5678 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005679 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005680 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005681 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005682
5683 GROW;
5684 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5685 xmlParseConditionalSections(ctxt);
5686 } else if (IS_BLANK(CUR)) {
5687 NEXT;
5688 } else if (RAW == '%') {
5689 xmlParsePEReference(ctxt);
5690 } else
5691 xmlParseMarkupDecl(ctxt);
5692
5693 /*
5694 * Pop-up of finished entities.
5695 */
5696 while ((RAW == 0) && (ctxt->inputNr > 1))
5697 xmlPopInput(ctxt);
5698
Daniel Veillardfdc91562002-07-01 21:52:03 +00005699 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005700 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005701 break;
5702 }
5703 }
5704
5705 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005706 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005707 }
5708
5709}
5710
5711/**
5712 * xmlParseReference:
5713 * @ctxt: an XML parser context
5714 *
5715 * parse and handle entity references in content, depending on the SAX
5716 * interface, this may end-up in a call to character() if this is a
5717 * CharRef, a predefined entity, if there is no reference() callback.
5718 * or if the parser was asked to switch to that mode.
5719 *
5720 * [67] Reference ::= EntityRef | CharRef
5721 */
5722void
5723xmlParseReference(xmlParserCtxtPtr ctxt) {
5724 xmlEntityPtr ent;
5725 xmlChar *val;
5726 if (RAW != '&') return;
5727
5728 if (NXT(1) == '#') {
5729 int i = 0;
5730 xmlChar out[10];
5731 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005732 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005733
5734 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5735 /*
5736 * So we are using non-UTF-8 buffers
5737 * Check that the char fit on 8bits, if not
5738 * generate a CharRef.
5739 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005740 if (value <= 0xFF) {
5741 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005742 out[1] = 0;
5743 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5744 (!ctxt->disableSAX))
5745 ctxt->sax->characters(ctxt->userData, out, 1);
5746 } else {
5747 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005748 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005749 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005750 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005751 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5752 (!ctxt->disableSAX))
5753 ctxt->sax->reference(ctxt->userData, out);
5754 }
5755 } else {
5756 /*
5757 * Just encode the value in UTF-8
5758 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005759 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005760 out[i] = 0;
5761 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5762 (!ctxt->disableSAX))
5763 ctxt->sax->characters(ctxt->userData, out, i);
5764 }
5765 } else {
5766 ent = xmlParseEntityRef(ctxt);
5767 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005768 if (!ctxt->wellFormed)
5769 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005770 if ((ent->name != NULL) &&
5771 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5772 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005773 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005774
5775
5776 /*
5777 * The first reference to the entity trigger a parsing phase
5778 * where the ent->children is filled with the result from
5779 * the parsing.
5780 */
5781 if (ent->children == NULL) {
5782 xmlChar *value;
5783 value = ent->content;
5784
5785 /*
5786 * Check that this entity is well formed
5787 */
5788 if ((value != NULL) &&
5789 (value[1] == 0) && (value[0] == '<') &&
5790 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5791 /*
5792 * DONE: get definite answer on this !!!
5793 * Lots of entity decls are used to declare a single
5794 * char
5795 * <!ENTITY lt "<">
5796 * Which seems to be valid since
5797 * 2.4: The ampersand character (&) and the left angle
5798 * bracket (<) may appear in their literal form only
5799 * when used ... They are also legal within the literal
5800 * entity value of an internal entity declaration;i
5801 * see "4.3.2 Well-Formed Parsed Entities".
5802 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5803 * Looking at the OASIS test suite and James Clark
5804 * tests, this is broken. However the XML REC uses
5805 * it. Is the XML REC not well-formed ????
5806 * This is a hack to avoid this problem
5807 *
5808 * ANSWER: since lt gt amp .. are already defined,
5809 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005810 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005811 * is lousy but acceptable.
5812 */
5813 list = xmlNewDocText(ctxt->myDoc, value);
5814 if (list != NULL) {
5815 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5816 (ent->children == NULL)) {
5817 ent->children = list;
5818 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005819 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005820 list->parent = (xmlNodePtr) ent;
5821 } else {
5822 xmlFreeNodeList(list);
5823 }
5824 } else if (list != NULL) {
5825 xmlFreeNodeList(list);
5826 }
5827 } else {
5828 /*
5829 * 4.3.2: An internal general parsed entity is well-formed
5830 * if its replacement text matches the production labeled
5831 * content.
5832 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005833
5834 void *user_data;
5835 /*
5836 * This is a bit hackish but this seems the best
5837 * way to make sure both SAX and DOM entity support
5838 * behaves okay.
5839 */
5840 if (ctxt->userData == ctxt)
5841 user_data = NULL;
5842 else
5843 user_data = ctxt->userData;
5844
Owen Taylor3473f882001-02-23 17:55:21 +00005845 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5846 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005847 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5848 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005849 ctxt->depth--;
5850 } else if (ent->etype ==
5851 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5852 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005853 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005854 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005855 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005856 ctxt->depth--;
5857 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005858 ret = XML_ERR_ENTITY_PE_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00005859 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5860 ctxt->sax->error(ctxt->userData,
5861 "Internal: invalid entity type\n");
5862 }
5863 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005864 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005865 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005866 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005867 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5868 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005869 (ent->children == NULL)) {
5870 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005871 if (ctxt->replaceEntities) {
5872 /*
5873 * Prune it directly in the generated document
5874 * except for single text nodes.
5875 */
5876 if ((list->type == XML_TEXT_NODE) &&
5877 (list->next == NULL)) {
5878 list->parent = (xmlNodePtr) ent;
5879 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005880 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005881 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005882 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005883 while (list != NULL) {
5884 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005885 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005886 if (list->next == NULL)
5887 ent->last = list;
5888 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005889 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005890 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00005891#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005892 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5893 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00005894#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005895 }
5896 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005897 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005898 while (list != NULL) {
5899 list->parent = (xmlNodePtr) ent;
5900 if (list->next == NULL)
5901 ent->last = list;
5902 list = list->next;
5903 }
Owen Taylor3473f882001-02-23 17:55:21 +00005904 }
5905 } else {
5906 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005907 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005908 }
William M. Brackb670e2e2003-09-27 01:05:55 +00005909 } else if ((ret != XML_ERR_OK) &&
5910 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005911 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005912 } else if (list != NULL) {
5913 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005914 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005915 }
5916 }
5917 }
5918 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5919 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5920 /*
5921 * Create a node.
5922 */
5923 ctxt->sax->reference(ctxt->userData, ent->name);
5924 return;
5925 } else if (ctxt->replaceEntities) {
5926 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5927 /*
5928 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005929 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005930 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005931 */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005932 if ((list == NULL) && (ent->owner == 0)) {
5933 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005934 cur = ent->children;
5935 while (cur != NULL) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005936 nw = xmlCopyNode(cur, 1);
5937 if (nw != NULL) {
5938 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005939 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005940 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005941 }
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005942 xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005943 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005944 if (cur == ent->last)
5945 break;
5946 cur = cur->next;
5947 }
Daniel Veillard81273902003-09-30 00:43:48 +00005948#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005949 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005950 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005951#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005952 } else if (list == NULL) {
5953 xmlNodePtr nw = NULL, cur, next, last,
5954 firstChild = NULL;
5955 /*
5956 * Copy the entity child list and make it the new
5957 * entity child list. The goal is to make sure any
5958 * ID or REF referenced will be the one from the
5959 * document content and not the entity copy.
5960 */
5961 cur = ent->children;
5962 ent->children = NULL;
5963 last = ent->last;
5964 ent->last = NULL;
5965 while (cur != NULL) {
5966 next = cur->next;
5967 cur->next = NULL;
5968 cur->parent = NULL;
5969 nw = xmlCopyNode(cur, 1);
5970 if (nw != NULL) {
5971 nw->_private = cur->_private;
5972 if (firstChild == NULL){
5973 firstChild = cur;
5974 }
5975 xmlAddChild((xmlNodePtr) ent, nw);
5976 xmlAddChild(ctxt->node, cur);
5977 }
5978 if (cur == last)
5979 break;
5980 cur = next;
5981 }
5982 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00005983#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005984 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5985 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005986#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005987 } else {
5988 /*
5989 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005990 * node with a possible previous text one which
5991 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005992 */
5993 if (ent->children->type == XML_TEXT_NODE)
5994 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5995 if ((ent->last != ent->children) &&
5996 (ent->last->type == XML_TEXT_NODE))
5997 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5998 xmlAddChildList(ctxt->node, ent->children);
5999 }
6000
Owen Taylor3473f882001-02-23 17:55:21 +00006001 /*
6002 * This is to avoid a nasty side effect, see
6003 * characters() in SAX.c
6004 */
6005 ctxt->nodemem = 0;
6006 ctxt->nodelen = 0;
6007 return;
6008 } else {
6009 /*
6010 * Probably running in SAX mode
6011 */
6012 xmlParserInputPtr input;
6013
6014 input = xmlNewEntityInputStream(ctxt, ent);
6015 xmlPushInput(ctxt, input);
6016 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
6017 (RAW == '<') && (NXT(1) == '?') &&
6018 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6019 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6020 xmlParseTextDecl(ctxt);
6021 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6022 /*
6023 * The XML REC instructs us to stop parsing right here
6024 */
6025 ctxt->instate = XML_PARSER_EOF;
6026 return;
6027 }
6028 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006029 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
6030 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006031 }
6032 }
6033 return;
6034 }
6035 }
6036 } else {
6037 val = ent->content;
6038 if (val == NULL) return;
6039 /*
6040 * inline the entity.
6041 */
6042 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6043 (!ctxt->disableSAX))
6044 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6045 }
6046 }
6047}
6048
6049/**
6050 * xmlParseEntityRef:
6051 * @ctxt: an XML parser context
6052 *
6053 * parse ENTITY references declarations
6054 *
6055 * [68] EntityRef ::= '&' Name ';'
6056 *
6057 * [ WFC: Entity Declared ]
6058 * In a document without any DTD, a document with only an internal DTD
6059 * subset which contains no parameter entity references, or a document
6060 * with "standalone='yes'", the Name given in the entity reference
6061 * must match that in an entity declaration, except that well-formed
6062 * documents need not declare any of the following entities: amp, lt,
6063 * gt, apos, quot. The declaration of a parameter entity must precede
6064 * any reference to it. Similarly, the declaration of a general entity
6065 * must precede any reference to it which appears in a default value in an
6066 * attribute-list declaration. Note that if entities are declared in the
6067 * external subset or in external parameter entities, a non-validating
6068 * processor is not obligated to read and process their declarations;
6069 * for such documents, the rule that an entity must be declared is a
6070 * well-formedness constraint only if standalone='yes'.
6071 *
6072 * [ WFC: Parsed Entity ]
6073 * An entity reference must not contain the name of an unparsed entity
6074 *
6075 * Returns the xmlEntityPtr if found, or NULL otherwise.
6076 */
6077xmlEntityPtr
6078xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006079 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006080 xmlEntityPtr ent = NULL;
6081
6082 GROW;
6083
6084 if (RAW == '&') {
6085 NEXT;
6086 name = xmlParseName(ctxt);
6087 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006088 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6089 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006090 } else {
6091 if (RAW == ';') {
6092 NEXT;
6093 /*
6094 * Ask first SAX for entity resolution, otherwise try the
6095 * predefined set.
6096 */
6097 if (ctxt->sax != NULL) {
6098 if (ctxt->sax->getEntity != NULL)
6099 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006100 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006101 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006102 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6103 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006104 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006105 }
Owen Taylor3473f882001-02-23 17:55:21 +00006106 }
6107 /*
6108 * [ WFC: Entity Declared ]
6109 * In a document without any DTD, a document with only an
6110 * internal DTD subset which contains no parameter entity
6111 * references, or a document with "standalone='yes'", the
6112 * Name given in the entity reference must match that in an
6113 * entity declaration, except that well-formed documents
6114 * need not declare any of the following entities: amp, lt,
6115 * gt, apos, quot.
6116 * The declaration of a parameter entity must precede any
6117 * reference to it.
6118 * Similarly, the declaration of a general entity must
6119 * precede any reference to it which appears in a default
6120 * value in an attribute-list declaration. Note that if
6121 * entities are declared in the external subset or in
6122 * external parameter entities, a non-validating processor
6123 * is not obligated to read and process their declarations;
6124 * for such documents, the rule that an entity must be
6125 * declared is a well-formedness constraint only if
6126 * standalone='yes'.
6127 */
6128 if (ent == NULL) {
6129 if ((ctxt->standalone == 1) ||
6130 ((ctxt->hasExternalSubset == 0) &&
6131 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006132 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006133 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00006134 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006135 } else {
6136 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00006137 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00006138 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00006139 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00006140 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006141 }
6142 }
6143
6144 /*
6145 * [ WFC: Parsed Entity ]
6146 * An entity reference must not contain the name of an
6147 * unparsed entity
6148 */
6149 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006150 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006151 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006152 }
6153
6154 /*
6155 * [ WFC: No External Entity References ]
6156 * Attribute values cannot contain direct or indirect
6157 * entity references to external entities.
6158 */
6159 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6160 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006161 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6162 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006163 }
6164 /*
6165 * [ WFC: No < in Attribute Values ]
6166 * The replacement text of any entity referred to directly or
6167 * indirectly in an attribute value (other than "&lt;") must
6168 * not contain a <.
6169 */
6170 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6171 (ent != NULL) &&
6172 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6173 (ent->content != NULL) &&
6174 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006175 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006176 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006177 }
6178
6179 /*
6180 * Internal check, no parameter entities here ...
6181 */
6182 else {
6183 switch (ent->etype) {
6184 case XML_INTERNAL_PARAMETER_ENTITY:
6185 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006186 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6187 "Attempt to reference the parameter entity '%s'\n",
6188 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006189 break;
6190 default:
6191 break;
6192 }
6193 }
6194
6195 /*
6196 * [ WFC: No Recursion ]
6197 * A parsed entity must not contain a recursive reference
6198 * to itself, either directly or indirectly.
6199 * Done somewhere else
6200 */
6201
6202 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006203 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006204 }
Owen Taylor3473f882001-02-23 17:55:21 +00006205 }
6206 }
6207 return(ent);
6208}
6209
6210/**
6211 * xmlParseStringEntityRef:
6212 * @ctxt: an XML parser context
6213 * @str: a pointer to an index in the string
6214 *
6215 * parse ENTITY references declarations, but this version parses it from
6216 * a string value.
6217 *
6218 * [68] EntityRef ::= '&' Name ';'
6219 *
6220 * [ WFC: Entity Declared ]
6221 * In a document without any DTD, a document with only an internal DTD
6222 * subset which contains no parameter entity references, or a document
6223 * with "standalone='yes'", the Name given in the entity reference
6224 * must match that in an entity declaration, except that well-formed
6225 * documents need not declare any of the following entities: amp, lt,
6226 * gt, apos, quot. The declaration of a parameter entity must precede
6227 * any reference to it. Similarly, the declaration of a general entity
6228 * must precede any reference to it which appears in a default value in an
6229 * attribute-list declaration. Note that if entities are declared in the
6230 * external subset or in external parameter entities, a non-validating
6231 * processor is not obligated to read and process their declarations;
6232 * for such documents, the rule that an entity must be declared is a
6233 * well-formedness constraint only if standalone='yes'.
6234 *
6235 * [ WFC: Parsed Entity ]
6236 * An entity reference must not contain the name of an unparsed entity
6237 *
6238 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6239 * is updated to the current location in the string.
6240 */
6241xmlEntityPtr
6242xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6243 xmlChar *name;
6244 const xmlChar *ptr;
6245 xmlChar cur;
6246 xmlEntityPtr ent = NULL;
6247
6248 if ((str == NULL) || (*str == NULL))
6249 return(NULL);
6250 ptr = *str;
6251 cur = *ptr;
6252 if (cur == '&') {
6253 ptr++;
6254 cur = *ptr;
6255 name = xmlParseStringName(ctxt, &ptr);
6256 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006257 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6258 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006259 } else {
6260 if (*ptr == ';') {
6261 ptr++;
6262 /*
6263 * Ask first SAX for entity resolution, otherwise try the
6264 * predefined set.
6265 */
6266 if (ctxt->sax != NULL) {
6267 if (ctxt->sax->getEntity != NULL)
6268 ent = ctxt->sax->getEntity(ctxt->userData, name);
6269 if (ent == NULL)
6270 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006271 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006272 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006273 }
Owen Taylor3473f882001-02-23 17:55:21 +00006274 }
6275 /*
6276 * [ WFC: Entity Declared ]
6277 * In a document without any DTD, a document with only an
6278 * internal DTD subset which contains no parameter entity
6279 * references, or a document with "standalone='yes'", the
6280 * Name given in the entity reference must match that in an
6281 * entity declaration, except that well-formed documents
6282 * need not declare any of the following entities: amp, lt,
6283 * gt, apos, quot.
6284 * The declaration of a parameter entity must precede any
6285 * reference to it.
6286 * Similarly, the declaration of a general entity must
6287 * precede any reference to it which appears in a default
6288 * value in an attribute-list declaration. Note that if
6289 * entities are declared in the external subset or in
6290 * external parameter entities, a non-validating processor
6291 * is not obligated to read and process their declarations;
6292 * for such documents, the rule that an entity must be
6293 * declared is a well-formedness constraint only if
6294 * standalone='yes'.
6295 */
6296 if (ent == NULL) {
6297 if ((ctxt->standalone == 1) ||
6298 ((ctxt->hasExternalSubset == 0) &&
6299 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006300 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006301 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006302 } else {
6303 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
6304 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6305 ctxt->sax->warning(ctxt->userData,
6306 "Entity '%s' not defined\n", name);
6307 }
6308 }
6309
6310 /*
6311 * [ WFC: Parsed Entity ]
6312 * An entity reference must not contain the name of an
6313 * unparsed entity
6314 */
6315 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6316 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
6317 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6318 ctxt->sax->error(ctxt->userData,
6319 "Entity reference to unparsed entity %s\n", name);
6320 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006321 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006322 }
6323
6324 /*
6325 * [ WFC: No External Entity References ]
6326 * Attribute values cannot contain direct or indirect
6327 * entity references to external entities.
6328 */
6329 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6330 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6331 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6332 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6333 ctxt->sax->error(ctxt->userData,
6334 "Attribute references external entity '%s'\n", name);
6335 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006336 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006337 }
6338 /*
6339 * [ WFC: No < in Attribute Values ]
6340 * The replacement text of any entity referred to directly or
6341 * indirectly in an attribute value (other than "&lt;") must
6342 * not contain a <.
6343 */
6344 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6345 (ent != NULL) &&
6346 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6347 (ent->content != NULL) &&
6348 (xmlStrchr(ent->content, '<'))) {
6349 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6350 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6351 ctxt->sax->error(ctxt->userData,
6352 "'<' in entity '%s' is not allowed in attributes values\n", name);
6353 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006354 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006355 }
6356
6357 /*
6358 * Internal check, no parameter entities here ...
6359 */
6360 else {
6361 switch (ent->etype) {
6362 case XML_INTERNAL_PARAMETER_ENTITY:
6363 case XML_EXTERNAL_PARAMETER_ENTITY:
6364 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6365 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6366 ctxt->sax->error(ctxt->userData,
6367 "Attempt to reference the parameter entity '%s'\n", name);
6368 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006369 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006370 break;
6371 default:
6372 break;
6373 }
6374 }
6375
6376 /*
6377 * [ WFC: No Recursion ]
6378 * A parsed entity must not contain a recursive reference
6379 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006380 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006381 */
6382
6383 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006384 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006385 }
6386 xmlFree(name);
6387 }
6388 }
6389 *str = ptr;
6390 return(ent);
6391}
6392
6393/**
6394 * xmlParsePEReference:
6395 * @ctxt: an XML parser context
6396 *
6397 * parse PEReference declarations
6398 * The entity content is handled directly by pushing it's content as
6399 * a new input stream.
6400 *
6401 * [69] PEReference ::= '%' Name ';'
6402 *
6403 * [ WFC: No Recursion ]
6404 * A parsed entity must not contain a recursive
6405 * reference to itself, either directly or indirectly.
6406 *
6407 * [ WFC: Entity Declared ]
6408 * In a document without any DTD, a document with only an internal DTD
6409 * subset which contains no parameter entity references, or a document
6410 * with "standalone='yes'", ... ... The declaration of a parameter
6411 * entity must precede any reference to it...
6412 *
6413 * [ VC: Entity Declared ]
6414 * In a document with an external subset or external parameter entities
6415 * with "standalone='no'", ... ... The declaration of a parameter entity
6416 * must precede any reference to it...
6417 *
6418 * [ WFC: In DTD ]
6419 * Parameter-entity references may only appear in the DTD.
6420 * NOTE: misleading but this is handled.
6421 */
6422void
6423xmlParsePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006424 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006425 xmlEntityPtr entity = NULL;
6426 xmlParserInputPtr input;
6427
6428 if (RAW == '%') {
6429 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006430 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006431 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006432 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6433 "xmlParsePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006434 } else {
6435 if (RAW == ';') {
6436 NEXT;
6437 if ((ctxt->sax != NULL) &&
6438 (ctxt->sax->getParameterEntity != NULL))
6439 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6440 name);
6441 if (entity == NULL) {
6442 /*
6443 * [ WFC: Entity Declared ]
6444 * In a document without any DTD, a document with only an
6445 * internal DTD subset which contains no parameter entity
6446 * references, or a document with "standalone='yes'", ...
6447 * ... The declaration of a parameter entity must precede
6448 * any reference to it...
6449 */
6450 if ((ctxt->standalone == 1) ||
6451 ((ctxt->hasExternalSubset == 0) &&
6452 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006453 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006454 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006455 } else {
6456 /*
6457 * [ VC: Entity Declared ]
6458 * In a document with an external subset or external
6459 * parameter entities with "standalone='no'", ...
6460 * ... The declaration of a parameter entity must precede
6461 * any reference to it...
6462 */
6463 if ((!ctxt->disableSAX) &&
6464 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6465 ctxt->sax->warning(ctxt->userData,
6466 "PEReference: %%%s; not found\n", name);
6467 ctxt->valid = 0;
6468 }
6469 } else {
6470 /*
6471 * Internal checking in case the entity quest barfed
6472 */
6473 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6474 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6475 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6476 ctxt->sax->warning(ctxt->userData,
6477 "Internal: %%%s; is not a parameter entity\n", name);
Daniel Veillardf5582f12002-06-11 10:08:16 +00006478 } else if (ctxt->input->free != deallocblankswrapper) {
6479 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6480 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00006481 } else {
6482 /*
6483 * TODO !!!
6484 * handle the extra spaces added before and after
6485 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6486 */
6487 input = xmlNewEntityInputStream(ctxt, entity);
6488 xmlPushInput(ctxt, input);
6489 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6490 (RAW == '<') && (NXT(1) == '?') &&
6491 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6492 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6493 xmlParseTextDecl(ctxt);
6494 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6495 /*
6496 * The XML REC instructs us to stop parsing
6497 * right here
6498 */
6499 ctxt->instate = XML_PARSER_EOF;
Owen Taylor3473f882001-02-23 17:55:21 +00006500 return;
6501 }
6502 }
Owen Taylor3473f882001-02-23 17:55:21 +00006503 }
6504 }
6505 ctxt->hasPErefs = 1;
6506 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006507 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006508 }
Owen Taylor3473f882001-02-23 17:55:21 +00006509 }
6510 }
6511}
6512
6513/**
6514 * xmlParseStringPEReference:
6515 * @ctxt: an XML parser context
6516 * @str: a pointer to an index in the string
6517 *
6518 * parse PEReference declarations
6519 *
6520 * [69] PEReference ::= '%' Name ';'
6521 *
6522 * [ WFC: No Recursion ]
6523 * A parsed entity must not contain a recursive
6524 * reference to itself, either directly or indirectly.
6525 *
6526 * [ WFC: Entity Declared ]
6527 * In a document without any DTD, a document with only an internal DTD
6528 * subset which contains no parameter entity references, or a document
6529 * with "standalone='yes'", ... ... The declaration of a parameter
6530 * entity must precede any reference to it...
6531 *
6532 * [ VC: Entity Declared ]
6533 * In a document with an external subset or external parameter entities
6534 * with "standalone='no'", ... ... The declaration of a parameter entity
6535 * must precede any reference to it...
6536 *
6537 * [ WFC: In DTD ]
6538 * Parameter-entity references may only appear in the DTD.
6539 * NOTE: misleading but this is handled.
6540 *
6541 * Returns the string of the entity content.
6542 * str is updated to the current value of the index
6543 */
6544xmlEntityPtr
6545xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6546 const xmlChar *ptr;
6547 xmlChar cur;
6548 xmlChar *name;
6549 xmlEntityPtr entity = NULL;
6550
6551 if ((str == NULL) || (*str == NULL)) return(NULL);
6552 ptr = *str;
6553 cur = *ptr;
6554 if (cur == '%') {
6555 ptr++;
6556 cur = *ptr;
6557 name = xmlParseStringName(ctxt, &ptr);
6558 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006559 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6560 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006561 } else {
6562 cur = *ptr;
6563 if (cur == ';') {
6564 ptr++;
6565 cur = *ptr;
6566 if ((ctxt->sax != NULL) &&
6567 (ctxt->sax->getParameterEntity != NULL))
6568 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6569 name);
6570 if (entity == NULL) {
6571 /*
6572 * [ WFC: Entity Declared ]
6573 * In a document without any DTD, a document with only an
6574 * internal DTD subset which contains no parameter entity
6575 * references, or a document with "standalone='yes'", ...
6576 * ... The declaration of a parameter entity must precede
6577 * any reference to it...
6578 */
6579 if ((ctxt->standalone == 1) ||
6580 ((ctxt->hasExternalSubset == 0) &&
6581 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006582 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006583 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006584 } else {
6585 /*
6586 * [ VC: Entity Declared ]
6587 * In a document with an external subset or external
6588 * parameter entities with "standalone='no'", ...
6589 * ... The declaration of a parameter entity must
6590 * precede any reference to it...
6591 */
6592 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6593 ctxt->sax->warning(ctxt->userData,
6594 "PEReference: %%%s; not found\n", name);
6595 ctxt->valid = 0;
6596 }
6597 } else {
6598 /*
6599 * Internal checking in case the entity quest barfed
6600 */
6601 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6602 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6603 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6604 ctxt->sax->warning(ctxt->userData,
6605 "Internal: %%%s; is not a parameter entity\n", name);
6606 }
6607 }
6608 ctxt->hasPErefs = 1;
6609 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006610 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006611 }
6612 xmlFree(name);
6613 }
6614 }
6615 *str = ptr;
6616 return(entity);
6617}
6618
6619/**
6620 * xmlParseDocTypeDecl:
6621 * @ctxt: an XML parser context
6622 *
6623 * parse a DOCTYPE declaration
6624 *
6625 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6626 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6627 *
6628 * [ VC: Root Element Type ]
6629 * The Name in the document type declaration must match the element
6630 * type of the root element.
6631 */
6632
6633void
6634xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006635 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006636 xmlChar *ExternalID = NULL;
6637 xmlChar *URI = NULL;
6638
6639 /*
6640 * We know that '<!DOCTYPE' has been detected.
6641 */
6642 SKIP(9);
6643
6644 SKIP_BLANKS;
6645
6646 /*
6647 * Parse the DOCTYPE name.
6648 */
6649 name = xmlParseName(ctxt);
6650 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006651 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6652 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006653 }
6654 ctxt->intSubName = name;
6655
6656 SKIP_BLANKS;
6657
6658 /*
6659 * Check for SystemID and ExternalID
6660 */
6661 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6662
6663 if ((URI != NULL) || (ExternalID != NULL)) {
6664 ctxt->hasExternalSubset = 1;
6665 }
6666 ctxt->extSubURI = URI;
6667 ctxt->extSubSystem = ExternalID;
6668
6669 SKIP_BLANKS;
6670
6671 /*
6672 * Create and update the internal subset.
6673 */
6674 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6675 (!ctxt->disableSAX))
6676 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6677
6678 /*
6679 * Is there any internal subset declarations ?
6680 * they are handled separately in xmlParseInternalSubset()
6681 */
6682 if (RAW == '[')
6683 return;
6684
6685 /*
6686 * We should be at the end of the DOCTYPE declaration.
6687 */
6688 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006689 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006690 }
6691 NEXT;
6692}
6693
6694/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006695 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006696 * @ctxt: an XML parser context
6697 *
6698 * parse the internal subset declaration
6699 *
6700 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6701 */
6702
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006703static void
Owen Taylor3473f882001-02-23 17:55:21 +00006704xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6705 /*
6706 * Is there any DTD definition ?
6707 */
6708 if (RAW == '[') {
6709 ctxt->instate = XML_PARSER_DTD;
6710 NEXT;
6711 /*
6712 * Parse the succession of Markup declarations and
6713 * PEReferences.
6714 * Subsequence (markupdecl | PEReference | S)*
6715 */
6716 while (RAW != ']') {
6717 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006718 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006719
6720 SKIP_BLANKS;
6721 xmlParseMarkupDecl(ctxt);
6722 xmlParsePEReference(ctxt);
6723
6724 /*
6725 * Pop-up of finished entities.
6726 */
6727 while ((RAW == 0) && (ctxt->inputNr > 1))
6728 xmlPopInput(ctxt);
6729
6730 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006731 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006732 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006733 break;
6734 }
6735 }
6736 if (RAW == ']') {
6737 NEXT;
6738 SKIP_BLANKS;
6739 }
6740 }
6741
6742 /*
6743 * We should be at the end of the DOCTYPE declaration.
6744 */
6745 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006746 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006747 }
6748 NEXT;
6749}
6750
Daniel Veillard81273902003-09-30 00:43:48 +00006751#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006752/**
6753 * xmlParseAttribute:
6754 * @ctxt: an XML parser context
6755 * @value: a xmlChar ** used to store the value of the attribute
6756 *
6757 * parse an attribute
6758 *
6759 * [41] Attribute ::= Name Eq AttValue
6760 *
6761 * [ WFC: No External Entity References ]
6762 * Attribute values cannot contain direct or indirect entity references
6763 * to external entities.
6764 *
6765 * [ WFC: No < in Attribute Values ]
6766 * The replacement text of any entity referred to directly or indirectly in
6767 * an attribute value (other than "&lt;") must not contain a <.
6768 *
6769 * [ VC: Attribute Value Type ]
6770 * The attribute must have been declared; the value must be of the type
6771 * declared for it.
6772 *
6773 * [25] Eq ::= S? '=' S?
6774 *
6775 * With namespace:
6776 *
6777 * [NS 11] Attribute ::= QName Eq AttValue
6778 *
6779 * Also the case QName == xmlns:??? is handled independently as a namespace
6780 * definition.
6781 *
6782 * Returns the attribute name, and the value in *value.
6783 */
6784
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006785const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006786xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006787 const xmlChar *name;
6788 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006789
6790 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006791 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006792 name = xmlParseName(ctxt);
6793 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006794 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6795 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006796 return(NULL);
6797 }
6798
6799 /*
6800 * read the value
6801 */
6802 SKIP_BLANKS;
6803 if (RAW == '=') {
6804 NEXT;
6805 SKIP_BLANKS;
6806 val = xmlParseAttValue(ctxt);
6807 ctxt->instate = XML_PARSER_CONTENT;
6808 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006809 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006810 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006811 return(NULL);
6812 }
6813
6814 /*
6815 * Check that xml:lang conforms to the specification
6816 * No more registered as an error, just generate a warning now
6817 * since this was deprecated in XML second edition
6818 */
6819 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6820 if (!xmlCheckLanguageID(val)) {
6821 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6822 ctxt->sax->warning(ctxt->userData,
6823 "Malformed value for xml:lang : %s\n", val);
6824 }
6825 }
6826
6827 /*
6828 * Check that xml:space conforms to the specification
6829 */
6830 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6831 if (xmlStrEqual(val, BAD_CAST "default"))
6832 *(ctxt->space) = 0;
6833 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6834 *(ctxt->space) = 1;
6835 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006836 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006837"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006838 val);
Owen Taylor3473f882001-02-23 17:55:21 +00006839 }
6840 }
6841
6842 *value = val;
6843 return(name);
6844}
6845
6846/**
6847 * xmlParseStartTag:
6848 * @ctxt: an XML parser context
6849 *
6850 * parse a start of tag either for rule element or
6851 * EmptyElement. In both case we don't parse the tag closing chars.
6852 *
6853 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6854 *
6855 * [ WFC: Unique Att Spec ]
6856 * No attribute name may appear more than once in the same start-tag or
6857 * empty-element tag.
6858 *
6859 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6860 *
6861 * [ WFC: Unique Att Spec ]
6862 * No attribute name may appear more than once in the same start-tag or
6863 * empty-element tag.
6864 *
6865 * With namespace:
6866 *
6867 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6868 *
6869 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6870 *
6871 * Returns the element name parsed
6872 */
6873
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006874const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006875xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006876 const xmlChar *name;
6877 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006878 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006879 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006880 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006881 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006882 int i;
6883
6884 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006885 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006886
6887 name = xmlParseName(ctxt);
6888 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006889 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006890 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006891 return(NULL);
6892 }
6893
6894 /*
6895 * Now parse the attributes, it ends up with the ending
6896 *
6897 * (S Attribute)* S?
6898 */
6899 SKIP_BLANKS;
6900 GROW;
6901
Daniel Veillard21a0f912001-02-25 19:54:14 +00006902 while ((RAW != '>') &&
6903 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard34ba3872003-07-15 13:34:05 +00006904 (IS_CHAR((unsigned int) RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006905 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006906 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006907
6908 attname = xmlParseAttribute(ctxt, &attvalue);
6909 if ((attname != NULL) && (attvalue != NULL)) {
6910 /*
6911 * [ WFC: Unique Att Spec ]
6912 * No attribute name may appear more than once in the same
6913 * start-tag or empty-element tag.
6914 */
6915 for (i = 0; i < nbatts;i += 2) {
6916 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006917 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006918 xmlFree(attvalue);
6919 goto failed;
6920 }
6921 }
Owen Taylor3473f882001-02-23 17:55:21 +00006922 /*
6923 * Add the pair to atts
6924 */
6925 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006926 maxatts = 22; /* allow for 10 attrs by default */
6927 atts = (const xmlChar **)
6928 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006929 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006930 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006931 if (attvalue != NULL)
6932 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006933 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006934 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006935 ctxt->atts = atts;
6936 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006937 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006938 const xmlChar **n;
6939
Owen Taylor3473f882001-02-23 17:55:21 +00006940 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006941 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006942 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006943 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006944 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006945 if (attvalue != NULL)
6946 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006947 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006948 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006949 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006950 ctxt->atts = atts;
6951 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006952 }
6953 atts[nbatts++] = attname;
6954 atts[nbatts++] = attvalue;
6955 atts[nbatts] = NULL;
6956 atts[nbatts + 1] = NULL;
6957 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006958 if (attvalue != NULL)
6959 xmlFree(attvalue);
6960 }
6961
6962failed:
6963
Daniel Veillard3772de32002-12-17 10:31:45 +00006964 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006965 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6966 break;
6967 if (!IS_BLANK(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006968 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6969 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006970 }
6971 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006972 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6973 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006974 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
6975 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006976 break;
6977 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006978 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00006979 GROW;
6980 }
6981
6982 /*
6983 * SAX: Start of Element !
6984 */
6985 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006986 (!ctxt->disableSAX)) {
6987 if (nbatts > 0)
6988 ctxt->sax->startElement(ctxt->userData, name, atts);
6989 else
6990 ctxt->sax->startElement(ctxt->userData, name, NULL);
6991 }
Owen Taylor3473f882001-02-23 17:55:21 +00006992
6993 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006994 /* Free only the content strings */
6995 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006996 if (atts[i] != NULL)
6997 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006998 }
6999 return(name);
7000}
7001
7002/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007003 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007004 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007005 * @line: line of the start tag
7006 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007007 *
7008 * parse an end of tag
7009 *
7010 * [42] ETag ::= '</' Name S? '>'
7011 *
7012 * With namespace
7013 *
7014 * [NS 9] ETag ::= '</' QName S? '>'
7015 */
7016
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007017static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007018xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007019 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007020
7021 GROW;
7022 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007023 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7024 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007025 return;
7026 }
7027 SKIP(2);
7028
Daniel Veillard46de64e2002-05-29 08:21:33 +00007029 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007030
7031 /*
7032 * We should definitely be at the ending "S? '>'" part
7033 */
7034 GROW;
7035 SKIP_BLANKS;
Daniel Veillard34ba3872003-07-15 13:34:05 +00007036 if ((!IS_CHAR((unsigned int) RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007037 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007038 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007039 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007040
7041 /*
7042 * [ WFC: Element Type Match ]
7043 * The Name in an element's end-tag must match the element type in the
7044 * start-tag.
7045 *
7046 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007047 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00007048 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
7049 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00007050 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00007051 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007052 "Opening and ending tag mismatch: %s line %d and %s\n",
7053 ctxt->name, line, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00007054 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007055 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007056 "Ending tag error for: %s line %d\n", ctxt->name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00007057 }
7058
7059 }
7060 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007061 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007062 }
7063
7064 /*
7065 * SAX: End of Tag
7066 */
7067 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7068 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007069 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007070
Daniel Veillarde57ec792003-09-10 10:50:59 +00007071 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007072 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007073 return;
7074}
7075
7076/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007077 * xmlParseEndTag:
7078 * @ctxt: an XML parser context
7079 *
7080 * parse an end of tag
7081 *
7082 * [42] ETag ::= '</' Name S? '>'
7083 *
7084 * With namespace
7085 *
7086 * [NS 9] ETag ::= '</' QName S? '>'
7087 */
7088
7089void
7090xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007091 xmlParseEndTag1(ctxt, 0);
7092}
Daniel Veillard81273902003-09-30 00:43:48 +00007093#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007094
7095/************************************************************************
7096 * *
7097 * SAX 2 specific operations *
7098 * *
7099 ************************************************************************/
7100
7101static const xmlChar *
7102xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7103 int len = 0, l;
7104 int c;
7105 int count = 0;
7106
7107 /*
7108 * Handler for more complex cases
7109 */
7110 GROW;
7111 c = CUR_CHAR(l);
7112 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007113 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007114 return(NULL);
7115 }
7116
7117 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
7118 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007119 (c == '.') || (c == '-') || (c == '_') ||
Daniel Veillard0fb18932003-09-07 09:14:37 +00007120 (IS_COMBINING(c)) ||
7121 (IS_EXTENDER(c)))) {
7122 if (count++ > 100) {
7123 count = 0;
7124 GROW;
7125 }
7126 len += l;
7127 NEXTL(l);
7128 c = CUR_CHAR(l);
7129 }
7130 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7131}
7132
7133/*
7134 * xmlGetNamespace:
7135 * @ctxt: an XML parser context
7136 * @prefix: the prefix to lookup
7137 *
7138 * Lookup the namespace name for the @prefix (which ca be NULL)
7139 * The prefix must come from the @ctxt->dict dictionnary
7140 *
7141 * Returns the namespace name or NULL if not bound
7142 */
7143static const xmlChar *
7144xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7145 int i;
7146
Daniel Veillarde57ec792003-09-10 10:50:59 +00007147 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007148 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007149 if (ctxt->nsTab[i] == prefix) {
7150 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7151 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007152 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007153 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007154 return(NULL);
7155}
7156
7157/**
7158 * xmlParseNCName:
7159 * @ctxt: an XML parser context
7160 *
7161 * parse an XML name.
7162 *
7163 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7164 * CombiningChar | Extender
7165 *
7166 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7167 *
7168 * Returns the Name parsed or NULL
7169 */
7170
7171static const xmlChar *
7172xmlParseNCName(xmlParserCtxtPtr ctxt) {
7173 const xmlChar *in;
7174 const xmlChar *ret;
7175 int count = 0;
7176
7177 /*
7178 * Accelerator for simple ASCII names
7179 */
7180 in = ctxt->input->cur;
7181 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7182 ((*in >= 0x41) && (*in <= 0x5A)) ||
7183 (*in == '_')) {
7184 in++;
7185 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7186 ((*in >= 0x41) && (*in <= 0x5A)) ||
7187 ((*in >= 0x30) && (*in <= 0x39)) ||
7188 (*in == '_') || (*in == '-') ||
7189 (*in == '.'))
7190 in++;
7191 if ((*in > 0) && (*in < 0x80)) {
7192 count = in - ctxt->input->cur;
7193 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7194 ctxt->input->cur = in;
7195 ctxt->nbChars += count;
7196 ctxt->input->col += count;
7197 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007198 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007199 }
7200 return(ret);
7201 }
7202 }
7203 return(xmlParseNCNameComplex(ctxt));
7204}
7205
7206/**
7207 * xmlParseQName:
7208 * @ctxt: an XML parser context
7209 * @prefix: pointer to store the prefix part
7210 *
7211 * parse an XML Namespace QName
7212 *
7213 * [6] QName ::= (Prefix ':')? LocalPart
7214 * [7] Prefix ::= NCName
7215 * [8] LocalPart ::= NCName
7216 *
7217 * Returns the Name parsed or NULL
7218 */
7219
7220static const xmlChar *
7221xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7222 const xmlChar *l, *p;
7223
7224 GROW;
7225
7226 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007227 if (l == NULL) {
7228 if (CUR == ':') {
7229 l = xmlParseName(ctxt);
7230 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007231 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7232 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007233 *prefix = NULL;
7234 return(l);
7235 }
7236 }
7237 return(NULL);
7238 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007239 if (CUR == ':') {
7240 NEXT;
7241 p = l;
7242 l = xmlParseNCName(ctxt);
7243 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007244 xmlChar *tmp;
7245
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007246 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7247 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007248 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7249 p = xmlDictLookup(ctxt->dict, tmp, -1);
7250 if (tmp != NULL) xmlFree(tmp);
7251 *prefix = NULL;
7252 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007253 }
7254 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007255 xmlChar *tmp;
7256
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007257 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7258 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007259 NEXT;
7260 tmp = (xmlChar *) xmlParseName(ctxt);
7261 if (tmp != NULL) {
7262 tmp = xmlBuildQName(tmp, l, NULL, 0);
7263 l = xmlDictLookup(ctxt->dict, tmp, -1);
7264 if (tmp != NULL) xmlFree(tmp);
7265 *prefix = p;
7266 return(l);
7267 }
7268 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7269 l = xmlDictLookup(ctxt->dict, tmp, -1);
7270 if (tmp != NULL) xmlFree(tmp);
7271 *prefix = p;
7272 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007273 }
7274 *prefix = p;
7275 } else
7276 *prefix = NULL;
7277 return(l);
7278}
7279
7280/**
7281 * xmlParseQNameAndCompare:
7282 * @ctxt: an XML parser context
7283 * @name: the localname
7284 * @prefix: the prefix, if any.
7285 *
7286 * parse an XML name and compares for match
7287 * (specialized for endtag parsing)
7288 *
7289 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7290 * and the name for mismatch
7291 */
7292
7293static const xmlChar *
7294xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7295 xmlChar const *prefix) {
7296 const xmlChar *cmp = name;
7297 const xmlChar *in;
7298 const xmlChar *ret;
7299 const xmlChar *prefix2;
7300
7301 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7302
7303 GROW;
7304 in = ctxt->input->cur;
7305
7306 cmp = prefix;
7307 while (*in != 0 && *in == *cmp) {
7308 ++in;
7309 ++cmp;
7310 }
7311 if ((*cmp == 0) && (*in == ':')) {
7312 in++;
7313 cmp = name;
7314 while (*in != 0 && *in == *cmp) {
7315 ++in;
7316 ++cmp;
7317 }
7318 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
7319 /* success */
7320 ctxt->input->cur = in;
7321 return((const xmlChar*) 1);
7322 }
7323 }
7324 /*
7325 * all strings coms from the dictionary, equality can be done directly
7326 */
7327 ret = xmlParseQName (ctxt, &prefix2);
7328 if ((ret == name) && (prefix == prefix2))
7329 return((const xmlChar*) 1);
7330 return ret;
7331}
7332
7333/**
7334 * xmlParseAttValueInternal:
7335 * @ctxt: an XML parser context
7336 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007337 * @alloc: whether the attribute was reallocated as a new string
7338 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007339 *
7340 * parse a value for an attribute.
7341 * NOTE: if no normalization is needed, the routine will return pointers
7342 * directly from the data buffer.
7343 *
7344 * 3.3.3 Attribute-Value Normalization:
7345 * Before the value of an attribute is passed to the application or
7346 * checked for validity, the XML processor must normalize it as follows:
7347 * - a character reference is processed by appending the referenced
7348 * character to the attribute value
7349 * - an entity reference is processed by recursively processing the
7350 * replacement text of the entity
7351 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7352 * appending #x20 to the normalized value, except that only a single
7353 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7354 * parsed entity or the literal entity value of an internal parsed entity
7355 * - other characters are processed by appending them to the normalized value
7356 * If the declared value is not CDATA, then the XML processor must further
7357 * process the normalized attribute value by discarding any leading and
7358 * trailing space (#x20) characters, and by replacing sequences of space
7359 * (#x20) characters by a single space (#x20) character.
7360 * All attributes for which no declaration has been read should be treated
7361 * by a non-validating parser as if declared CDATA.
7362 *
7363 * Returns the AttValue parsed or NULL. The value has to be freed by the
7364 * caller if it was copied, this can be detected by val[*len] == 0.
7365 */
7366
7367static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007368xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7369 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007370{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007371 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007372 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007373 xmlChar *ret = NULL;
7374
7375 GROW;
7376 in = (xmlChar *) CUR_PTR;
7377 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007378 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007379 return (NULL);
7380 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007381 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007382
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007383 /*
7384 * try to handle in this routine the most common case where no
7385 * allocation of a new string is required and where content is
7386 * pure ASCII.
7387 */
7388 limit = *in++;
7389 end = ctxt->input->end;
7390 start = in;
7391 if (in >= end) {
7392 const xmlChar *oldbase = ctxt->input->base;
7393 GROW;
7394 if (oldbase != ctxt->input->base) {
7395 long delta = ctxt->input->base - oldbase;
7396 start = start + delta;
7397 in = in + delta;
7398 }
7399 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007400 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007401 if (normalize) {
7402 /*
7403 * Skip any leading spaces
7404 */
7405 while ((in < end) && (*in != limit) &&
7406 ((*in == 0x20) || (*in == 0x9) ||
7407 (*in == 0xA) || (*in == 0xD))) {
7408 in++;
7409 start = in;
7410 if (in >= end) {
7411 const xmlChar *oldbase = ctxt->input->base;
7412 GROW;
7413 if (oldbase != ctxt->input->base) {
7414 long delta = ctxt->input->base - oldbase;
7415 start = start + delta;
7416 in = in + delta;
7417 }
7418 end = ctxt->input->end;
7419 }
7420 }
7421 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7422 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7423 if ((*in++ == 0x20) && (*in == 0x20)) break;
7424 if (in >= end) {
7425 const xmlChar *oldbase = ctxt->input->base;
7426 GROW;
7427 if (oldbase != ctxt->input->base) {
7428 long delta = ctxt->input->base - oldbase;
7429 start = start + delta;
7430 in = in + delta;
7431 }
7432 end = ctxt->input->end;
7433 }
7434 }
7435 last = in;
7436 /*
7437 * skip the trailing blanks
7438 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007439 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007440 while ((in < end) && (*in != limit) &&
7441 ((*in == 0x20) || (*in == 0x9) ||
7442 (*in == 0xA) || (*in == 0xD))) {
7443 in++;
7444 if (in >= end) {
7445 const xmlChar *oldbase = ctxt->input->base;
7446 GROW;
7447 if (oldbase != ctxt->input->base) {
7448 long delta = ctxt->input->base - oldbase;
7449 start = start + delta;
7450 in = in + delta;
7451 last = last + delta;
7452 }
7453 end = ctxt->input->end;
7454 }
7455 }
7456 if (*in != limit) goto need_complex;
7457 } else {
7458 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7459 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7460 in++;
7461 if (in >= end) {
7462 const xmlChar *oldbase = ctxt->input->base;
7463 GROW;
7464 if (oldbase != ctxt->input->base) {
7465 long delta = ctxt->input->base - oldbase;
7466 start = start + delta;
7467 in = in + delta;
7468 }
7469 end = ctxt->input->end;
7470 }
7471 }
7472 last = in;
7473 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007474 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007475 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007476 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007477 *len = last - start;
7478 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007479 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007480 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007481 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007482 }
7483 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007484 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007485 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007486need_complex:
7487 if (alloc) *alloc = 1;
7488 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007489}
7490
7491/**
7492 * xmlParseAttribute2:
7493 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007494 * @pref: the element prefix
7495 * @elem: the element name
7496 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007497 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007498 * @len: an int * to save the length of the attribute
7499 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007500 *
7501 * parse an attribute in the new SAX2 framework.
7502 *
7503 * Returns the attribute name, and the value in *value, .
7504 */
7505
7506static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007507xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7508 const xmlChar *pref, const xmlChar *elem,
7509 const xmlChar **prefix, xmlChar **value,
7510 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007511 const xmlChar *name;
7512 xmlChar *val;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007513 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007514
7515 *value = NULL;
7516 GROW;
7517 name = xmlParseQName(ctxt, prefix);
7518 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007519 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7520 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007521 return(NULL);
7522 }
7523
7524 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007525 * get the type if needed
7526 */
7527 if (ctxt->attsSpecial != NULL) {
7528 int type;
7529
7530 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7531 pref, elem, *prefix, name);
7532 if (type != 0) normalize = 1;
7533 }
7534
7535 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007536 * read the value
7537 */
7538 SKIP_BLANKS;
7539 if (RAW == '=') {
7540 NEXT;
7541 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007542 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007543 ctxt->instate = XML_PARSER_CONTENT;
7544 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007545 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007546 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007547 return(NULL);
7548 }
7549
7550 /*
7551 * Check that xml:lang conforms to the specification
7552 * No more registered as an error, just generate a warning now
7553 * since this was deprecated in XML second edition
7554 */
7555 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7556 if (!xmlCheckLanguageID(val)) {
7557 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7558 ctxt->sax->warning(ctxt->userData,
7559 "Malformed value for xml:lang : %s\n", val);
7560 }
7561 }
7562
7563 /*
7564 * Check that xml:space conforms to the specification
7565 */
7566 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7567 if (xmlStrEqual(val, BAD_CAST "default"))
7568 *(ctxt->space) = 0;
7569 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7570 *(ctxt->space) = 1;
7571 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007572 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007573"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7574 val);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007575 }
7576 }
7577
7578 *value = val;
7579 return(name);
7580}
7581
7582/**
7583 * xmlParseStartTag2:
7584 * @ctxt: an XML parser context
7585 *
7586 * parse a start of tag either for rule element or
7587 * EmptyElement. In both case we don't parse the tag closing chars.
7588 * This routine is called when running SAX2 parsing
7589 *
7590 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7591 *
7592 * [ WFC: Unique Att Spec ]
7593 * No attribute name may appear more than once in the same start-tag or
7594 * empty-element tag.
7595 *
7596 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7597 *
7598 * [ WFC: Unique Att Spec ]
7599 * No attribute name may appear more than once in the same start-tag or
7600 * empty-element tag.
7601 *
7602 * With namespace:
7603 *
7604 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7605 *
7606 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7607 *
7608 * Returns the element name parsed
7609 */
7610
7611static const xmlChar *
7612xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
7613 const xmlChar **URI) {
7614 const xmlChar *localname;
7615 const xmlChar *prefix;
7616 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007617 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007618 const xmlChar *nsname;
7619 xmlChar *attvalue;
7620 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007621 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007622 int nratts, nbatts, nbdef;
7623 int i, j, nbNs, attval;
7624 const xmlChar *base;
7625 unsigned long cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007626
7627 if (RAW != '<') return(NULL);
7628 NEXT1;
7629
7630 /*
7631 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7632 * point since the attribute values may be stored as pointers to
7633 * the buffer and calling SHRINK would destroy them !
7634 * The Shrinking is only possible once the full set of attribute
7635 * callbacks have been done.
7636 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007637reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007638 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007639 base = ctxt->input->base;
7640 cur = ctxt->input->cur - ctxt->input->base;
7641 nbatts = 0;
7642 nratts = 0;
7643 nbdef = 0;
7644 nbNs = 0;
7645 attval = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007646
7647 localname = xmlParseQName(ctxt, &prefix);
7648 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007649 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7650 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007651 return(NULL);
7652 }
7653
7654 /*
7655 * Now parse the attributes, it ends up with the ending
7656 *
7657 * (S Attribute)* S?
7658 */
7659 SKIP_BLANKS;
7660 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007661 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007662
7663 while ((RAW != '>') &&
7664 ((RAW != '/') || (NXT(1) != '>')) &&
7665 (IS_CHAR((unsigned int) RAW))) {
7666 const xmlChar *q = CUR_PTR;
7667 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007668 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007669
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007670 attname = xmlParseAttribute2(ctxt, prefix, localname,
7671 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007672 if ((attname != NULL) && (attvalue != NULL)) {
7673 if (len < 0) len = xmlStrlen(attvalue);
7674 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007675 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7676 xmlURIPtr uri;
7677
7678 if (*URL != 0) {
7679 uri = xmlParseURI((const char *) URL);
7680 if (uri == NULL) {
7681 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7682 ctxt->sax->warning(ctxt->userData,
7683 "xmlns: %s not a valid URI\n", URL);
7684 } else {
7685 if (uri->scheme == NULL) {
7686 if ((ctxt->sax != NULL) &&
7687 (ctxt->sax->warning != NULL))
7688 ctxt->sax->warning(ctxt->userData,
7689 "xmlns: URI %s is not absolute\n", URL);
7690 }
7691 xmlFreeURI(uri);
7692 }
7693 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007694 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007695 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007696 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007697 for (j = 1;j <= nbNs;j++)
7698 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7699 break;
7700 if (j <= nbNs)
7701 xmlErrAttributeDup(ctxt, NULL, attname);
7702 else
7703 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007704 if (alloc != 0) xmlFree(attvalue);
7705 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007706 continue;
7707 }
7708 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007709 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7710 xmlURIPtr uri;
7711
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007712 if (attname == ctxt->str_xml) {
7713 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007714 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7715 "xml namespace prefix mapped to wrong URI\n",
7716 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007717 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007718 /*
7719 * Do not keep a namespace definition node
7720 */
7721 if (alloc != 0) xmlFree(attvalue);
7722 SKIP_BLANKS;
7723 continue;
7724 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007725 uri = xmlParseURI((const char *) URL);
7726 if (uri == NULL) {
7727 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7728 ctxt->sax->warning(ctxt->userData,
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007729 "xmlns:%s: '%s' is not a valid URI\n",
7730 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007731 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007732 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007733 if ((ctxt->sax != NULL) &&
7734 (ctxt->sax->warning != NULL))
7735 ctxt->sax->warning(ctxt->userData,
7736 "xmlns:%s: URI %s is not absolute\n",
7737 attname, URL);
7738 }
7739 xmlFreeURI(uri);
7740 }
7741
Daniel Veillard0fb18932003-09-07 09:14:37 +00007742 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007743 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007744 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007745 for (j = 1;j <= nbNs;j++)
7746 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7747 break;
7748 if (j <= nbNs)
7749 xmlErrAttributeDup(ctxt, aprefix, attname);
7750 else
7751 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007752 if (alloc != 0) xmlFree(attvalue);
7753 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007754 continue;
7755 }
7756
7757 /*
7758 * Add the pair to atts
7759 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007760 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7761 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007762 if (attvalue[len] == 0)
7763 xmlFree(attvalue);
7764 goto failed;
7765 }
7766 maxatts = ctxt->maxatts;
7767 atts = ctxt->atts;
7768 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007769 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007770 atts[nbatts++] = attname;
7771 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007772 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007773 atts[nbatts++] = attvalue;
7774 attvalue += len;
7775 atts[nbatts++] = attvalue;
7776 /*
7777 * tag if some deallocation is needed
7778 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007779 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007780 } else {
7781 if ((attvalue != NULL) && (attvalue[len] == 0))
7782 xmlFree(attvalue);
7783 }
7784
7785failed:
7786
7787 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007788 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007789 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7790 break;
7791 if (!IS_BLANK(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007792 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7793 "attributes construct error\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007794 }
7795 SKIP_BLANKS;
7796 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7797 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007798 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007799 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007800 break;
7801 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007802 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007803 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007804 }
7805
Daniel Veillard0fb18932003-09-07 09:14:37 +00007806 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007807 * The attributes checkings
Daniel Veillard0fb18932003-09-07 09:14:37 +00007808 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007809 for (i = 0; i < nbatts;i += 5) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007810 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7811 if ((atts[i + 1] != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007812 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007813 "Namespace prefix %s for %s on %s is not defined\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007814 atts[i + 1], atts[i], localname);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007815 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007816 atts[i + 2] = nsname;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007817 /*
7818 * [ WFC: Unique Att Spec ]
7819 * No attribute name may appear more than once in the same
7820 * start-tag or empty-element tag.
7821 * As extended by the Namespace in XML REC.
7822 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007823 for (j = 0; j < i;j += 5) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007824 if (atts[i] == atts[j]) {
7825 if (atts[i+1] == atts[j+1]) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007826 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007827 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007828 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007829 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007830 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007831 "Namespaced Attribute %s in '%s' redefined\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007832 atts[i], nsname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007833 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007834 }
7835 }
7836 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007837 }
7838
7839 /*
7840 * The attributes defaulting
7841 */
7842 if (ctxt->attsDefault != NULL) {
7843 xmlDefAttrsPtr defaults;
7844
7845 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7846 if (defaults != NULL) {
7847 for (i = 0;i < defaults->nbAttrs;i++) {
7848 attname = defaults->values[4 * i];
7849 aprefix = defaults->values[4 * i + 1];
7850
7851 /*
7852 * special work for namespaces defaulted defs
7853 */
7854 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7855 /*
7856 * check that it's not a defined namespace
7857 */
7858 for (j = 1;j <= nbNs;j++)
7859 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7860 break;
7861 if (j <= nbNs) continue;
7862
7863 nsname = xmlGetNamespace(ctxt, NULL);
7864 if (nsname != defaults->values[4 * i + 2]) {
7865 if (nsPush(ctxt, NULL,
7866 defaults->values[4 * i + 2]) > 0)
7867 nbNs++;
7868 }
7869 } else if (aprefix == ctxt->str_xmlns) {
7870 /*
7871 * check that it's not a defined namespace
7872 */
7873 for (j = 1;j <= nbNs;j++)
7874 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7875 break;
7876 if (j <= nbNs) continue;
7877
7878 nsname = xmlGetNamespace(ctxt, attname);
7879 if (nsname != defaults->values[2]) {
7880 if (nsPush(ctxt, attname,
7881 defaults->values[4 * i + 2]) > 0)
7882 nbNs++;
7883 }
7884 } else {
7885 /*
7886 * check that it's not a defined attribute
7887 */
7888 for (j = 0;j < nbatts;j+=5) {
7889 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7890 break;
7891 }
7892 if (j < nbatts) continue;
7893
7894 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7895 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00007896 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007897 }
7898 maxatts = ctxt->maxatts;
7899 atts = ctxt->atts;
7900 }
7901 atts[nbatts++] = attname;
7902 atts[nbatts++] = aprefix;
7903 if (aprefix == NULL)
7904 atts[nbatts++] = NULL;
7905 else
7906 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7907 atts[nbatts++] = defaults->values[4 * i + 2];
7908 atts[nbatts++] = defaults->values[4 * i + 3];
7909 nbdef++;
7910 }
7911 }
7912 }
7913 }
7914
7915 nsname = xmlGetNamespace(ctxt, prefix);
7916 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007917 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7918 "Namespace prefix %s on %s is not defined\n",
7919 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007920 }
7921 *pref = prefix;
7922 *URI = nsname;
7923
7924 /*
7925 * SAX: Start of Element !
7926 */
7927 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7928 (!ctxt->disableSAX)) {
7929 if (nbNs > 0)
7930 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7931 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7932 nbatts / 5, nbdef, atts);
7933 else
7934 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7935 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7936 }
7937
7938 /*
7939 * Free up attribute allocated strings if needed
7940 */
7941 if (attval != 0) {
7942 for (i = 3,j = 0; j < nratts;i += 5,j++)
7943 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7944 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007945 }
7946
7947 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007948
7949base_changed:
7950 /*
7951 * the attribute strings are valid iif the base didn't changed
7952 */
7953 if (attval != 0) {
7954 for (i = 3,j = 0; j < nratts;i += 5,j++)
7955 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7956 xmlFree((xmlChar *) atts[i]);
7957 }
7958 ctxt->input->cur = ctxt->input->base + cur;
7959 if (ctxt->wellFormed == 1) {
7960 goto reparse;
7961 }
7962 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007963}
7964
7965/**
7966 * xmlParseEndTag2:
7967 * @ctxt: an XML parser context
7968 * @line: line of the start tag
7969 * @nsNr: number of namespaces on the start tag
7970 *
7971 * parse an end of tag
7972 *
7973 * [42] ETag ::= '</' Name S? '>'
7974 *
7975 * With namespace
7976 *
7977 * [NS 9] ETag ::= '</' QName S? '>'
7978 */
7979
7980static void
7981xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
7982 const xmlChar *URI, int line, int nsNr) {
7983 const xmlChar *name;
7984
7985 GROW;
7986 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007987 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007988 return;
7989 }
7990 SKIP(2);
7991
7992 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
7993
7994 /*
7995 * We should definitely be at the ending "S? '>'" part
7996 */
7997 GROW;
7998 SKIP_BLANKS;
7999 if ((!IS_CHAR((unsigned int) RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008000 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008001 } else
8002 NEXT1;
8003
8004 /*
8005 * [ WFC: Element Type Match ]
8006 * The Name in an element's end-tag must match the element type in the
8007 * start-tag.
8008 *
8009 */
8010 if (name != (xmlChar*)1) {
8011 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
8012 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
8013 if (name != NULL) {
8014 ctxt->sax->error(ctxt->userData,
8015 "Opening and ending tag mismatch: %s line %d and %s\n",
8016 ctxt->name, line, name);
8017 } else {
8018 ctxt->sax->error(ctxt->userData,
8019 "Ending tag error for: %s line %d\n", ctxt->name, line);
8020 }
8021
8022 }
8023 ctxt->wellFormed = 0;
8024 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8025 }
8026
8027 /*
8028 * SAX: End of Tag
8029 */
8030 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8031 (!ctxt->disableSAX))
8032 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8033
Daniel Veillard0fb18932003-09-07 09:14:37 +00008034 spacePop(ctxt);
8035 if (nsNr != 0)
8036 nsPop(ctxt, nsNr);
8037 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008038}
8039
8040/**
Owen Taylor3473f882001-02-23 17:55:21 +00008041 * xmlParseCDSect:
8042 * @ctxt: an XML parser context
8043 *
8044 * Parse escaped pure raw content.
8045 *
8046 * [18] CDSect ::= CDStart CData CDEnd
8047 *
8048 * [19] CDStart ::= '<![CDATA['
8049 *
8050 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8051 *
8052 * [21] CDEnd ::= ']]>'
8053 */
8054void
8055xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8056 xmlChar *buf = NULL;
8057 int len = 0;
8058 int size = XML_PARSER_BUFFER_SIZE;
8059 int r, rl;
8060 int s, sl;
8061 int cur, l;
8062 int count = 0;
8063
8064 if ((NXT(0) == '<') && (NXT(1) == '!') &&
8065 (NXT(2) == '[') && (NXT(3) == 'C') &&
8066 (NXT(4) == 'D') && (NXT(5) == 'A') &&
8067 (NXT(6) == 'T') && (NXT(7) == 'A') &&
8068 (NXT(8) == '[')) {
8069 SKIP(9);
8070 } else
8071 return;
8072
8073 ctxt->instate = XML_PARSER_CDATA_SECTION;
8074 r = CUR_CHAR(rl);
8075 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008076 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008077 ctxt->instate = XML_PARSER_CONTENT;
8078 return;
8079 }
8080 NEXTL(rl);
8081 s = CUR_CHAR(sl);
8082 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008083 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008084 ctxt->instate = XML_PARSER_CONTENT;
8085 return;
8086 }
8087 NEXTL(sl);
8088 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008089 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008090 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008091 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008092 return;
8093 }
8094 while (IS_CHAR(cur) &&
8095 ((r != ']') || (s != ']') || (cur != '>'))) {
8096 if (len + 5 >= size) {
8097 size *= 2;
8098 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8099 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008100 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008101 return;
8102 }
8103 }
8104 COPY_BUF(rl,buf,len,r);
8105 r = s;
8106 rl = sl;
8107 s = cur;
8108 sl = l;
8109 count++;
8110 if (count > 50) {
8111 GROW;
8112 count = 0;
8113 }
8114 NEXTL(l);
8115 cur = CUR_CHAR(l);
8116 }
8117 buf[len] = 0;
8118 ctxt->instate = XML_PARSER_CONTENT;
8119 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008120 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008121 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008122 xmlFree(buf);
8123 return;
8124 }
8125 NEXTL(l);
8126
8127 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008128 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008129 */
8130 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8131 if (ctxt->sax->cdataBlock != NULL)
8132 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008133 else if (ctxt->sax->characters != NULL)
8134 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008135 }
8136 xmlFree(buf);
8137}
8138
8139/**
8140 * xmlParseContent:
8141 * @ctxt: an XML parser context
8142 *
8143 * Parse a content:
8144 *
8145 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8146 */
8147
8148void
8149xmlParseContent(xmlParserCtxtPtr ctxt) {
8150 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008151 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008152 ((RAW != '<') || (NXT(1) != '/'))) {
8153 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008154 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008155 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008156
8157 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008158 * First case : a Processing Instruction.
8159 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008160 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008161 xmlParsePI(ctxt);
8162 }
8163
8164 /*
8165 * Second case : a CDSection
8166 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008167 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008168 (NXT(2) == '[') && (NXT(3) == 'C') &&
8169 (NXT(4) == 'D') && (NXT(5) == 'A') &&
8170 (NXT(6) == 'T') && (NXT(7) == 'A') &&
8171 (NXT(8) == '[')) {
8172 xmlParseCDSect(ctxt);
8173 }
8174
8175 /*
8176 * Third case : a comment
8177 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008178 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008179 (NXT(2) == '-') && (NXT(3) == '-')) {
8180 xmlParseComment(ctxt);
8181 ctxt->instate = XML_PARSER_CONTENT;
8182 }
8183
8184 /*
8185 * Fourth case : a sub-element.
8186 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008187 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008188 xmlParseElement(ctxt);
8189 }
8190
8191 /*
8192 * Fifth case : a reference. If if has not been resolved,
8193 * parsing returns it's Name, create the node
8194 */
8195
Daniel Veillard21a0f912001-02-25 19:54:14 +00008196 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008197 xmlParseReference(ctxt);
8198 }
8199
8200 /*
8201 * Last case, text. Note that References are handled directly.
8202 */
8203 else {
8204 xmlParseCharData(ctxt, 0);
8205 }
8206
8207 GROW;
8208 /*
8209 * Pop-up of finished entities.
8210 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008211 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008212 xmlPopInput(ctxt);
8213 SHRINK;
8214
Daniel Veillardfdc91562002-07-01 21:52:03 +00008215 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008216 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8217 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008218 ctxt->instate = XML_PARSER_EOF;
8219 break;
8220 }
8221 }
8222}
8223
8224/**
8225 * xmlParseElement:
8226 * @ctxt: an XML parser context
8227 *
8228 * parse an XML element, this is highly recursive
8229 *
8230 * [39] element ::= EmptyElemTag | STag content ETag
8231 *
8232 * [ WFC: Element Type Match ]
8233 * The Name in an element's end-tag must match the element type in the
8234 * start-tag.
8235 *
Owen Taylor3473f882001-02-23 17:55:21 +00008236 */
8237
8238void
8239xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008240 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008241 const xmlChar *prefix;
8242 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008243 xmlParserNodeInfo node_info;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008244 int line;
Owen Taylor3473f882001-02-23 17:55:21 +00008245 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008246 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008247
8248 /* Capture start position */
8249 if (ctxt->record_info) {
8250 node_info.begin_pos = ctxt->input->consumed +
8251 (CUR_PTR - ctxt->input->base);
8252 node_info.begin_line = ctxt->input->line;
8253 }
8254
8255 if (ctxt->spaceNr == 0)
8256 spacePush(ctxt, -1);
8257 else
8258 spacePush(ctxt, *ctxt->space);
8259
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008260 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008261#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008262 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008263#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008264 name = xmlParseStartTag2(ctxt, &prefix, &URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008265#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008266 else
8267 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008268#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008269 if (name == NULL) {
8270 spacePop(ctxt);
8271 return;
8272 }
8273 namePush(ctxt, name);
8274 ret = ctxt->node;
8275
Daniel Veillard4432df22003-09-28 18:58:27 +00008276#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008277 /*
8278 * [ VC: Root Element Type ]
8279 * The Name in the document type declaration must match the element
8280 * type of the root element.
8281 */
8282 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8283 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8284 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008285#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008286
8287 /*
8288 * Check for an Empty Element.
8289 */
8290 if ((RAW == '/') && (NXT(1) == '>')) {
8291 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008292 if (ctxt->sax2) {
8293 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8294 (!ctxt->disableSAX))
8295 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008296#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008297 } else {
8298 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8299 (!ctxt->disableSAX))
8300 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008301#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008302 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008303 namePop(ctxt);
8304 spacePop(ctxt);
8305 if (nsNr != ctxt->nsNr)
8306 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008307 if ( ret != NULL && ctxt->record_info ) {
8308 node_info.end_pos = ctxt->input->consumed +
8309 (CUR_PTR - ctxt->input->base);
8310 node_info.end_line = ctxt->input->line;
8311 node_info.node = ret;
8312 xmlParserAddNodeInfo(ctxt, &node_info);
8313 }
8314 return;
8315 }
8316 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008317 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008318 } else {
8319 ctxt->errNo = XML_ERR_GT_REQUIRED;
8320 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8321 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008322 "Couldn't find end of Start Tag %s line %d\n",
8323 name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00008324 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008325 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008326
8327 /*
8328 * end of parsing of this node.
8329 */
8330 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008331 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008332 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008333 if (nsNr != ctxt->nsNr)
8334 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008335
8336 /*
8337 * Capture end position and add node
8338 */
8339 if ( ret != NULL && ctxt->record_info ) {
8340 node_info.end_pos = ctxt->input->consumed +
8341 (CUR_PTR - ctxt->input->base);
8342 node_info.end_line = ctxt->input->line;
8343 node_info.node = ret;
8344 xmlParserAddNodeInfo(ctxt, &node_info);
8345 }
8346 return;
8347 }
8348
8349 /*
8350 * Parse the content of the element:
8351 */
8352 xmlParseContent(ctxt);
Daniel Veillard34ba3872003-07-15 13:34:05 +00008353 if (!IS_CHAR((unsigned int) RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00008354 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00008355 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8356 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008357 "Premature end of data in tag %s line %d\n", name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00008358 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008359 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008360
8361 /*
8362 * end of parsing of this node.
8363 */
8364 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008365 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008366 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008367 if (nsNr != ctxt->nsNr)
8368 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008369 return;
8370 }
8371
8372 /*
8373 * parse the end of tag: '</' should be here.
8374 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008375 if (ctxt->sax2) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008376 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008377 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008378 }
8379#ifdef LIBXML_SAX1_ENABLED
8380 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008381 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008382#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008383
8384 /*
8385 * Capture end position and add node
8386 */
8387 if ( ret != NULL && ctxt->record_info ) {
8388 node_info.end_pos = ctxt->input->consumed +
8389 (CUR_PTR - ctxt->input->base);
8390 node_info.end_line = ctxt->input->line;
8391 node_info.node = ret;
8392 xmlParserAddNodeInfo(ctxt, &node_info);
8393 }
8394}
8395
8396/**
8397 * xmlParseVersionNum:
8398 * @ctxt: an XML parser context
8399 *
8400 * parse the XML version value.
8401 *
8402 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8403 *
8404 * Returns the string giving the XML version number, or NULL
8405 */
8406xmlChar *
8407xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8408 xmlChar *buf = NULL;
8409 int len = 0;
8410 int size = 10;
8411 xmlChar cur;
8412
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008413 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008414 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008415 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008416 return(NULL);
8417 }
8418 cur = CUR;
8419 while (((cur >= 'a') && (cur <= 'z')) ||
8420 ((cur >= 'A') && (cur <= 'Z')) ||
8421 ((cur >= '0') && (cur <= '9')) ||
8422 (cur == '_') || (cur == '.') ||
8423 (cur == ':') || (cur == '-')) {
8424 if (len + 1 >= size) {
8425 size *= 2;
8426 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8427 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008428 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008429 return(NULL);
8430 }
8431 }
8432 buf[len++] = cur;
8433 NEXT;
8434 cur=CUR;
8435 }
8436 buf[len] = 0;
8437 return(buf);
8438}
8439
8440/**
8441 * xmlParseVersionInfo:
8442 * @ctxt: an XML parser context
8443 *
8444 * parse the XML version.
8445 *
8446 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8447 *
8448 * [25] Eq ::= S? '=' S?
8449 *
8450 * Returns the version string, e.g. "1.0"
8451 */
8452
8453xmlChar *
8454xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8455 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008456
8457 if ((RAW == 'v') && (NXT(1) == 'e') &&
8458 (NXT(2) == 'r') && (NXT(3) == 's') &&
8459 (NXT(4) == 'i') && (NXT(5) == 'o') &&
8460 (NXT(6) == 'n')) {
8461 SKIP(7);
8462 SKIP_BLANKS;
8463 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008464 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008465 return(NULL);
8466 }
8467 NEXT;
8468 SKIP_BLANKS;
8469 if (RAW == '"') {
8470 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008471 version = xmlParseVersionNum(ctxt);
8472 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008473 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008474 } else
8475 NEXT;
8476 } else if (RAW == '\''){
8477 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008478 version = xmlParseVersionNum(ctxt);
8479 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008480 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008481 } else
8482 NEXT;
8483 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008484 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008485 }
8486 }
8487 return(version);
8488}
8489
8490/**
8491 * xmlParseEncName:
8492 * @ctxt: an XML parser context
8493 *
8494 * parse the XML encoding name
8495 *
8496 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8497 *
8498 * Returns the encoding name value or NULL
8499 */
8500xmlChar *
8501xmlParseEncName(xmlParserCtxtPtr ctxt) {
8502 xmlChar *buf = NULL;
8503 int len = 0;
8504 int size = 10;
8505 xmlChar cur;
8506
8507 cur = CUR;
8508 if (((cur >= 'a') && (cur <= 'z')) ||
8509 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008510 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008511 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008512 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008513 return(NULL);
8514 }
8515
8516 buf[len++] = cur;
8517 NEXT;
8518 cur = CUR;
8519 while (((cur >= 'a') && (cur <= 'z')) ||
8520 ((cur >= 'A') && (cur <= 'Z')) ||
8521 ((cur >= '0') && (cur <= '9')) ||
8522 (cur == '.') || (cur == '_') ||
8523 (cur == '-')) {
8524 if (len + 1 >= size) {
8525 size *= 2;
8526 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8527 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008528 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008529 return(NULL);
8530 }
8531 }
8532 buf[len++] = cur;
8533 NEXT;
8534 cur = CUR;
8535 if (cur == 0) {
8536 SHRINK;
8537 GROW;
8538 cur = CUR;
8539 }
8540 }
8541 buf[len] = 0;
8542 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008543 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008544 }
8545 return(buf);
8546}
8547
8548/**
8549 * xmlParseEncodingDecl:
8550 * @ctxt: an XML parser context
8551 *
8552 * parse the XML encoding declaration
8553 *
8554 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8555 *
8556 * this setups the conversion filters.
8557 *
8558 * Returns the encoding value or NULL
8559 */
8560
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008561const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008562xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8563 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008564
8565 SKIP_BLANKS;
8566 if ((RAW == 'e') && (NXT(1) == 'n') &&
8567 (NXT(2) == 'c') && (NXT(3) == 'o') &&
8568 (NXT(4) == 'd') && (NXT(5) == 'i') &&
8569 (NXT(6) == 'n') && (NXT(7) == 'g')) {
8570 SKIP(8);
8571 SKIP_BLANKS;
8572 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008573 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008574 return(NULL);
8575 }
8576 NEXT;
8577 SKIP_BLANKS;
8578 if (RAW == '"') {
8579 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008580 encoding = xmlParseEncName(ctxt);
8581 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008582 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008583 } else
8584 NEXT;
8585 } else if (RAW == '\''){
8586 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008587 encoding = xmlParseEncName(ctxt);
8588 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008589 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008590 } else
8591 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008592 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008593 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008594 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008595 /*
8596 * UTF-16 encoding stwich has already taken place at this stage,
8597 * more over the little-endian/big-endian selection is already done
8598 */
8599 if ((encoding != NULL) &&
8600 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8601 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008602 if (ctxt->encoding != NULL)
8603 xmlFree((xmlChar *) ctxt->encoding);
8604 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008605 }
8606 /*
8607 * UTF-8 encoding is handled natively
8608 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008609 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008610 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8611 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008612 if (ctxt->encoding != NULL)
8613 xmlFree((xmlChar *) ctxt->encoding);
8614 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008615 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008616 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008617 xmlCharEncodingHandlerPtr handler;
8618
8619 if (ctxt->input->encoding != NULL)
8620 xmlFree((xmlChar *) ctxt->input->encoding);
8621 ctxt->input->encoding = encoding;
8622
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008623 handler = xmlFindCharEncodingHandler((const char *) encoding);
8624 if (handler != NULL) {
8625 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008626 } else {
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008627 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
8628 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8629 ctxt->sax->error(ctxt->userData,
8630 "Unsupported encoding %s\n", encoding);
8631 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008632 }
8633 }
8634 }
8635 return(encoding);
8636}
8637
8638/**
8639 * xmlParseSDDecl:
8640 * @ctxt: an XML parser context
8641 *
8642 * parse the XML standalone declaration
8643 *
8644 * [32] SDDecl ::= S 'standalone' Eq
8645 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8646 *
8647 * [ VC: Standalone Document Declaration ]
8648 * TODO The standalone document declaration must have the value "no"
8649 * if any external markup declarations contain declarations of:
8650 * - attributes with default values, if elements to which these
8651 * attributes apply appear in the document without specifications
8652 * of values for these attributes, or
8653 * - entities (other than amp, lt, gt, apos, quot), if references
8654 * to those entities appear in the document, or
8655 * - attributes with values subject to normalization, where the
8656 * attribute appears in the document with a value which will change
8657 * as a result of normalization, or
8658 * - element types with element content, if white space occurs directly
8659 * within any instance of those types.
8660 *
8661 * Returns 1 if standalone, 0 otherwise
8662 */
8663
8664int
8665xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8666 int standalone = -1;
8667
8668 SKIP_BLANKS;
8669 if ((RAW == 's') && (NXT(1) == 't') &&
8670 (NXT(2) == 'a') && (NXT(3) == 'n') &&
8671 (NXT(4) == 'd') && (NXT(5) == 'a') &&
8672 (NXT(6) == 'l') && (NXT(7) == 'o') &&
8673 (NXT(8) == 'n') && (NXT(9) == 'e')) {
8674 SKIP(10);
8675 SKIP_BLANKS;
8676 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008677 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008678 return(standalone);
8679 }
8680 NEXT;
8681 SKIP_BLANKS;
8682 if (RAW == '\''){
8683 NEXT;
8684 if ((RAW == 'n') && (NXT(1) == 'o')) {
8685 standalone = 0;
8686 SKIP(2);
8687 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8688 (NXT(2) == 's')) {
8689 standalone = 1;
8690 SKIP(3);
8691 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008692 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008693 }
8694 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008695 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008696 } else
8697 NEXT;
8698 } else if (RAW == '"'){
8699 NEXT;
8700 if ((RAW == 'n') && (NXT(1) == 'o')) {
8701 standalone = 0;
8702 SKIP(2);
8703 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8704 (NXT(2) == 's')) {
8705 standalone = 1;
8706 SKIP(3);
8707 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008708 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008709 }
8710 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008711 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008712 } else
8713 NEXT;
8714 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008715 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008716 }
8717 }
8718 return(standalone);
8719}
8720
8721/**
8722 * xmlParseXMLDecl:
8723 * @ctxt: an XML parser context
8724 *
8725 * parse an XML declaration header
8726 *
8727 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8728 */
8729
8730void
8731xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8732 xmlChar *version;
8733
8734 /*
8735 * We know that '<?xml' is here.
8736 */
8737 SKIP(5);
8738
8739 if (!IS_BLANK(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008740 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8741 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008742 }
8743 SKIP_BLANKS;
8744
8745 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008746 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008747 */
8748 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008749 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008750 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008751 } else {
8752 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8753 /*
8754 * TODO: Blueberry should be detected here
8755 */
8756 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
8757 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
8758 version);
8759 }
8760 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008761 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008762 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008763 }
Owen Taylor3473f882001-02-23 17:55:21 +00008764
8765 /*
8766 * We may have the encoding declaration
8767 */
8768 if (!IS_BLANK(RAW)) {
8769 if ((RAW == '?') && (NXT(1) == '>')) {
8770 SKIP(2);
8771 return;
8772 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008773 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008774 }
8775 xmlParseEncodingDecl(ctxt);
8776 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8777 /*
8778 * The XML REC instructs us to stop parsing right here
8779 */
8780 return;
8781 }
8782
8783 /*
8784 * We may have the standalone status.
8785 */
8786 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
8787 if ((RAW == '?') && (NXT(1) == '>')) {
8788 SKIP(2);
8789 return;
8790 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008791 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008792 }
8793 SKIP_BLANKS;
8794 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8795
8796 SKIP_BLANKS;
8797 if ((RAW == '?') && (NXT(1) == '>')) {
8798 SKIP(2);
8799 } else if (RAW == '>') {
8800 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008801 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008802 NEXT;
8803 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008804 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008805 MOVETO_ENDTAG(CUR_PTR);
8806 NEXT;
8807 }
8808}
8809
8810/**
8811 * xmlParseMisc:
8812 * @ctxt: an XML parser context
8813 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008814 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008815 *
8816 * [27] Misc ::= Comment | PI | S
8817 */
8818
8819void
8820xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008821 while (((RAW == '<') && (NXT(1) == '?')) ||
8822 ((RAW == '<') && (NXT(1) == '!') &&
8823 (NXT(2) == '-') && (NXT(3) == '-')) ||
8824 IS_BLANK(CUR)) {
8825 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008826 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00008827 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008828 NEXT;
8829 } else
8830 xmlParseComment(ctxt);
8831 }
8832}
8833
8834/**
8835 * xmlParseDocument:
8836 * @ctxt: an XML parser context
8837 *
8838 * parse an XML document (and build a tree if using the standard SAX
8839 * interface).
8840 *
8841 * [1] document ::= prolog element Misc*
8842 *
8843 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8844 *
8845 * Returns 0, -1 in case of error. the parser context is augmented
8846 * as a result of the parsing.
8847 */
8848
8849int
8850xmlParseDocument(xmlParserCtxtPtr ctxt) {
8851 xmlChar start[4];
8852 xmlCharEncoding enc;
8853
8854 xmlInitParser();
8855
8856 GROW;
8857
8858 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008859 * SAX: detecting the level.
8860 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008861 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008862
8863 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008864 * SAX: beginning of the document processing.
8865 */
8866 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8867 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8868
Daniel Veillard50f34372001-08-03 12:06:36 +00008869 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008870 /*
8871 * Get the 4 first bytes and decode the charset
8872 * if enc != XML_CHAR_ENCODING_NONE
8873 * plug some encoding conversion routines.
8874 */
8875 start[0] = RAW;
8876 start[1] = NXT(1);
8877 start[2] = NXT(2);
8878 start[3] = NXT(3);
8879 enc = xmlDetectCharEncoding(start, 4);
8880 if (enc != XML_CHAR_ENCODING_NONE) {
8881 xmlSwitchEncoding(ctxt, enc);
8882 }
Owen Taylor3473f882001-02-23 17:55:21 +00008883 }
8884
8885
8886 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008887 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008888 }
8889
8890 /*
8891 * Check for the XMLDecl in the Prolog.
8892 */
8893 GROW;
8894 if ((RAW == '<') && (NXT(1) == '?') &&
8895 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8896 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8897
8898 /*
8899 * Note that we will switch encoding on the fly.
8900 */
8901 xmlParseXMLDecl(ctxt);
8902 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8903 /*
8904 * The XML REC instructs us to stop parsing right here
8905 */
8906 return(-1);
8907 }
8908 ctxt->standalone = ctxt->input->standalone;
8909 SKIP_BLANKS;
8910 } else {
8911 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8912 }
8913 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8914 ctxt->sax->startDocument(ctxt->userData);
8915
8916 /*
8917 * The Misc part of the Prolog
8918 */
8919 GROW;
8920 xmlParseMisc(ctxt);
8921
8922 /*
8923 * Then possibly doc type declaration(s) and more Misc
8924 * (doctypedecl Misc*)?
8925 */
8926 GROW;
8927 if ((RAW == '<') && (NXT(1) == '!') &&
8928 (NXT(2) == 'D') && (NXT(3) == 'O') &&
8929 (NXT(4) == 'C') && (NXT(5) == 'T') &&
8930 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
8931 (NXT(8) == 'E')) {
8932
8933 ctxt->inSubset = 1;
8934 xmlParseDocTypeDecl(ctxt);
8935 if (RAW == '[') {
8936 ctxt->instate = XML_PARSER_DTD;
8937 xmlParseInternalSubset(ctxt);
8938 }
8939
8940 /*
8941 * Create and update the external subset.
8942 */
8943 ctxt->inSubset = 2;
8944 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8945 (!ctxt->disableSAX))
8946 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8947 ctxt->extSubSystem, ctxt->extSubURI);
8948 ctxt->inSubset = 0;
8949
8950
8951 ctxt->instate = XML_PARSER_PROLOG;
8952 xmlParseMisc(ctxt);
8953 }
8954
8955 /*
8956 * Time to start parsing the tree itself
8957 */
8958 GROW;
8959 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008960 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8961 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008962 } else {
8963 ctxt->instate = XML_PARSER_CONTENT;
8964 xmlParseElement(ctxt);
8965 ctxt->instate = XML_PARSER_EPILOG;
8966
8967
8968 /*
8969 * The Misc part at the end
8970 */
8971 xmlParseMisc(ctxt);
8972
Daniel Veillard561b7f82002-03-20 21:55:57 +00008973 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008974 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008975 }
8976 ctxt->instate = XML_PARSER_EOF;
8977 }
8978
8979 /*
8980 * SAX: end of the document processing.
8981 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008982 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008983 ctxt->sax->endDocument(ctxt->userData);
8984
Daniel Veillard5997aca2002-03-18 18:36:20 +00008985 /*
8986 * Remove locally kept entity definitions if the tree was not built
8987 */
8988 if ((ctxt->myDoc != NULL) &&
8989 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8990 xmlFreeDoc(ctxt->myDoc);
8991 ctxt->myDoc = NULL;
8992 }
8993
Daniel Veillardc7612992002-02-17 22:47:37 +00008994 if (! ctxt->wellFormed) {
8995 ctxt->valid = 0;
8996 return(-1);
8997 }
Owen Taylor3473f882001-02-23 17:55:21 +00008998 return(0);
8999}
9000
9001/**
9002 * xmlParseExtParsedEnt:
9003 * @ctxt: an XML parser context
9004 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009005 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00009006 * An external general parsed entity is well-formed if it matches the
9007 * production labeled extParsedEnt.
9008 *
9009 * [78] extParsedEnt ::= TextDecl? content
9010 *
9011 * Returns 0, -1 in case of error. the parser context is augmented
9012 * as a result of the parsing.
9013 */
9014
9015int
9016xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9017 xmlChar start[4];
9018 xmlCharEncoding enc;
9019
9020 xmlDefaultSAXHandlerInit();
9021
Daniel Veillard309f81d2003-09-23 09:02:53 +00009022 xmlDetectSAX2(ctxt);
9023
Owen Taylor3473f882001-02-23 17:55:21 +00009024 GROW;
9025
9026 /*
9027 * SAX: beginning of the document processing.
9028 */
9029 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9030 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9031
9032 /*
9033 * Get the 4 first bytes and decode the charset
9034 * if enc != XML_CHAR_ENCODING_NONE
9035 * plug some encoding conversion routines.
9036 */
9037 start[0] = RAW;
9038 start[1] = NXT(1);
9039 start[2] = NXT(2);
9040 start[3] = NXT(3);
9041 enc = xmlDetectCharEncoding(start, 4);
9042 if (enc != XML_CHAR_ENCODING_NONE) {
9043 xmlSwitchEncoding(ctxt, enc);
9044 }
9045
9046
9047 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009048 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009049 }
9050
9051 /*
9052 * Check for the XMLDecl in the Prolog.
9053 */
9054 GROW;
9055 if ((RAW == '<') && (NXT(1) == '?') &&
9056 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9057 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9058
9059 /*
9060 * Note that we will switch encoding on the fly.
9061 */
9062 xmlParseXMLDecl(ctxt);
9063 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9064 /*
9065 * The XML REC instructs us to stop parsing right here
9066 */
9067 return(-1);
9068 }
9069 SKIP_BLANKS;
9070 } else {
9071 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9072 }
9073 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9074 ctxt->sax->startDocument(ctxt->userData);
9075
9076 /*
9077 * Doing validity checking on chunk doesn't make sense
9078 */
9079 ctxt->instate = XML_PARSER_CONTENT;
9080 ctxt->validate = 0;
9081 ctxt->loadsubset = 0;
9082 ctxt->depth = 0;
9083
9084 xmlParseContent(ctxt);
9085
9086 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009087 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009088 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009089 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009090 }
9091
9092 /*
9093 * SAX: end of the document processing.
9094 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009095 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009096 ctxt->sax->endDocument(ctxt->userData);
9097
9098 if (! ctxt->wellFormed) return(-1);
9099 return(0);
9100}
9101
9102/************************************************************************
9103 * *
9104 * Progressive parsing interfaces *
9105 * *
9106 ************************************************************************/
9107
9108/**
9109 * xmlParseLookupSequence:
9110 * @ctxt: an XML parser context
9111 * @first: the first char to lookup
9112 * @next: the next char to lookup or zero
9113 * @third: the next char to lookup or zero
9114 *
9115 * Try to find if a sequence (first, next, third) or just (first next) or
9116 * (first) is available in the input stream.
9117 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9118 * to avoid rescanning sequences of bytes, it DOES change the state of the
9119 * parser, do not use liberally.
9120 *
9121 * Returns the index to the current parsing point if the full sequence
9122 * is available, -1 otherwise.
9123 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009124static int
Owen Taylor3473f882001-02-23 17:55:21 +00009125xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9126 xmlChar next, xmlChar third) {
9127 int base, len;
9128 xmlParserInputPtr in;
9129 const xmlChar *buf;
9130
9131 in = ctxt->input;
9132 if (in == NULL) return(-1);
9133 base = in->cur - in->base;
9134 if (base < 0) return(-1);
9135 if (ctxt->checkIndex > base)
9136 base = ctxt->checkIndex;
9137 if (in->buf == NULL) {
9138 buf = in->base;
9139 len = in->length;
9140 } else {
9141 buf = in->buf->buffer->content;
9142 len = in->buf->buffer->use;
9143 }
9144 /* take into account the sequence length */
9145 if (third) len -= 2;
9146 else if (next) len --;
9147 for (;base < len;base++) {
9148 if (buf[base] == first) {
9149 if (third != 0) {
9150 if ((buf[base + 1] != next) ||
9151 (buf[base + 2] != third)) continue;
9152 } else if (next != 0) {
9153 if (buf[base + 1] != next) continue;
9154 }
9155 ctxt->checkIndex = 0;
9156#ifdef DEBUG_PUSH
9157 if (next == 0)
9158 xmlGenericError(xmlGenericErrorContext,
9159 "PP: lookup '%c' found at %d\n",
9160 first, base);
9161 else if (third == 0)
9162 xmlGenericError(xmlGenericErrorContext,
9163 "PP: lookup '%c%c' found at %d\n",
9164 first, next, base);
9165 else
9166 xmlGenericError(xmlGenericErrorContext,
9167 "PP: lookup '%c%c%c' found at %d\n",
9168 first, next, third, base);
9169#endif
9170 return(base - (in->cur - in->base));
9171 }
9172 }
9173 ctxt->checkIndex = base;
9174#ifdef DEBUG_PUSH
9175 if (next == 0)
9176 xmlGenericError(xmlGenericErrorContext,
9177 "PP: lookup '%c' failed\n", first);
9178 else if (third == 0)
9179 xmlGenericError(xmlGenericErrorContext,
9180 "PP: lookup '%c%c' failed\n", first, next);
9181 else
9182 xmlGenericError(xmlGenericErrorContext,
9183 "PP: lookup '%c%c%c' failed\n", first, next, third);
9184#endif
9185 return(-1);
9186}
9187
9188/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009189 * xmlParseGetLasts:
9190 * @ctxt: an XML parser context
9191 * @lastlt: pointer to store the last '<' from the input
9192 * @lastgt: pointer to store the last '>' from the input
9193 *
9194 * Lookup the last < and > in the current chunk
9195 */
9196static void
9197xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9198 const xmlChar **lastgt) {
9199 const xmlChar *tmp;
9200
9201 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9202 xmlGenericError(xmlGenericErrorContext,
9203 "Internal error: xmlParseGetLasts\n");
9204 return;
9205 }
9206 if ((ctxt->progressive == 1) && (ctxt->inputNr == 1)) {
9207 tmp = ctxt->input->end;
9208 tmp--;
9209 while ((tmp >= ctxt->input->base) && (*tmp != '<') &&
9210 (*tmp != '>')) tmp--;
9211 if (tmp < ctxt->input->base) {
9212 *lastlt = NULL;
9213 *lastgt = NULL;
9214 } else if (*tmp == '<') {
9215 *lastlt = tmp;
9216 tmp--;
9217 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9218 if (tmp < ctxt->input->base)
9219 *lastgt = NULL;
9220 else
9221 *lastgt = tmp;
9222 } else {
9223 *lastgt = tmp;
9224 tmp--;
9225 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
9226 if (tmp < ctxt->input->base)
9227 *lastlt = NULL;
9228 else
9229 *lastlt = tmp;
9230 }
9231
9232 } else {
9233 *lastlt = NULL;
9234 *lastgt = NULL;
9235 }
9236}
9237/**
Owen Taylor3473f882001-02-23 17:55:21 +00009238 * xmlParseTryOrFinish:
9239 * @ctxt: an XML parser context
9240 * @terminate: last chunk indicator
9241 *
9242 * Try to progress on parsing
9243 *
9244 * Returns zero if no parsing was possible
9245 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009246static int
Owen Taylor3473f882001-02-23 17:55:21 +00009247xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9248 int ret = 0;
9249 int avail;
9250 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009251 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009252
9253#ifdef DEBUG_PUSH
9254 switch (ctxt->instate) {
9255 case XML_PARSER_EOF:
9256 xmlGenericError(xmlGenericErrorContext,
9257 "PP: try EOF\n"); break;
9258 case XML_PARSER_START:
9259 xmlGenericError(xmlGenericErrorContext,
9260 "PP: try START\n"); break;
9261 case XML_PARSER_MISC:
9262 xmlGenericError(xmlGenericErrorContext,
9263 "PP: try MISC\n");break;
9264 case XML_PARSER_COMMENT:
9265 xmlGenericError(xmlGenericErrorContext,
9266 "PP: try COMMENT\n");break;
9267 case XML_PARSER_PROLOG:
9268 xmlGenericError(xmlGenericErrorContext,
9269 "PP: try PROLOG\n");break;
9270 case XML_PARSER_START_TAG:
9271 xmlGenericError(xmlGenericErrorContext,
9272 "PP: try START_TAG\n");break;
9273 case XML_PARSER_CONTENT:
9274 xmlGenericError(xmlGenericErrorContext,
9275 "PP: try CONTENT\n");break;
9276 case XML_PARSER_CDATA_SECTION:
9277 xmlGenericError(xmlGenericErrorContext,
9278 "PP: try CDATA_SECTION\n");break;
9279 case XML_PARSER_END_TAG:
9280 xmlGenericError(xmlGenericErrorContext,
9281 "PP: try END_TAG\n");break;
9282 case XML_PARSER_ENTITY_DECL:
9283 xmlGenericError(xmlGenericErrorContext,
9284 "PP: try ENTITY_DECL\n");break;
9285 case XML_PARSER_ENTITY_VALUE:
9286 xmlGenericError(xmlGenericErrorContext,
9287 "PP: try ENTITY_VALUE\n");break;
9288 case XML_PARSER_ATTRIBUTE_VALUE:
9289 xmlGenericError(xmlGenericErrorContext,
9290 "PP: try ATTRIBUTE_VALUE\n");break;
9291 case XML_PARSER_DTD:
9292 xmlGenericError(xmlGenericErrorContext,
9293 "PP: try DTD\n");break;
9294 case XML_PARSER_EPILOG:
9295 xmlGenericError(xmlGenericErrorContext,
9296 "PP: try EPILOG\n");break;
9297 case XML_PARSER_PI:
9298 xmlGenericError(xmlGenericErrorContext,
9299 "PP: try PI\n");break;
9300 case XML_PARSER_IGNORE:
9301 xmlGenericError(xmlGenericErrorContext,
9302 "PP: try IGNORE\n");break;
9303 }
9304#endif
9305
Daniel Veillarda880b122003-04-21 21:36:41 +00009306 if (ctxt->input->cur - ctxt->input->base > 4096) {
9307 xmlSHRINK(ctxt);
9308 ctxt->checkIndex = 0;
9309 }
9310 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009311
Daniel Veillarda880b122003-04-21 21:36:41 +00009312 while (1) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009313 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9314 return(0);
9315
9316
Owen Taylor3473f882001-02-23 17:55:21 +00009317 /*
9318 * Pop-up of finished entities.
9319 */
9320 while ((RAW == 0) && (ctxt->inputNr > 1))
9321 xmlPopInput(ctxt);
9322
9323 if (ctxt->input ==NULL) break;
9324 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009325 avail = ctxt->input->length -
9326 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009327 else {
9328 /*
9329 * If we are operating on converted input, try to flush
9330 * remainng chars to avoid them stalling in the non-converted
9331 * buffer.
9332 */
9333 if ((ctxt->input->buf->raw != NULL) &&
9334 (ctxt->input->buf->raw->use > 0)) {
9335 int base = ctxt->input->base -
9336 ctxt->input->buf->buffer->content;
9337 int current = ctxt->input->cur - ctxt->input->base;
9338
9339 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9340 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9341 ctxt->input->cur = ctxt->input->base + current;
9342 ctxt->input->end =
9343 &ctxt->input->buf->buffer->content[
9344 ctxt->input->buf->buffer->use];
9345 }
9346 avail = ctxt->input->buf->buffer->use -
9347 (ctxt->input->cur - ctxt->input->base);
9348 }
Owen Taylor3473f882001-02-23 17:55:21 +00009349 if (avail < 1)
9350 goto done;
9351 switch (ctxt->instate) {
9352 case XML_PARSER_EOF:
9353 /*
9354 * Document parsing is done !
9355 */
9356 goto done;
9357 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009358 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9359 xmlChar start[4];
9360 xmlCharEncoding enc;
9361
9362 /*
9363 * Very first chars read from the document flow.
9364 */
9365 if (avail < 4)
9366 goto done;
9367
9368 /*
9369 * Get the 4 first bytes and decode the charset
9370 * if enc != XML_CHAR_ENCODING_NONE
9371 * plug some encoding conversion routines.
9372 */
9373 start[0] = RAW;
9374 start[1] = NXT(1);
9375 start[2] = NXT(2);
9376 start[3] = NXT(3);
9377 enc = xmlDetectCharEncoding(start, 4);
9378 if (enc != XML_CHAR_ENCODING_NONE) {
9379 xmlSwitchEncoding(ctxt, enc);
9380 }
9381 break;
9382 }
Owen Taylor3473f882001-02-23 17:55:21 +00009383
9384 cur = ctxt->input->cur[0];
9385 next = ctxt->input->cur[1];
9386 if (cur == 0) {
9387 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9388 ctxt->sax->setDocumentLocator(ctxt->userData,
9389 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009390 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009391 ctxt->instate = XML_PARSER_EOF;
9392#ifdef DEBUG_PUSH
9393 xmlGenericError(xmlGenericErrorContext,
9394 "PP: entering EOF\n");
9395#endif
9396 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9397 ctxt->sax->endDocument(ctxt->userData);
9398 goto done;
9399 }
9400 if ((cur == '<') && (next == '?')) {
9401 /* PI or XML decl */
9402 if (avail < 5) return(ret);
9403 if ((!terminate) &&
9404 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9405 return(ret);
9406 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9407 ctxt->sax->setDocumentLocator(ctxt->userData,
9408 &xmlDefaultSAXLocator);
9409 if ((ctxt->input->cur[2] == 'x') &&
9410 (ctxt->input->cur[3] == 'm') &&
9411 (ctxt->input->cur[4] == 'l') &&
9412 (IS_BLANK(ctxt->input->cur[5]))) {
9413 ret += 5;
9414#ifdef DEBUG_PUSH
9415 xmlGenericError(xmlGenericErrorContext,
9416 "PP: Parsing XML Decl\n");
9417#endif
9418 xmlParseXMLDecl(ctxt);
9419 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9420 /*
9421 * The XML REC instructs us to stop parsing right
9422 * here
9423 */
9424 ctxt->instate = XML_PARSER_EOF;
9425 return(0);
9426 }
9427 ctxt->standalone = ctxt->input->standalone;
9428 if ((ctxt->encoding == NULL) &&
9429 (ctxt->input->encoding != NULL))
9430 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9431 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9432 (!ctxt->disableSAX))
9433 ctxt->sax->startDocument(ctxt->userData);
9434 ctxt->instate = XML_PARSER_MISC;
9435#ifdef DEBUG_PUSH
9436 xmlGenericError(xmlGenericErrorContext,
9437 "PP: entering MISC\n");
9438#endif
9439 } else {
9440 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9441 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9442 (!ctxt->disableSAX))
9443 ctxt->sax->startDocument(ctxt->userData);
9444 ctxt->instate = XML_PARSER_MISC;
9445#ifdef DEBUG_PUSH
9446 xmlGenericError(xmlGenericErrorContext,
9447 "PP: entering MISC\n");
9448#endif
9449 }
9450 } else {
9451 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9452 ctxt->sax->setDocumentLocator(ctxt->userData,
9453 &xmlDefaultSAXLocator);
9454 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9455 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9456 (!ctxt->disableSAX))
9457 ctxt->sax->startDocument(ctxt->userData);
9458 ctxt->instate = XML_PARSER_MISC;
9459#ifdef DEBUG_PUSH
9460 xmlGenericError(xmlGenericErrorContext,
9461 "PP: entering MISC\n");
9462#endif
9463 }
9464 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009465 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009466 const xmlChar *name;
9467 const xmlChar *prefix;
9468 const xmlChar *URI;
9469 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009470
9471 if ((avail < 2) && (ctxt->inputNr == 1))
9472 goto done;
9473 cur = ctxt->input->cur[0];
9474 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009475 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009476 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009477 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9478 ctxt->sax->endDocument(ctxt->userData);
9479 goto done;
9480 }
9481 if (!terminate) {
9482 if (ctxt->progressive) {
9483 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9484 goto done;
9485 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9486 goto done;
9487 }
9488 }
9489 if (ctxt->spaceNr == 0)
9490 spacePush(ctxt, -1);
9491 else
9492 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009493#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009494 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009495#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009496 name = xmlParseStartTag2(ctxt, &prefix, &URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009497#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009498 else
9499 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009500#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009501 if (name == NULL) {
9502 spacePop(ctxt);
9503 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009504 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9505 ctxt->sax->endDocument(ctxt->userData);
9506 goto done;
9507 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009508#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009509 /*
9510 * [ VC: Root Element Type ]
9511 * The Name in the document type declaration must match
9512 * the element type of the root element.
9513 */
9514 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9515 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9516 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009517#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009518
9519 /*
9520 * Check for an Empty Element.
9521 */
9522 if ((RAW == '/') && (NXT(1) == '>')) {
9523 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009524
9525 if (ctxt->sax2) {
9526 if ((ctxt->sax != NULL) &&
9527 (ctxt->sax->endElementNs != NULL) &&
9528 (!ctxt->disableSAX))
9529 ctxt->sax->endElementNs(ctxt->userData, name,
9530 prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009531#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009532 } else {
9533 if ((ctxt->sax != NULL) &&
9534 (ctxt->sax->endElement != NULL) &&
9535 (!ctxt->disableSAX))
9536 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009537#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009538 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009539 spacePop(ctxt);
9540 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009541 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009542 } else {
9543 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009544 }
9545 break;
9546 }
9547 if (RAW == '>') {
9548 NEXT;
9549 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009550 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009551 "Couldn't find end of Start Tag %s\n",
9552 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009553 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009554 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009555 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009556 if (ctxt->sax2)
9557 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009558#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009559 else
9560 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009561#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009562
Daniel Veillarda880b122003-04-21 21:36:41 +00009563 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009564 break;
9565 }
9566 case XML_PARSER_CONTENT: {
9567 const xmlChar *test;
9568 unsigned int cons;
9569 if ((avail < 2) && (ctxt->inputNr == 1))
9570 goto done;
9571 cur = ctxt->input->cur[0];
9572 next = ctxt->input->cur[1];
9573
9574 test = CUR_PTR;
9575 cons = ctxt->input->consumed;
9576 if ((cur == '<') && (next == '/')) {
9577 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009578 break;
9579 } else if ((cur == '<') && (next == '?')) {
9580 if ((!terminate) &&
9581 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9582 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009583 xmlParsePI(ctxt);
9584 } else if ((cur == '<') && (next != '!')) {
9585 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009586 break;
9587 } else if ((cur == '<') && (next == '!') &&
9588 (ctxt->input->cur[2] == '-') &&
9589 (ctxt->input->cur[3] == '-')) {
9590 if ((!terminate) &&
9591 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9592 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009593 xmlParseComment(ctxt);
9594 ctxt->instate = XML_PARSER_CONTENT;
9595 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9596 (ctxt->input->cur[2] == '[') &&
9597 (ctxt->input->cur[3] == 'C') &&
9598 (ctxt->input->cur[4] == 'D') &&
9599 (ctxt->input->cur[5] == 'A') &&
9600 (ctxt->input->cur[6] == 'T') &&
9601 (ctxt->input->cur[7] == 'A') &&
9602 (ctxt->input->cur[8] == '[')) {
9603 SKIP(9);
9604 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009605 break;
9606 } else if ((cur == '<') && (next == '!') &&
9607 (avail < 9)) {
9608 goto done;
9609 } else if (cur == '&') {
9610 if ((!terminate) &&
9611 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9612 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009613 xmlParseReference(ctxt);
9614 } else {
9615 /* TODO Avoid the extra copy, handle directly !!! */
9616 /*
9617 * Goal of the following test is:
9618 * - minimize calls to the SAX 'character' callback
9619 * when they are mergeable
9620 * - handle an problem for isBlank when we only parse
9621 * a sequence of blank chars and the next one is
9622 * not available to check against '<' presence.
9623 * - tries to homogenize the differences in SAX
9624 * callbacks between the push and pull versions
9625 * of the parser.
9626 */
9627 if ((ctxt->inputNr == 1) &&
9628 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9629 if (!terminate) {
9630 if (ctxt->progressive) {
9631 if ((lastlt == NULL) ||
9632 (ctxt->input->cur > lastlt))
9633 goto done;
9634 } else if (xmlParseLookupSequence(ctxt,
9635 '<', 0, 0) < 0) {
9636 goto done;
9637 }
9638 }
9639 }
9640 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009641 xmlParseCharData(ctxt, 0);
9642 }
9643 /*
9644 * Pop-up of finished entities.
9645 */
9646 while ((RAW == 0) && (ctxt->inputNr > 1))
9647 xmlPopInput(ctxt);
9648 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009649 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9650 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009651 ctxt->instate = XML_PARSER_EOF;
9652 break;
9653 }
9654 break;
9655 }
9656 case XML_PARSER_END_TAG:
9657 if (avail < 2)
9658 goto done;
9659 if (!terminate) {
9660 if (ctxt->progressive) {
9661 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9662 goto done;
9663 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9664 goto done;
9665 }
9666 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009667 if (ctxt->sax2) {
9668 xmlParseEndTag2(ctxt,
9669 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9670 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
9671 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1]);
9672 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009673 }
9674#ifdef LIBXML_SAX1_ENABLED
9675 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009676 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009677#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009678 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009679 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009680 } else {
9681 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009682 }
9683 break;
9684 case XML_PARSER_CDATA_SECTION: {
9685 /*
9686 * The Push mode need to have the SAX callback for
9687 * cdataBlock merge back contiguous callbacks.
9688 */
9689 int base;
9690
9691 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9692 if (base < 0) {
9693 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
9694 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9695 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009696 ctxt->sax->cdataBlock(ctxt->userData,
9697 ctxt->input->cur,
9698 XML_PARSER_BIG_BUFFER_SIZE);
9699 else if (ctxt->sax->characters != NULL)
9700 ctxt->sax->characters(ctxt->userData,
9701 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00009702 XML_PARSER_BIG_BUFFER_SIZE);
9703 }
9704 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
9705 ctxt->checkIndex = 0;
9706 }
9707 goto done;
9708 } else {
9709 if ((ctxt->sax != NULL) && (base > 0) &&
9710 (!ctxt->disableSAX)) {
9711 if (ctxt->sax->cdataBlock != NULL)
9712 ctxt->sax->cdataBlock(ctxt->userData,
9713 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009714 else if (ctxt->sax->characters != NULL)
9715 ctxt->sax->characters(ctxt->userData,
9716 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009717 }
9718 SKIP(base + 3);
9719 ctxt->checkIndex = 0;
9720 ctxt->instate = XML_PARSER_CONTENT;
9721#ifdef DEBUG_PUSH
9722 xmlGenericError(xmlGenericErrorContext,
9723 "PP: entering CONTENT\n");
9724#endif
9725 }
9726 break;
9727 }
Owen Taylor3473f882001-02-23 17:55:21 +00009728 case XML_PARSER_MISC:
9729 SKIP_BLANKS;
9730 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009731 avail = ctxt->input->length -
9732 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009733 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009734 avail = ctxt->input->buf->buffer->use -
9735 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009736 if (avail < 2)
9737 goto done;
9738 cur = ctxt->input->cur[0];
9739 next = ctxt->input->cur[1];
9740 if ((cur == '<') && (next == '?')) {
9741 if ((!terminate) &&
9742 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9743 goto done;
9744#ifdef DEBUG_PUSH
9745 xmlGenericError(xmlGenericErrorContext,
9746 "PP: Parsing PI\n");
9747#endif
9748 xmlParsePI(ctxt);
9749 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009750 (ctxt->input->cur[2] == '-') &&
9751 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009752 if ((!terminate) &&
9753 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9754 goto done;
9755#ifdef DEBUG_PUSH
9756 xmlGenericError(xmlGenericErrorContext,
9757 "PP: Parsing Comment\n");
9758#endif
9759 xmlParseComment(ctxt);
9760 ctxt->instate = XML_PARSER_MISC;
9761 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009762 (ctxt->input->cur[2] == 'D') &&
9763 (ctxt->input->cur[3] == 'O') &&
9764 (ctxt->input->cur[4] == 'C') &&
9765 (ctxt->input->cur[5] == 'T') &&
9766 (ctxt->input->cur[6] == 'Y') &&
9767 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009768 (ctxt->input->cur[8] == 'E')) {
9769 if ((!terminate) &&
9770 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9771 goto done;
9772#ifdef DEBUG_PUSH
9773 xmlGenericError(xmlGenericErrorContext,
9774 "PP: Parsing internal subset\n");
9775#endif
9776 ctxt->inSubset = 1;
9777 xmlParseDocTypeDecl(ctxt);
9778 if (RAW == '[') {
9779 ctxt->instate = XML_PARSER_DTD;
9780#ifdef DEBUG_PUSH
9781 xmlGenericError(xmlGenericErrorContext,
9782 "PP: entering DTD\n");
9783#endif
9784 } else {
9785 /*
9786 * Create and update the external subset.
9787 */
9788 ctxt->inSubset = 2;
9789 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9790 (ctxt->sax->externalSubset != NULL))
9791 ctxt->sax->externalSubset(ctxt->userData,
9792 ctxt->intSubName, ctxt->extSubSystem,
9793 ctxt->extSubURI);
9794 ctxt->inSubset = 0;
9795 ctxt->instate = XML_PARSER_PROLOG;
9796#ifdef DEBUG_PUSH
9797 xmlGenericError(xmlGenericErrorContext,
9798 "PP: entering PROLOG\n");
9799#endif
9800 }
9801 } else if ((cur == '<') && (next == '!') &&
9802 (avail < 9)) {
9803 goto done;
9804 } else {
9805 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009806 ctxt->progressive = 1;
9807 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009808#ifdef DEBUG_PUSH
9809 xmlGenericError(xmlGenericErrorContext,
9810 "PP: entering START_TAG\n");
9811#endif
9812 }
9813 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009814 case XML_PARSER_PROLOG:
9815 SKIP_BLANKS;
9816 if (ctxt->input->buf == NULL)
9817 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9818 else
9819 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9820 if (avail < 2)
9821 goto done;
9822 cur = ctxt->input->cur[0];
9823 next = ctxt->input->cur[1];
9824 if ((cur == '<') && (next == '?')) {
9825 if ((!terminate) &&
9826 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9827 goto done;
9828#ifdef DEBUG_PUSH
9829 xmlGenericError(xmlGenericErrorContext,
9830 "PP: Parsing PI\n");
9831#endif
9832 xmlParsePI(ctxt);
9833 } else if ((cur == '<') && (next == '!') &&
9834 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9835 if ((!terminate) &&
9836 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9837 goto done;
9838#ifdef DEBUG_PUSH
9839 xmlGenericError(xmlGenericErrorContext,
9840 "PP: Parsing Comment\n");
9841#endif
9842 xmlParseComment(ctxt);
9843 ctxt->instate = XML_PARSER_PROLOG;
9844 } else if ((cur == '<') && (next == '!') &&
9845 (avail < 4)) {
9846 goto done;
9847 } else {
9848 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009849 ctxt->progressive = 1;
9850 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009851#ifdef DEBUG_PUSH
9852 xmlGenericError(xmlGenericErrorContext,
9853 "PP: entering START_TAG\n");
9854#endif
9855 }
9856 break;
9857 case XML_PARSER_EPILOG:
9858 SKIP_BLANKS;
9859 if (ctxt->input->buf == NULL)
9860 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9861 else
9862 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9863 if (avail < 2)
9864 goto done;
9865 cur = ctxt->input->cur[0];
9866 next = ctxt->input->cur[1];
9867 if ((cur == '<') && (next == '?')) {
9868 if ((!terminate) &&
9869 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9870 goto done;
9871#ifdef DEBUG_PUSH
9872 xmlGenericError(xmlGenericErrorContext,
9873 "PP: Parsing PI\n");
9874#endif
9875 xmlParsePI(ctxt);
9876 ctxt->instate = XML_PARSER_EPILOG;
9877 } else if ((cur == '<') && (next == '!') &&
9878 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9879 if ((!terminate) &&
9880 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9881 goto done;
9882#ifdef DEBUG_PUSH
9883 xmlGenericError(xmlGenericErrorContext,
9884 "PP: Parsing Comment\n");
9885#endif
9886 xmlParseComment(ctxt);
9887 ctxt->instate = XML_PARSER_EPILOG;
9888 } else if ((cur == '<') && (next == '!') &&
9889 (avail < 4)) {
9890 goto done;
9891 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009892 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009893 ctxt->instate = XML_PARSER_EOF;
9894#ifdef DEBUG_PUSH
9895 xmlGenericError(xmlGenericErrorContext,
9896 "PP: entering EOF\n");
9897#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009898 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009899 ctxt->sax->endDocument(ctxt->userData);
9900 goto done;
9901 }
9902 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009903 case XML_PARSER_DTD: {
9904 /*
9905 * Sorry but progressive parsing of the internal subset
9906 * is not expected to be supported. We first check that
9907 * the full content of the internal subset is available and
9908 * the parsing is launched only at that point.
9909 * Internal subset ends up with "']' S? '>'" in an unescaped
9910 * section and not in a ']]>' sequence which are conditional
9911 * sections (whoever argued to keep that crap in XML deserve
9912 * a place in hell !).
9913 */
9914 int base, i;
9915 xmlChar *buf;
9916 xmlChar quote = 0;
9917
9918 base = ctxt->input->cur - ctxt->input->base;
9919 if (base < 0) return(0);
9920 if (ctxt->checkIndex > base)
9921 base = ctxt->checkIndex;
9922 buf = ctxt->input->buf->buffer->content;
9923 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9924 base++) {
9925 if (quote != 0) {
9926 if (buf[base] == quote)
9927 quote = 0;
9928 continue;
9929 }
9930 if (buf[base] == '"') {
9931 quote = '"';
9932 continue;
9933 }
9934 if (buf[base] == '\'') {
9935 quote = '\'';
9936 continue;
9937 }
9938 if (buf[base] == ']') {
9939 if ((unsigned int) base +1 >=
9940 ctxt->input->buf->buffer->use)
9941 break;
9942 if (buf[base + 1] == ']') {
9943 /* conditional crap, skip both ']' ! */
9944 base++;
9945 continue;
9946 }
9947 for (i = 0;
9948 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9949 i++) {
9950 if (buf[base + i] == '>')
9951 goto found_end_int_subset;
9952 }
9953 break;
9954 }
9955 }
9956 /*
9957 * We didn't found the end of the Internal subset
9958 */
9959 if (quote == 0)
9960 ctxt->checkIndex = base;
9961#ifdef DEBUG_PUSH
9962 if (next == 0)
9963 xmlGenericError(xmlGenericErrorContext,
9964 "PP: lookup of int subset end filed\n");
9965#endif
9966 goto done;
9967
9968found_end_int_subset:
9969 xmlParseInternalSubset(ctxt);
9970 ctxt->inSubset = 2;
9971 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9972 (ctxt->sax->externalSubset != NULL))
9973 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9974 ctxt->extSubSystem, ctxt->extSubURI);
9975 ctxt->inSubset = 0;
9976 ctxt->instate = XML_PARSER_PROLOG;
9977 ctxt->checkIndex = 0;
9978#ifdef DEBUG_PUSH
9979 xmlGenericError(xmlGenericErrorContext,
9980 "PP: entering PROLOG\n");
9981#endif
9982 break;
9983 }
9984 case XML_PARSER_COMMENT:
9985 xmlGenericError(xmlGenericErrorContext,
9986 "PP: internal error, state == COMMENT\n");
9987 ctxt->instate = XML_PARSER_CONTENT;
9988#ifdef DEBUG_PUSH
9989 xmlGenericError(xmlGenericErrorContext,
9990 "PP: entering CONTENT\n");
9991#endif
9992 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009993 case XML_PARSER_IGNORE:
9994 xmlGenericError(xmlGenericErrorContext,
9995 "PP: internal error, state == IGNORE");
9996 ctxt->instate = XML_PARSER_DTD;
9997#ifdef DEBUG_PUSH
9998 xmlGenericError(xmlGenericErrorContext,
9999 "PP: entering DTD\n");
10000#endif
10001 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010002 case XML_PARSER_PI:
10003 xmlGenericError(xmlGenericErrorContext,
10004 "PP: internal error, state == PI\n");
10005 ctxt->instate = XML_PARSER_CONTENT;
10006#ifdef DEBUG_PUSH
10007 xmlGenericError(xmlGenericErrorContext,
10008 "PP: entering CONTENT\n");
10009#endif
10010 break;
10011 case XML_PARSER_ENTITY_DECL:
10012 xmlGenericError(xmlGenericErrorContext,
10013 "PP: internal error, state == ENTITY_DECL\n");
10014 ctxt->instate = XML_PARSER_DTD;
10015#ifdef DEBUG_PUSH
10016 xmlGenericError(xmlGenericErrorContext,
10017 "PP: entering DTD\n");
10018#endif
10019 break;
10020 case XML_PARSER_ENTITY_VALUE:
10021 xmlGenericError(xmlGenericErrorContext,
10022 "PP: internal error, state == ENTITY_VALUE\n");
10023 ctxt->instate = XML_PARSER_CONTENT;
10024#ifdef DEBUG_PUSH
10025 xmlGenericError(xmlGenericErrorContext,
10026 "PP: entering DTD\n");
10027#endif
10028 break;
10029 case XML_PARSER_ATTRIBUTE_VALUE:
10030 xmlGenericError(xmlGenericErrorContext,
10031 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10032 ctxt->instate = XML_PARSER_START_TAG;
10033#ifdef DEBUG_PUSH
10034 xmlGenericError(xmlGenericErrorContext,
10035 "PP: entering START_TAG\n");
10036#endif
10037 break;
10038 case XML_PARSER_SYSTEM_LITERAL:
10039 xmlGenericError(xmlGenericErrorContext,
10040 "PP: internal error, state == SYSTEM_LITERAL\n");
10041 ctxt->instate = XML_PARSER_START_TAG;
10042#ifdef DEBUG_PUSH
10043 xmlGenericError(xmlGenericErrorContext,
10044 "PP: entering START_TAG\n");
10045#endif
10046 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010047 case XML_PARSER_PUBLIC_LITERAL:
10048 xmlGenericError(xmlGenericErrorContext,
10049 "PP: internal error, state == PUBLIC_LITERAL\n");
10050 ctxt->instate = XML_PARSER_START_TAG;
10051#ifdef DEBUG_PUSH
10052 xmlGenericError(xmlGenericErrorContext,
10053 "PP: entering START_TAG\n");
10054#endif
10055 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010056 }
10057 }
10058done:
10059#ifdef DEBUG_PUSH
10060 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10061#endif
10062 return(ret);
10063}
10064
10065/**
Owen Taylor3473f882001-02-23 17:55:21 +000010066 * xmlParseChunk:
10067 * @ctxt: an XML parser context
10068 * @chunk: an char array
10069 * @size: the size in byte of the chunk
10070 * @terminate: last chunk indicator
10071 *
10072 * Parse a Chunk of memory
10073 *
10074 * Returns zero if no error, the xmlParserErrors otherwise.
10075 */
10076int
10077xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10078 int terminate) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010079 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10080 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010081 if (ctxt->instate == XML_PARSER_START)
10082 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010083 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10084 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10085 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10086 int cur = ctxt->input->cur - ctxt->input->base;
10087
10088 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10089 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10090 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010091 ctxt->input->end =
10092 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010093#ifdef DEBUG_PUSH
10094 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10095#endif
10096
Owen Taylor3473f882001-02-23 17:55:21 +000010097 } else if (ctxt->instate != XML_PARSER_EOF) {
10098 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10099 xmlParserInputBufferPtr in = ctxt->input->buf;
10100 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10101 (in->raw != NULL)) {
10102 int nbchars;
10103
10104 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10105 if (nbchars < 0) {
10106 xmlGenericError(xmlGenericErrorContext,
10107 "xmlParseChunk: encoder error\n");
10108 return(XML_ERR_INVALID_ENCODING);
10109 }
10110 }
10111 }
10112 }
10113 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010114 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10115 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010116 if (terminate) {
10117 /*
10118 * Check for termination
10119 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010120 int avail = 0;
10121 if (ctxt->input->buf == NULL)
10122 avail = ctxt->input->length -
10123 (ctxt->input->cur - ctxt->input->base);
10124 else
10125 avail = ctxt->input->buf->buffer->use -
10126 (ctxt->input->cur - ctxt->input->base);
10127
Owen Taylor3473f882001-02-23 17:55:21 +000010128 if ((ctxt->instate != XML_PARSER_EOF) &&
10129 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010130 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010131 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010132 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010133 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010134 }
Owen Taylor3473f882001-02-23 17:55:21 +000010135 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010136 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010137 ctxt->sax->endDocument(ctxt->userData);
10138 }
10139 ctxt->instate = XML_PARSER_EOF;
10140 }
10141 return((xmlParserErrors) ctxt->errNo);
10142}
10143
10144/************************************************************************
10145 * *
10146 * I/O front end functions to the parser *
10147 * *
10148 ************************************************************************/
10149
10150/**
10151 * xmlStopParser:
10152 * @ctxt: an XML parser context
10153 *
10154 * Blocks further parser processing
10155 */
10156void
10157xmlStopParser(xmlParserCtxtPtr ctxt) {
10158 ctxt->instate = XML_PARSER_EOF;
10159 if (ctxt->input != NULL)
10160 ctxt->input->cur = BAD_CAST"";
10161}
10162
10163/**
10164 * xmlCreatePushParserCtxt:
10165 * @sax: a SAX handler
10166 * @user_data: The user data returned on SAX callbacks
10167 * @chunk: a pointer to an array of chars
10168 * @size: number of chars in the array
10169 * @filename: an optional file name or URI
10170 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010171 * Create a parser context for using the XML parser in push mode.
10172 * If @buffer and @size are non-NULL, the data is used to detect
10173 * the encoding. The remaining characters will be parsed so they
10174 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010175 * To allow content encoding detection, @size should be >= 4
10176 * The value of @filename is used for fetching external entities
10177 * and error/warning reports.
10178 *
10179 * Returns the new parser context or NULL
10180 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010181
Owen Taylor3473f882001-02-23 17:55:21 +000010182xmlParserCtxtPtr
10183xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10184 const char *chunk, int size, const char *filename) {
10185 xmlParserCtxtPtr ctxt;
10186 xmlParserInputPtr inputStream;
10187 xmlParserInputBufferPtr buf;
10188 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10189
10190 /*
10191 * plug some encoding conversion routines
10192 */
10193 if ((chunk != NULL) && (size >= 4))
10194 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10195
10196 buf = xmlAllocParserInputBuffer(enc);
10197 if (buf == NULL) return(NULL);
10198
10199 ctxt = xmlNewParserCtxt();
10200 if (ctxt == NULL) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010201 xmlGenericError(xmlGenericErrorContext,
10202 "xml parser: out of memory\n");
10203 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010204 return(NULL);
10205 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010206 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10207 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010208 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010209 xmlFreeParserInputBuffer(buf);
10210 xmlFreeParserCtxt(ctxt);
10211 return(NULL);
10212 }
Owen Taylor3473f882001-02-23 17:55:21 +000010213 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010214#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010215 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010216#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010217 xmlFree(ctxt->sax);
10218 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10219 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010220 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010221 xmlFreeParserInputBuffer(buf);
10222 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010223 return(NULL);
10224 }
10225 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10226 if (user_data != NULL)
10227 ctxt->userData = user_data;
10228 }
10229 if (filename == NULL) {
10230 ctxt->directory = NULL;
10231 } else {
10232 ctxt->directory = xmlParserGetDirectory(filename);
10233 }
10234
10235 inputStream = xmlNewInputStream(ctxt);
10236 if (inputStream == NULL) {
10237 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010238 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010239 return(NULL);
10240 }
10241
10242 if (filename == NULL)
10243 inputStream->filename = NULL;
10244 else
Daniel Veillardf4862f02002-09-10 11:13:43 +000010245 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010246 xmlCanonicPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +000010247 inputStream->buf = buf;
10248 inputStream->base = inputStream->buf->buffer->content;
10249 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010250 inputStream->end =
10251 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010252
10253 inputPush(ctxt, inputStream);
10254
10255 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10256 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010257 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10258 int cur = ctxt->input->cur - ctxt->input->base;
10259
Owen Taylor3473f882001-02-23 17:55:21 +000010260 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010261
10262 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10263 ctxt->input->cur = ctxt->input->base + cur;
10264 ctxt->input->end =
10265 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010266#ifdef DEBUG_PUSH
10267 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10268#endif
10269 }
10270
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010271 if (enc != XML_CHAR_ENCODING_NONE) {
10272 xmlSwitchEncoding(ctxt, enc);
10273 }
10274
Owen Taylor3473f882001-02-23 17:55:21 +000010275 return(ctxt);
10276}
10277
10278/**
10279 * xmlCreateIOParserCtxt:
10280 * @sax: a SAX handler
10281 * @user_data: The user data returned on SAX callbacks
10282 * @ioread: an I/O read function
10283 * @ioclose: an I/O close function
10284 * @ioctx: an I/O handler
10285 * @enc: the charset encoding if known
10286 *
10287 * Create a parser context for using the XML parser with an existing
10288 * I/O stream
10289 *
10290 * Returns the new parser context or NULL
10291 */
10292xmlParserCtxtPtr
10293xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10294 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10295 void *ioctx, xmlCharEncoding enc) {
10296 xmlParserCtxtPtr ctxt;
10297 xmlParserInputPtr inputStream;
10298 xmlParserInputBufferPtr buf;
10299
10300 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10301 if (buf == NULL) return(NULL);
10302
10303 ctxt = xmlNewParserCtxt();
10304 if (ctxt == NULL) {
10305 xmlFree(buf);
10306 return(NULL);
10307 }
10308 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010309#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010310 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010311#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010312 xmlFree(ctxt->sax);
10313 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10314 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010315 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010316 xmlFree(ctxt);
10317 return(NULL);
10318 }
10319 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10320 if (user_data != NULL)
10321 ctxt->userData = user_data;
10322 }
10323
10324 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10325 if (inputStream == NULL) {
10326 xmlFreeParserCtxt(ctxt);
10327 return(NULL);
10328 }
10329 inputPush(ctxt, inputStream);
10330
10331 return(ctxt);
10332}
10333
Daniel Veillard4432df22003-09-28 18:58:27 +000010334#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010335/************************************************************************
10336 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010337 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010338 * *
10339 ************************************************************************/
10340
10341/**
10342 * xmlIOParseDTD:
10343 * @sax: the SAX handler block or NULL
10344 * @input: an Input Buffer
10345 * @enc: the charset encoding if known
10346 *
10347 * Load and parse a DTD
10348 *
10349 * Returns the resulting xmlDtdPtr or NULL in case of error.
10350 * @input will be freed at parsing end.
10351 */
10352
10353xmlDtdPtr
10354xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10355 xmlCharEncoding enc) {
10356 xmlDtdPtr ret = NULL;
10357 xmlParserCtxtPtr ctxt;
10358 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010359 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010360
10361 if (input == NULL)
10362 return(NULL);
10363
10364 ctxt = xmlNewParserCtxt();
10365 if (ctxt == NULL) {
10366 return(NULL);
10367 }
10368
10369 /*
10370 * Set-up the SAX context
10371 */
10372 if (sax != NULL) {
10373 if (ctxt->sax != NULL)
10374 xmlFree(ctxt->sax);
10375 ctxt->sax = sax;
10376 ctxt->userData = NULL;
10377 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010378 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010379
10380 /*
10381 * generate a parser input from the I/O handler
10382 */
10383
10384 pinput = xmlNewIOInputStream(ctxt, input, enc);
10385 if (pinput == NULL) {
10386 if (sax != NULL) ctxt->sax = NULL;
10387 xmlFreeParserCtxt(ctxt);
10388 return(NULL);
10389 }
10390
10391 /*
10392 * plug some encoding conversion routines here.
10393 */
10394 xmlPushInput(ctxt, pinput);
10395
10396 pinput->filename = NULL;
10397 pinput->line = 1;
10398 pinput->col = 1;
10399 pinput->base = ctxt->input->cur;
10400 pinput->cur = ctxt->input->cur;
10401 pinput->free = NULL;
10402
10403 /*
10404 * let's parse that entity knowing it's an external subset.
10405 */
10406 ctxt->inSubset = 2;
10407 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10408 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10409 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010410
10411 if (enc == XML_CHAR_ENCODING_NONE) {
10412 /*
10413 * Get the 4 first bytes and decode the charset
10414 * if enc != XML_CHAR_ENCODING_NONE
10415 * plug some encoding conversion routines.
10416 */
10417 start[0] = RAW;
10418 start[1] = NXT(1);
10419 start[2] = NXT(2);
10420 start[3] = NXT(3);
10421 enc = xmlDetectCharEncoding(start, 4);
10422 if (enc != XML_CHAR_ENCODING_NONE) {
10423 xmlSwitchEncoding(ctxt, enc);
10424 }
10425 }
10426
Owen Taylor3473f882001-02-23 17:55:21 +000010427 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10428
10429 if (ctxt->myDoc != NULL) {
10430 if (ctxt->wellFormed) {
10431 ret = ctxt->myDoc->extSubset;
10432 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010433 if (ret != NULL) {
10434 xmlNodePtr tmp;
10435
10436 ret->doc = NULL;
10437 tmp = ret->children;
10438 while (tmp != NULL) {
10439 tmp->doc = NULL;
10440 tmp = tmp->next;
10441 }
10442 }
Owen Taylor3473f882001-02-23 17:55:21 +000010443 } else {
10444 ret = NULL;
10445 }
10446 xmlFreeDoc(ctxt->myDoc);
10447 ctxt->myDoc = NULL;
10448 }
10449 if (sax != NULL) ctxt->sax = NULL;
10450 xmlFreeParserCtxt(ctxt);
10451
10452 return(ret);
10453}
10454
10455/**
10456 * xmlSAXParseDTD:
10457 * @sax: the SAX handler block
10458 * @ExternalID: a NAME* containing the External ID of the DTD
10459 * @SystemID: a NAME* containing the URL to the DTD
10460 *
10461 * Load and parse an external subset.
10462 *
10463 * Returns the resulting xmlDtdPtr or NULL in case of error.
10464 */
10465
10466xmlDtdPtr
10467xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10468 const xmlChar *SystemID) {
10469 xmlDtdPtr ret = NULL;
10470 xmlParserCtxtPtr ctxt;
10471 xmlParserInputPtr input = NULL;
10472 xmlCharEncoding enc;
10473
10474 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10475
10476 ctxt = xmlNewParserCtxt();
10477 if (ctxt == NULL) {
10478 return(NULL);
10479 }
10480
10481 /*
10482 * Set-up the SAX context
10483 */
10484 if (sax != NULL) {
10485 if (ctxt->sax != NULL)
10486 xmlFree(ctxt->sax);
10487 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010488 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010489 }
10490
10491 /*
10492 * Ask the Entity resolver to load the damn thing
10493 */
10494
10495 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillardc6abc3d2003-04-26 13:27:30 +000010496 input = ctxt->sax->resolveEntity(ctxt, ExternalID, SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010497 if (input == NULL) {
10498 if (sax != NULL) ctxt->sax = NULL;
10499 xmlFreeParserCtxt(ctxt);
10500 return(NULL);
10501 }
10502
10503 /*
10504 * plug some encoding conversion routines here.
10505 */
10506 xmlPushInput(ctxt, input);
10507 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10508 xmlSwitchEncoding(ctxt, enc);
10509
10510 if (input->filename == NULL)
Daniel Veillard85095e22003-04-23 13:56:44 +000010511 input->filename = (char *) xmlCanonicPath(SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010512 input->line = 1;
10513 input->col = 1;
10514 input->base = ctxt->input->cur;
10515 input->cur = ctxt->input->cur;
10516 input->free = NULL;
10517
10518 /*
10519 * let's parse that entity knowing it's an external subset.
10520 */
10521 ctxt->inSubset = 2;
10522 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10523 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10524 ExternalID, SystemID);
10525 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10526
10527 if (ctxt->myDoc != NULL) {
10528 if (ctxt->wellFormed) {
10529 ret = ctxt->myDoc->extSubset;
10530 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010531 if (ret != NULL) {
10532 xmlNodePtr tmp;
10533
10534 ret->doc = NULL;
10535 tmp = ret->children;
10536 while (tmp != NULL) {
10537 tmp->doc = NULL;
10538 tmp = tmp->next;
10539 }
10540 }
Owen Taylor3473f882001-02-23 17:55:21 +000010541 } else {
10542 ret = NULL;
10543 }
10544 xmlFreeDoc(ctxt->myDoc);
10545 ctxt->myDoc = NULL;
10546 }
10547 if (sax != NULL) ctxt->sax = NULL;
10548 xmlFreeParserCtxt(ctxt);
10549
10550 return(ret);
10551}
10552
Daniel Veillard4432df22003-09-28 18:58:27 +000010553
Owen Taylor3473f882001-02-23 17:55:21 +000010554/**
10555 * xmlParseDTD:
10556 * @ExternalID: a NAME* containing the External ID of the DTD
10557 * @SystemID: a NAME* containing the URL to the DTD
10558 *
10559 * Load and parse an external subset.
10560 *
10561 * Returns the resulting xmlDtdPtr or NULL in case of error.
10562 */
10563
10564xmlDtdPtr
10565xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10566 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10567}
Daniel Veillard4432df22003-09-28 18:58:27 +000010568#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010569
10570/************************************************************************
10571 * *
10572 * Front ends when parsing an Entity *
10573 * *
10574 ************************************************************************/
10575
10576/**
Owen Taylor3473f882001-02-23 17:55:21 +000010577 * xmlParseCtxtExternalEntity:
10578 * @ctx: the existing parsing context
10579 * @URL: the URL for the entity to load
10580 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010581 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010582 *
10583 * Parse an external general entity within an existing parsing context
10584 * An external general parsed entity is well-formed if it matches the
10585 * production labeled extParsedEnt.
10586 *
10587 * [78] extParsedEnt ::= TextDecl? content
10588 *
10589 * Returns 0 if the entity is well formed, -1 in case of args problem and
10590 * the parser error code otherwise
10591 */
10592
10593int
10594xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010595 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010596 xmlParserCtxtPtr ctxt;
10597 xmlDocPtr newDoc;
10598 xmlSAXHandlerPtr oldsax = NULL;
10599 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010600 xmlChar start[4];
10601 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010602
10603 if (ctx->depth > 40) {
10604 return(XML_ERR_ENTITY_LOOP);
10605 }
10606
Daniel Veillardcda96922001-08-21 10:56:31 +000010607 if (lst != NULL)
10608 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010609 if ((URL == NULL) && (ID == NULL))
10610 return(-1);
10611 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10612 return(-1);
10613
10614
10615 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10616 if (ctxt == NULL) return(-1);
10617 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010618 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010619 oldsax = ctxt->sax;
10620 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010621 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010622 newDoc = xmlNewDoc(BAD_CAST "1.0");
10623 if (newDoc == NULL) {
10624 xmlFreeParserCtxt(ctxt);
10625 return(-1);
10626 }
10627 if (ctx->myDoc != NULL) {
10628 newDoc->intSubset = ctx->myDoc->intSubset;
10629 newDoc->extSubset = ctx->myDoc->extSubset;
10630 }
10631 if (ctx->myDoc->URL != NULL) {
10632 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10633 }
10634 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10635 if (newDoc->children == NULL) {
10636 ctxt->sax = oldsax;
10637 xmlFreeParserCtxt(ctxt);
10638 newDoc->intSubset = NULL;
10639 newDoc->extSubset = NULL;
10640 xmlFreeDoc(newDoc);
10641 return(-1);
10642 }
10643 nodePush(ctxt, newDoc->children);
10644 if (ctx->myDoc == NULL) {
10645 ctxt->myDoc = newDoc;
10646 } else {
10647 ctxt->myDoc = ctx->myDoc;
10648 newDoc->children->doc = ctx->myDoc;
10649 }
10650
Daniel Veillard87a764e2001-06-20 17:41:10 +000010651 /*
10652 * Get the 4 first bytes and decode the charset
10653 * if enc != XML_CHAR_ENCODING_NONE
10654 * plug some encoding conversion routines.
10655 */
10656 GROW
10657 start[0] = RAW;
10658 start[1] = NXT(1);
10659 start[2] = NXT(2);
10660 start[3] = NXT(3);
10661 enc = xmlDetectCharEncoding(start, 4);
10662 if (enc != XML_CHAR_ENCODING_NONE) {
10663 xmlSwitchEncoding(ctxt, enc);
10664 }
10665
Owen Taylor3473f882001-02-23 17:55:21 +000010666 /*
10667 * Parse a possible text declaration first
10668 */
Owen Taylor3473f882001-02-23 17:55:21 +000010669 if ((RAW == '<') && (NXT(1) == '?') &&
10670 (NXT(2) == 'x') && (NXT(3) == 'm') &&
10671 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
10672 xmlParseTextDecl(ctxt);
10673 }
10674
10675 /*
10676 * Doing validity checking on chunk doesn't make sense
10677 */
10678 ctxt->instate = XML_PARSER_CONTENT;
10679 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010680 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010681 ctxt->loadsubset = ctx->loadsubset;
10682 ctxt->depth = ctx->depth + 1;
10683 ctxt->replaceEntities = ctx->replaceEntities;
10684 if (ctxt->validate) {
10685 ctxt->vctxt.error = ctx->vctxt.error;
10686 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010687 } else {
10688 ctxt->vctxt.error = NULL;
10689 ctxt->vctxt.warning = NULL;
10690 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010691 ctxt->vctxt.nodeTab = NULL;
10692 ctxt->vctxt.nodeNr = 0;
10693 ctxt->vctxt.nodeMax = 0;
10694 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010695
10696 xmlParseContent(ctxt);
10697
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010698 ctx->validate = ctxt->validate;
10699 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010700 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010701 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010702 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010703 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010704 }
10705 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010706 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010707 }
10708
10709 if (!ctxt->wellFormed) {
10710 if (ctxt->errNo == 0)
10711 ret = 1;
10712 else
10713 ret = ctxt->errNo;
10714 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010715 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010716 xmlNodePtr cur;
10717
10718 /*
10719 * Return the newly created nodeset after unlinking it from
10720 * they pseudo parent.
10721 */
10722 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010723 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010724 while (cur != NULL) {
10725 cur->parent = NULL;
10726 cur = cur->next;
10727 }
10728 newDoc->children->children = NULL;
10729 }
10730 ret = 0;
10731 }
10732 ctxt->sax = oldsax;
10733 xmlFreeParserCtxt(ctxt);
10734 newDoc->intSubset = NULL;
10735 newDoc->extSubset = NULL;
10736 xmlFreeDoc(newDoc);
10737
10738 return(ret);
10739}
10740
10741/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010742 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010743 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010744 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010745 * @sax: the SAX handler bloc (possibly NULL)
10746 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10747 * @depth: Used for loop detection, use 0
10748 * @URL: the URL for the entity to load
10749 * @ID: the System ID for the entity to load
10750 * @list: the return value for the set of parsed nodes
10751 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010752 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010753 *
10754 * Returns 0 if the entity is well formed, -1 in case of args problem and
10755 * the parser error code otherwise
10756 */
10757
Daniel Veillard7d515752003-09-26 19:12:37 +000010758static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010759xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10760 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010761 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010762 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010763 xmlParserCtxtPtr ctxt;
10764 xmlDocPtr newDoc;
10765 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000010766 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010767 xmlChar start[4];
10768 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010769
10770 if (depth > 40) {
10771 return(XML_ERR_ENTITY_LOOP);
10772 }
10773
10774
10775
10776 if (list != NULL)
10777 *list = NULL;
10778 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000010779 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010780 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000010781 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010782
10783
10784 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000010785 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000010786 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010787 if (oldctxt != NULL) {
10788 ctxt->_private = oldctxt->_private;
10789 ctxt->loadsubset = oldctxt->loadsubset;
10790 ctxt->validate = oldctxt->validate;
10791 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010792 ctxt->record_info = oldctxt->record_info;
10793 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10794 ctxt->node_seq.length = oldctxt->node_seq.length;
10795 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010796 } else {
10797 /*
10798 * Doing validity checking on chunk without context
10799 * doesn't make sense
10800 */
10801 ctxt->_private = NULL;
10802 ctxt->validate = 0;
10803 ctxt->external = 2;
10804 ctxt->loadsubset = 0;
10805 }
Owen Taylor3473f882001-02-23 17:55:21 +000010806 if (sax != NULL) {
10807 oldsax = ctxt->sax;
10808 ctxt->sax = sax;
10809 if (user_data != NULL)
10810 ctxt->userData = user_data;
10811 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010812 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010813 newDoc = xmlNewDoc(BAD_CAST "1.0");
10814 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010815 ctxt->node_seq.maximum = 0;
10816 ctxt->node_seq.length = 0;
10817 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010818 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000010819 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010820 }
10821 if (doc != NULL) {
10822 newDoc->intSubset = doc->intSubset;
10823 newDoc->extSubset = doc->extSubset;
10824 }
10825 if (doc->URL != NULL) {
10826 newDoc->URL = xmlStrdup(doc->URL);
10827 }
10828 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10829 if (newDoc->children == NULL) {
10830 if (sax != NULL)
10831 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010832 ctxt->node_seq.maximum = 0;
10833 ctxt->node_seq.length = 0;
10834 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010835 xmlFreeParserCtxt(ctxt);
10836 newDoc->intSubset = NULL;
10837 newDoc->extSubset = NULL;
10838 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000010839 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010840 }
10841 nodePush(ctxt, newDoc->children);
10842 if (doc == NULL) {
10843 ctxt->myDoc = newDoc;
10844 } else {
10845 ctxt->myDoc = doc;
10846 newDoc->children->doc = doc;
10847 }
10848
Daniel Veillard87a764e2001-06-20 17:41:10 +000010849 /*
10850 * Get the 4 first bytes and decode the charset
10851 * if enc != XML_CHAR_ENCODING_NONE
10852 * plug some encoding conversion routines.
10853 */
10854 GROW;
10855 start[0] = RAW;
10856 start[1] = NXT(1);
10857 start[2] = NXT(2);
10858 start[3] = NXT(3);
10859 enc = xmlDetectCharEncoding(start, 4);
10860 if (enc != XML_CHAR_ENCODING_NONE) {
10861 xmlSwitchEncoding(ctxt, enc);
10862 }
10863
Owen Taylor3473f882001-02-23 17:55:21 +000010864 /*
10865 * Parse a possible text declaration first
10866 */
Owen Taylor3473f882001-02-23 17:55:21 +000010867 if ((RAW == '<') && (NXT(1) == '?') &&
10868 (NXT(2) == 'x') && (NXT(3) == 'm') &&
10869 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
10870 xmlParseTextDecl(ctxt);
10871 }
10872
Owen Taylor3473f882001-02-23 17:55:21 +000010873 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000010874 ctxt->depth = depth;
10875
10876 xmlParseContent(ctxt);
10877
Daniel Veillard561b7f82002-03-20 21:55:57 +000010878 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010879 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000010880 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010881 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010882 }
10883 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010884 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010885 }
10886
10887 if (!ctxt->wellFormed) {
10888 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010889 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000010890 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010891 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000010892 } else {
10893 if (list != NULL) {
10894 xmlNodePtr cur;
10895
10896 /*
10897 * Return the newly created nodeset after unlinking it from
10898 * they pseudo parent.
10899 */
10900 cur = newDoc->children->children;
10901 *list = cur;
10902 while (cur != NULL) {
10903 cur->parent = NULL;
10904 cur = cur->next;
10905 }
10906 newDoc->children->children = NULL;
10907 }
Daniel Veillard7d515752003-09-26 19:12:37 +000010908 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000010909 }
10910 if (sax != NULL)
10911 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010912 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10913 oldctxt->node_seq.length = ctxt->node_seq.length;
10914 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010915 ctxt->node_seq.maximum = 0;
10916 ctxt->node_seq.length = 0;
10917 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010918 xmlFreeParserCtxt(ctxt);
10919 newDoc->intSubset = NULL;
10920 newDoc->extSubset = NULL;
10921 xmlFreeDoc(newDoc);
10922
10923 return(ret);
10924}
10925
Daniel Veillard81273902003-09-30 00:43:48 +000010926#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010927/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010928 * xmlParseExternalEntity:
10929 * @doc: the document the chunk pertains to
10930 * @sax: the SAX handler bloc (possibly NULL)
10931 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10932 * @depth: Used for loop detection, use 0
10933 * @URL: the URL for the entity to load
10934 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010935 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010936 *
10937 * Parse an external general entity
10938 * An external general parsed entity is well-formed if it matches the
10939 * production labeled extParsedEnt.
10940 *
10941 * [78] extParsedEnt ::= TextDecl? content
10942 *
10943 * Returns 0 if the entity is well formed, -1 in case of args problem and
10944 * the parser error code otherwise
10945 */
10946
10947int
10948xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010949 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010950 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010951 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010952}
10953
10954/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010955 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010956 * @doc: the document the chunk pertains to
10957 * @sax: the SAX handler bloc (possibly NULL)
10958 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10959 * @depth: Used for loop detection, use 0
10960 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010961 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010962 *
10963 * Parse a well-balanced chunk of an XML document
10964 * called by the parser
10965 * The allowed sequence for the Well Balanced Chunk is the one defined by
10966 * the content production in the XML grammar:
10967 *
10968 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10969 *
10970 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10971 * the parser error code otherwise
10972 */
10973
10974int
10975xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010976 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010977 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10978 depth, string, lst, 0 );
10979}
Daniel Veillard81273902003-09-30 00:43:48 +000010980#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000010981
10982/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000010983 * xmlParseBalancedChunkMemoryInternal:
10984 * @oldctxt: the existing parsing context
10985 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10986 * @user_data: the user data field for the parser context
10987 * @lst: the return value for the set of parsed nodes
10988 *
10989 *
10990 * Parse a well-balanced chunk of an XML document
10991 * called by the parser
10992 * The allowed sequence for the Well Balanced Chunk is the one defined by
10993 * the content production in the XML grammar:
10994 *
10995 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10996 *
Daniel Veillard7d515752003-09-26 19:12:37 +000010997 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
10998 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000010999 *
11000 * In case recover is set to 1, the nodelist will not be empty even if
11001 * the parsed chunk is not well balanced.
11002 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011003static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011004xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11005 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11006 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011007 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011008 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011009 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011010 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011011 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011012
11013 if (oldctxt->depth > 40) {
11014 return(XML_ERR_ENTITY_LOOP);
11015 }
11016
11017
11018 if (lst != NULL)
11019 *lst = NULL;
11020 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011021 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011022
11023 size = xmlStrlen(string);
11024
11025 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011026 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011027 if (user_data != NULL)
11028 ctxt->userData = user_data;
11029 else
11030 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011031 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11032 ctxt->dict = oldctxt->dict;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011033
11034 oldsax = ctxt->sax;
11035 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011036 xmlDetectSAX2(ctxt);
11037
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011038 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011039 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011040 newDoc = xmlNewDoc(BAD_CAST "1.0");
11041 if (newDoc == NULL) {
11042 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011043 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011044 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011045 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011046 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000011047 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011048 } else {
11049 ctxt->myDoc = oldctxt->myDoc;
11050 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011051 }
Daniel Veillard9bc53102002-11-25 13:20:04 +000011052 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +000011053 BAD_CAST "pseudoroot", NULL);
11054 if (ctxt->myDoc->children == NULL) {
11055 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011056 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011057 xmlFreeParserCtxt(ctxt);
11058 if (newDoc != NULL)
11059 xmlFreeDoc(newDoc);
William M. Brack7b9154b2003-09-27 19:23:50 +000011060 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011061 }
11062 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011063 ctxt->instate = XML_PARSER_CONTENT;
11064 ctxt->depth = oldctxt->depth + 1;
11065
Daniel Veillard328f48c2002-11-15 15:24:34 +000011066 ctxt->validate = 0;
11067 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011068 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11069 /*
11070 * ID/IDREF registration will be done in xmlValidateElement below
11071 */
11072 ctxt->loadsubset |= XML_SKIP_IDS;
11073 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011074 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011075
Daniel Veillard68e9e742002-11-16 15:35:11 +000011076 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011077 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011078 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011079 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011080 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011081 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011082 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011083 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011084 }
11085
11086 if (!ctxt->wellFormed) {
11087 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011088 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011089 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011090 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011091 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011092 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011093 }
11094
William M. Brack7b9154b2003-09-27 19:23:50 +000011095 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011096 xmlNodePtr cur;
11097
11098 /*
11099 * Return the newly created nodeset after unlinking it from
11100 * they pseudo parent.
11101 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011102 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011103 *lst = cur;
11104 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011105#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8d589042003-02-04 15:07:21 +000011106 if (oldctxt->validate && oldctxt->wellFormed &&
11107 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
11108 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11109 oldctxt->myDoc, cur);
11110 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011111#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011112 cur->parent = NULL;
11113 cur = cur->next;
11114 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011115 ctxt->myDoc->children->children = NULL;
11116 }
11117 if (ctxt->myDoc != NULL) {
11118 xmlFreeNode(ctxt->myDoc->children);
11119 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011120 }
11121
11122 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011123 ctxt->dict = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011124 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011125 if (newDoc != NULL)
11126 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011127
11128 return(ret);
11129}
11130
Daniel Veillard81273902003-09-30 00:43:48 +000011131#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011132/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011133 * xmlParseBalancedChunkMemoryRecover:
11134 * @doc: the document the chunk pertains to
11135 * @sax: the SAX handler bloc (possibly NULL)
11136 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11137 * @depth: Used for loop detection, use 0
11138 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11139 * @lst: the return value for the set of parsed nodes
11140 * @recover: return nodes even if the data is broken (use 0)
11141 *
11142 *
11143 * Parse a well-balanced chunk of an XML document
11144 * called by the parser
11145 * The allowed sequence for the Well Balanced Chunk is the one defined by
11146 * the content production in the XML grammar:
11147 *
11148 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11149 *
11150 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11151 * the parser error code otherwise
11152 *
11153 * In case recover is set to 1, the nodelist will not be empty even if
11154 * the parsed chunk is not well balanced.
11155 */
11156int
11157xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11158 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11159 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011160 xmlParserCtxtPtr ctxt;
11161 xmlDocPtr newDoc;
11162 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000011163 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000011164 int size;
11165 int ret = 0;
11166
11167 if (depth > 40) {
11168 return(XML_ERR_ENTITY_LOOP);
11169 }
11170
11171
Daniel Veillardcda96922001-08-21 10:56:31 +000011172 if (lst != NULL)
11173 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011174 if (string == NULL)
11175 return(-1);
11176
11177 size = xmlStrlen(string);
11178
11179 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11180 if (ctxt == NULL) return(-1);
11181 ctxt->userData = ctxt;
11182 if (sax != NULL) {
11183 oldsax = ctxt->sax;
11184 ctxt->sax = sax;
11185 if (user_data != NULL)
11186 ctxt->userData = user_data;
11187 }
11188 newDoc = xmlNewDoc(BAD_CAST "1.0");
11189 if (newDoc == NULL) {
11190 xmlFreeParserCtxt(ctxt);
11191 return(-1);
11192 }
11193 if (doc != NULL) {
11194 newDoc->intSubset = doc->intSubset;
11195 newDoc->extSubset = doc->extSubset;
11196 }
11197 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11198 if (newDoc->children == NULL) {
11199 if (sax != NULL)
11200 ctxt->sax = oldsax;
11201 xmlFreeParserCtxt(ctxt);
11202 newDoc->intSubset = NULL;
11203 newDoc->extSubset = NULL;
11204 xmlFreeDoc(newDoc);
11205 return(-1);
11206 }
11207 nodePush(ctxt, newDoc->children);
11208 if (doc == NULL) {
11209 ctxt->myDoc = newDoc;
11210 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011211 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011212 newDoc->children->doc = doc;
11213 }
11214 ctxt->instate = XML_PARSER_CONTENT;
11215 ctxt->depth = depth;
11216
11217 /*
11218 * Doing validity checking on chunk doesn't make sense
11219 */
11220 ctxt->validate = 0;
11221 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011222 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011223
Daniel Veillardb39bc392002-10-26 19:29:51 +000011224 if ( doc != NULL ){
11225 content = doc->children;
11226 doc->children = NULL;
11227 xmlParseContent(ctxt);
11228 doc->children = content;
11229 }
11230 else {
11231 xmlParseContent(ctxt);
11232 }
Owen Taylor3473f882001-02-23 17:55:21 +000011233 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011234 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011235 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011236 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011237 }
11238 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011239 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011240 }
11241
11242 if (!ctxt->wellFormed) {
11243 if (ctxt->errNo == 0)
11244 ret = 1;
11245 else
11246 ret = ctxt->errNo;
11247 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011248 ret = 0;
11249 }
11250
11251 if (lst != NULL && (ret == 0 || recover == 1)) {
11252 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011253
11254 /*
11255 * Return the newly created nodeset after unlinking it from
11256 * they pseudo parent.
11257 */
11258 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011259 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011260 while (cur != NULL) {
11261 cur->parent = NULL;
11262 cur = cur->next;
11263 }
11264 newDoc->children->children = NULL;
11265 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011266
Owen Taylor3473f882001-02-23 17:55:21 +000011267 if (sax != NULL)
11268 ctxt->sax = oldsax;
11269 xmlFreeParserCtxt(ctxt);
11270 newDoc->intSubset = NULL;
11271 newDoc->extSubset = NULL;
11272 xmlFreeDoc(newDoc);
11273
11274 return(ret);
11275}
11276
11277/**
11278 * xmlSAXParseEntity:
11279 * @sax: the SAX handler block
11280 * @filename: the filename
11281 *
11282 * parse an XML external entity out of context and build a tree.
11283 * It use the given SAX function block to handle the parsing callback.
11284 * If sax is NULL, fallback to the default DOM tree building routines.
11285 *
11286 * [78] extParsedEnt ::= TextDecl? content
11287 *
11288 * This correspond to a "Well Balanced" chunk
11289 *
11290 * Returns the resulting document tree
11291 */
11292
11293xmlDocPtr
11294xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11295 xmlDocPtr ret;
11296 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011297
11298 ctxt = xmlCreateFileParserCtxt(filename);
11299 if (ctxt == NULL) {
11300 return(NULL);
11301 }
11302 if (sax != NULL) {
11303 if (ctxt->sax != NULL)
11304 xmlFree(ctxt->sax);
11305 ctxt->sax = sax;
11306 ctxt->userData = NULL;
11307 }
11308
Owen Taylor3473f882001-02-23 17:55:21 +000011309 xmlParseExtParsedEnt(ctxt);
11310
11311 if (ctxt->wellFormed)
11312 ret = ctxt->myDoc;
11313 else {
11314 ret = NULL;
11315 xmlFreeDoc(ctxt->myDoc);
11316 ctxt->myDoc = NULL;
11317 }
11318 if (sax != NULL)
11319 ctxt->sax = NULL;
11320 xmlFreeParserCtxt(ctxt);
11321
11322 return(ret);
11323}
11324
11325/**
11326 * xmlParseEntity:
11327 * @filename: the filename
11328 *
11329 * parse an XML external entity out of context and build a tree.
11330 *
11331 * [78] extParsedEnt ::= TextDecl? content
11332 *
11333 * This correspond to a "Well Balanced" chunk
11334 *
11335 * Returns the resulting document tree
11336 */
11337
11338xmlDocPtr
11339xmlParseEntity(const char *filename) {
11340 return(xmlSAXParseEntity(NULL, filename));
11341}
Daniel Veillard81273902003-09-30 00:43:48 +000011342#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011343
11344/**
11345 * xmlCreateEntityParserCtxt:
11346 * @URL: the entity URL
11347 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011348 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000011349 *
11350 * Create a parser context for an external entity
11351 * Automatic support for ZLIB/Compress compressed document is provided
11352 * by default if found at compile-time.
11353 *
11354 * Returns the new parser context or NULL
11355 */
11356xmlParserCtxtPtr
11357xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
11358 const xmlChar *base) {
11359 xmlParserCtxtPtr ctxt;
11360 xmlParserInputPtr inputStream;
11361 char *directory = NULL;
11362 xmlChar *uri;
11363
11364 ctxt = xmlNewParserCtxt();
11365 if (ctxt == NULL) {
11366 return(NULL);
11367 }
11368
11369 uri = xmlBuildURI(URL, base);
11370
11371 if (uri == NULL) {
11372 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11373 if (inputStream == NULL) {
11374 xmlFreeParserCtxt(ctxt);
11375 return(NULL);
11376 }
11377
11378 inputPush(ctxt, inputStream);
11379
11380 if ((ctxt->directory == NULL) && (directory == NULL))
11381 directory = xmlParserGetDirectory((char *)URL);
11382 if ((ctxt->directory == NULL) && (directory != NULL))
11383 ctxt->directory = directory;
11384 } else {
11385 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
11386 if (inputStream == NULL) {
11387 xmlFree(uri);
11388 xmlFreeParserCtxt(ctxt);
11389 return(NULL);
11390 }
11391
11392 inputPush(ctxt, inputStream);
11393
11394 if ((ctxt->directory == NULL) && (directory == NULL))
11395 directory = xmlParserGetDirectory((char *)uri);
11396 if ((ctxt->directory == NULL) && (directory != NULL))
11397 ctxt->directory = directory;
11398 xmlFree(uri);
11399 }
Owen Taylor3473f882001-02-23 17:55:21 +000011400 return(ctxt);
11401}
11402
11403/************************************************************************
11404 * *
11405 * Front ends when parsing from a file *
11406 * *
11407 ************************************************************************/
11408
11409/**
11410 * xmlCreateFileParserCtxt:
11411 * @filename: the filename
11412 *
11413 * Create a parser context for a file content.
11414 * Automatic support for ZLIB/Compress compressed document is provided
11415 * by default if found at compile-time.
11416 *
11417 * Returns the new parser context or NULL
11418 */
11419xmlParserCtxtPtr
11420xmlCreateFileParserCtxt(const char *filename)
11421{
11422 xmlParserCtxtPtr ctxt;
11423 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000011424 char *directory = NULL;
11425
Owen Taylor3473f882001-02-23 17:55:21 +000011426 ctxt = xmlNewParserCtxt();
11427 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011428 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000011429 return(NULL);
11430 }
11431
Igor Zlatkovicce076162003-02-23 13:39:39 +000011432
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000011433 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011434 if (inputStream == NULL) {
11435 xmlFreeParserCtxt(ctxt);
11436 return(NULL);
11437 }
11438
Owen Taylor3473f882001-02-23 17:55:21 +000011439 inputPush(ctxt, inputStream);
11440 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011441 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011442 if ((ctxt->directory == NULL) && (directory != NULL))
11443 ctxt->directory = directory;
11444
11445 return(ctxt);
11446}
11447
Daniel Veillard81273902003-09-30 00:43:48 +000011448#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011449/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011450 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000011451 * @sax: the SAX handler block
11452 * @filename: the filename
11453 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11454 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000011455 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000011456 *
11457 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11458 * compressed document is provided by default if found at compile-time.
11459 * It use the given SAX function block to handle the parsing callback.
11460 * If sax is NULL, fallback to the default DOM tree building routines.
11461 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011462 * User data (void *) is stored within the parser context in the
11463 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011464 *
Owen Taylor3473f882001-02-23 17:55:21 +000011465 * Returns the resulting document tree
11466 */
11467
11468xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011469xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11470 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011471 xmlDocPtr ret;
11472 xmlParserCtxtPtr ctxt;
11473 char *directory = NULL;
11474
Daniel Veillard635ef722001-10-29 11:48:19 +000011475 xmlInitParser();
11476
Owen Taylor3473f882001-02-23 17:55:21 +000011477 ctxt = xmlCreateFileParserCtxt(filename);
11478 if (ctxt == NULL) {
11479 return(NULL);
11480 }
11481 if (sax != NULL) {
11482 if (ctxt->sax != NULL)
11483 xmlFree(ctxt->sax);
11484 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011485 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011486 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011487 if (data!=NULL) {
11488 ctxt->_private=data;
11489 }
Owen Taylor3473f882001-02-23 17:55:21 +000011490
11491 if ((ctxt->directory == NULL) && (directory == NULL))
11492 directory = xmlParserGetDirectory(filename);
11493 if ((ctxt->directory == NULL) && (directory != NULL))
11494 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11495
Daniel Veillarddad3f682002-11-17 16:47:27 +000011496 ctxt->recovery = recovery;
11497
Owen Taylor3473f882001-02-23 17:55:21 +000011498 xmlParseDocument(ctxt);
11499
William M. Brackc07329e2003-09-08 01:57:30 +000011500 if ((ctxt->wellFormed) || recovery) {
11501 ret = ctxt->myDoc;
11502 if (ctxt->input->buf->compressed > 0)
11503 ret->compression = 9;
11504 else
11505 ret->compression = ctxt->input->buf->compressed;
11506 }
Owen Taylor3473f882001-02-23 17:55:21 +000011507 else {
11508 ret = NULL;
11509 xmlFreeDoc(ctxt->myDoc);
11510 ctxt->myDoc = NULL;
11511 }
11512 if (sax != NULL)
11513 ctxt->sax = NULL;
11514 xmlFreeParserCtxt(ctxt);
11515
11516 return(ret);
11517}
11518
11519/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011520 * xmlSAXParseFile:
11521 * @sax: the SAX handler block
11522 * @filename: the filename
11523 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11524 * documents
11525 *
11526 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11527 * compressed document is provided by default if found at compile-time.
11528 * It use the given SAX function block to handle the parsing callback.
11529 * If sax is NULL, fallback to the default DOM tree building routines.
11530 *
11531 * Returns the resulting document tree
11532 */
11533
11534xmlDocPtr
11535xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11536 int recovery) {
11537 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11538}
11539
11540/**
Owen Taylor3473f882001-02-23 17:55:21 +000011541 * xmlRecoverDoc:
11542 * @cur: a pointer to an array of xmlChar
11543 *
11544 * parse an XML in-memory document and build a tree.
11545 * In the case the document is not Well Formed, a tree is built anyway
11546 *
11547 * Returns the resulting document tree
11548 */
11549
11550xmlDocPtr
11551xmlRecoverDoc(xmlChar *cur) {
11552 return(xmlSAXParseDoc(NULL, cur, 1));
11553}
11554
11555/**
11556 * xmlParseFile:
11557 * @filename: the filename
11558 *
11559 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11560 * compressed document is provided by default if found at compile-time.
11561 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000011562 * Returns the resulting document tree if the file was wellformed,
11563 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000011564 */
11565
11566xmlDocPtr
11567xmlParseFile(const char *filename) {
11568 return(xmlSAXParseFile(NULL, filename, 0));
11569}
11570
11571/**
11572 * xmlRecoverFile:
11573 * @filename: the filename
11574 *
11575 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11576 * compressed document is provided by default if found at compile-time.
11577 * In the case the document is not Well Formed, a tree is built anyway
11578 *
11579 * Returns the resulting document tree
11580 */
11581
11582xmlDocPtr
11583xmlRecoverFile(const char *filename) {
11584 return(xmlSAXParseFile(NULL, filename, 1));
11585}
11586
11587
11588/**
11589 * xmlSetupParserForBuffer:
11590 * @ctxt: an XML parser context
11591 * @buffer: a xmlChar * buffer
11592 * @filename: a file name
11593 *
11594 * Setup the parser context to parse a new buffer; Clears any prior
11595 * contents from the parser context. The buffer parameter must not be
11596 * NULL, but the filename parameter can be
11597 */
11598void
11599xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
11600 const char* filename)
11601{
11602 xmlParserInputPtr input;
11603
11604 input = xmlNewInputStream(ctxt);
11605 if (input == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +000011606 xmlGenericError(xmlGenericErrorContext,
11607 "malloc");
Owen Taylor3473f882001-02-23 17:55:21 +000011608 xmlFree(ctxt);
11609 return;
11610 }
11611
11612 xmlClearParserCtxt(ctxt);
11613 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000011614 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011615 input->base = buffer;
11616 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011617 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000011618 inputPush(ctxt, input);
11619}
11620
11621/**
11622 * xmlSAXUserParseFile:
11623 * @sax: a SAX handler
11624 * @user_data: The user data returned on SAX callbacks
11625 * @filename: a file name
11626 *
11627 * parse an XML file and call the given SAX handler routines.
11628 * Automatic support for ZLIB/Compress compressed document is provided
11629 *
11630 * Returns 0 in case of success or a error number otherwise
11631 */
11632int
11633xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11634 const char *filename) {
11635 int ret = 0;
11636 xmlParserCtxtPtr ctxt;
11637
11638 ctxt = xmlCreateFileParserCtxt(filename);
11639 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000011640#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011641 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011642#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011643 xmlFree(ctxt->sax);
11644 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011645 xmlDetectSAX2(ctxt);
11646
Owen Taylor3473f882001-02-23 17:55:21 +000011647 if (user_data != NULL)
11648 ctxt->userData = user_data;
11649
11650 xmlParseDocument(ctxt);
11651
11652 if (ctxt->wellFormed)
11653 ret = 0;
11654 else {
11655 if (ctxt->errNo != 0)
11656 ret = ctxt->errNo;
11657 else
11658 ret = -1;
11659 }
11660 if (sax != NULL)
11661 ctxt->sax = NULL;
11662 xmlFreeParserCtxt(ctxt);
11663
11664 return ret;
11665}
Daniel Veillard81273902003-09-30 00:43:48 +000011666#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011667
11668/************************************************************************
11669 * *
11670 * Front ends when parsing from memory *
11671 * *
11672 ************************************************************************/
11673
11674/**
11675 * xmlCreateMemoryParserCtxt:
11676 * @buffer: a pointer to a char array
11677 * @size: the size of the array
11678 *
11679 * Create a parser context for an XML in-memory document.
11680 *
11681 * Returns the new parser context or NULL
11682 */
11683xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011684xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011685 xmlParserCtxtPtr ctxt;
11686 xmlParserInputPtr input;
11687 xmlParserInputBufferPtr buf;
11688
11689 if (buffer == NULL)
11690 return(NULL);
11691 if (size <= 0)
11692 return(NULL);
11693
11694 ctxt = xmlNewParserCtxt();
11695 if (ctxt == NULL)
11696 return(NULL);
11697
Daniel Veillard53350552003-09-18 13:35:51 +000011698 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000011699 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011700 if (buf == NULL) {
11701 xmlFreeParserCtxt(ctxt);
11702 return(NULL);
11703 }
Owen Taylor3473f882001-02-23 17:55:21 +000011704
11705 input = xmlNewInputStream(ctxt);
11706 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011707 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011708 xmlFreeParserCtxt(ctxt);
11709 return(NULL);
11710 }
11711
11712 input->filename = NULL;
11713 input->buf = buf;
11714 input->base = input->buf->buffer->content;
11715 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011716 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011717
11718 inputPush(ctxt, input);
11719 return(ctxt);
11720}
11721
Daniel Veillard81273902003-09-30 00:43:48 +000011722#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011723/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011724 * xmlSAXParseMemoryWithData:
11725 * @sax: the SAX handler block
11726 * @buffer: an pointer to a char array
11727 * @size: the size of the array
11728 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11729 * documents
11730 * @data: the userdata
11731 *
11732 * parse an XML in-memory block and use the given SAX function block
11733 * to handle the parsing callback. If sax is NULL, fallback to the default
11734 * DOM tree building routines.
11735 *
11736 * User data (void *) is stored within the parser context in the
11737 * context's _private member, so it is available nearly everywhere in libxml
11738 *
11739 * Returns the resulting document tree
11740 */
11741
11742xmlDocPtr
11743xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
11744 int size, int recovery, void *data) {
11745 xmlDocPtr ret;
11746 xmlParserCtxtPtr ctxt;
11747
11748 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11749 if (ctxt == NULL) return(NULL);
11750 if (sax != NULL) {
11751 if (ctxt->sax != NULL)
11752 xmlFree(ctxt->sax);
11753 ctxt->sax = sax;
11754 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011755 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011756 if (data!=NULL) {
11757 ctxt->_private=data;
11758 }
11759
Daniel Veillardadba5f12003-04-04 16:09:01 +000011760 ctxt->recovery = recovery;
11761
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011762 xmlParseDocument(ctxt);
11763
11764 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11765 else {
11766 ret = NULL;
11767 xmlFreeDoc(ctxt->myDoc);
11768 ctxt->myDoc = NULL;
11769 }
11770 if (sax != NULL)
11771 ctxt->sax = NULL;
11772 xmlFreeParserCtxt(ctxt);
11773
11774 return(ret);
11775}
11776
11777/**
Owen Taylor3473f882001-02-23 17:55:21 +000011778 * xmlSAXParseMemory:
11779 * @sax: the SAX handler block
11780 * @buffer: an pointer to a char array
11781 * @size: the size of the array
11782 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
11783 * documents
11784 *
11785 * parse an XML in-memory block and use the given SAX function block
11786 * to handle the parsing callback. If sax is NULL, fallback to the default
11787 * DOM tree building routines.
11788 *
11789 * Returns the resulting document tree
11790 */
11791xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000011792xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
11793 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011794 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011795}
11796
11797/**
11798 * xmlParseMemory:
11799 * @buffer: an pointer to a char array
11800 * @size: the size of the array
11801 *
11802 * parse an XML in-memory block and build a tree.
11803 *
11804 * Returns the resulting document tree
11805 */
11806
Daniel Veillard50822cb2001-07-26 20:05:51 +000011807xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011808 return(xmlSAXParseMemory(NULL, buffer, size, 0));
11809}
11810
11811/**
11812 * xmlRecoverMemory:
11813 * @buffer: an pointer to a char array
11814 * @size: the size of the array
11815 *
11816 * parse an XML in-memory block and build a tree.
11817 * In the case the document is not Well Formed, a tree is built anyway
11818 *
11819 * Returns the resulting document tree
11820 */
11821
Daniel Veillard50822cb2001-07-26 20:05:51 +000011822xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011823 return(xmlSAXParseMemory(NULL, buffer, size, 1));
11824}
11825
11826/**
11827 * xmlSAXUserParseMemory:
11828 * @sax: a SAX handler
11829 * @user_data: The user data returned on SAX callbacks
11830 * @buffer: an in-memory XML document input
11831 * @size: the length of the XML document in bytes
11832 *
11833 * A better SAX parsing routine.
11834 * parse an XML in-memory buffer and call the given SAX handler routines.
11835 *
11836 * Returns 0 in case of success or a error number otherwise
11837 */
11838int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011839 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011840 int ret = 0;
11841 xmlParserCtxtPtr ctxt;
11842 xmlSAXHandlerPtr oldsax = NULL;
11843
Daniel Veillard9e923512002-08-14 08:48:52 +000011844 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000011845 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11846 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000011847 oldsax = ctxt->sax;
11848 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011849 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000011850 if (user_data != NULL)
11851 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000011852
11853 xmlParseDocument(ctxt);
11854
11855 if (ctxt->wellFormed)
11856 ret = 0;
11857 else {
11858 if (ctxt->errNo != 0)
11859 ret = ctxt->errNo;
11860 else
11861 ret = -1;
11862 }
Daniel Veillard9e923512002-08-14 08:48:52 +000011863 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000011864 xmlFreeParserCtxt(ctxt);
11865
11866 return ret;
11867}
Daniel Veillard81273902003-09-30 00:43:48 +000011868#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011869
11870/**
11871 * xmlCreateDocParserCtxt:
11872 * @cur: a pointer to an array of xmlChar
11873 *
11874 * Creates a parser context for an XML in-memory document.
11875 *
11876 * Returns the new parser context or NULL
11877 */
11878xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011879xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000011880 int len;
11881
11882 if (cur == NULL)
11883 return(NULL);
11884 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011885 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000011886}
11887
Daniel Veillard81273902003-09-30 00:43:48 +000011888#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011889/**
11890 * xmlSAXParseDoc:
11891 * @sax: the SAX handler block
11892 * @cur: a pointer to an array of xmlChar
11893 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11894 * documents
11895 *
11896 * parse an XML in-memory document and build a tree.
11897 * It use the given SAX function block to handle the parsing callback.
11898 * If sax is NULL, fallback to the default DOM tree building routines.
11899 *
11900 * Returns the resulting document tree
11901 */
11902
11903xmlDocPtr
11904xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
11905 xmlDocPtr ret;
11906 xmlParserCtxtPtr ctxt;
11907
11908 if (cur == NULL) return(NULL);
11909
11910
11911 ctxt = xmlCreateDocParserCtxt(cur);
11912 if (ctxt == NULL) return(NULL);
11913 if (sax != NULL) {
11914 ctxt->sax = sax;
11915 ctxt->userData = NULL;
11916 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011917 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011918
11919 xmlParseDocument(ctxt);
11920 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11921 else {
11922 ret = NULL;
11923 xmlFreeDoc(ctxt->myDoc);
11924 ctxt->myDoc = NULL;
11925 }
11926 if (sax != NULL)
11927 ctxt->sax = NULL;
11928 xmlFreeParserCtxt(ctxt);
11929
11930 return(ret);
11931}
11932
11933/**
11934 * xmlParseDoc:
11935 * @cur: a pointer to an array of xmlChar
11936 *
11937 * parse an XML in-memory document and build a tree.
11938 *
11939 * Returns the resulting document tree
11940 */
11941
11942xmlDocPtr
11943xmlParseDoc(xmlChar *cur) {
11944 return(xmlSAXParseDoc(NULL, cur, 0));
11945}
Daniel Veillard81273902003-09-30 00:43:48 +000011946#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011947
Daniel Veillard81273902003-09-30 00:43:48 +000011948#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000011949/************************************************************************
11950 * *
11951 * Specific function to keep track of entities references *
11952 * and used by the XSLT debugger *
11953 * *
11954 ************************************************************************/
11955
11956static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
11957
11958/**
11959 * xmlAddEntityReference:
11960 * @ent : A valid entity
11961 * @firstNode : A valid first node for children of entity
11962 * @lastNode : A valid last node of children entity
11963 *
11964 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
11965 */
11966static void
11967xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
11968 xmlNodePtr lastNode)
11969{
11970 if (xmlEntityRefFunc != NULL) {
11971 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
11972 }
11973}
11974
11975
11976/**
11977 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000011978 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000011979 *
11980 * Set the function to call call back when a xml reference has been made
11981 */
11982void
11983xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
11984{
11985 xmlEntityRefFunc = func;
11986}
Daniel Veillard81273902003-09-30 00:43:48 +000011987#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011988
11989/************************************************************************
11990 * *
11991 * Miscellaneous *
11992 * *
11993 ************************************************************************/
11994
11995#ifdef LIBXML_XPATH_ENABLED
11996#include <libxml/xpath.h>
11997#endif
11998
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011999extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012000static int xmlParserInitialized = 0;
12001
12002/**
12003 * xmlInitParser:
12004 *
12005 * Initialization function for the XML parser.
12006 * This is not reentrant. Call once before processing in case of
12007 * use in multithreaded programs.
12008 */
12009
12010void
12011xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012012 if (xmlParserInitialized != 0)
12013 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012014
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012015 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12016 (xmlGenericError == NULL))
12017 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012018 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000012019 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000012020 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000012021 xmlInitCharEncodingHandlers();
12022 xmlInitializePredefinedEntities();
12023 xmlDefaultSAXHandlerInit();
12024 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012025#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012026 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012027#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012028#ifdef LIBXML_HTML_ENABLED
12029 htmlInitAutoClose();
12030 htmlDefaultSAXHandlerInit();
12031#endif
12032#ifdef LIBXML_XPATH_ENABLED
12033 xmlXPathInit();
12034#endif
12035 xmlParserInitialized = 1;
12036}
12037
12038/**
12039 * xmlCleanupParser:
12040 *
12041 * Cleanup function for the XML parser. It tries to reclaim all
12042 * parsing related global memory allocated for the parser processing.
12043 * It doesn't deallocate any document related memory. Calling this
12044 * function should not prevent reusing the parser.
Daniel Veillard7424eb62003-01-24 14:14:52 +000012045 * One should call xmlCleanupParser() only when the process has
12046 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012047 */
12048
12049void
12050xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012051 if (!xmlParserInitialized)
12052 return;
12053
Owen Taylor3473f882001-02-23 17:55:21 +000012054 xmlCleanupCharEncodingHandlers();
12055 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012056#ifdef LIBXML_CATALOG_ENABLED
12057 xmlCatalogCleanup();
12058#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000012059 xmlCleanupThreads();
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012060 xmlCleanupGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000012061 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012062}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012063
12064/************************************************************************
12065 * *
12066 * New set (2.6.0) of simpler and more flexible APIs *
12067 * *
12068 ************************************************************************/
12069
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. Nothing is done for a NULL string. A variable named
 * "dict" must be visible where the macro is used; it may be NULL, in
 * which case the string is always freed.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement (it is safe in an unbraced if/else); the invocation
 * must be followed by a semicolon.
 */
#define DICT_FREE(str)                                                  \
    do {                                                                \
        if ((str) && ((!dict) ||                                        \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));                                     \
    } while (0)
12081
12082/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012083 * xmlCtxtReset:
12084 * @ctxt: an XML parser context
12085 *
12086 * Reset a parser context
12087 */
12088void
12089xmlCtxtReset(xmlParserCtxtPtr ctxt)
12090{
12091 xmlParserInputPtr input;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012092 xmlDictPtr dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012093
12094 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12095 xmlFreeInputStream(input);
12096 }
12097 ctxt->inputNr = 0;
12098 ctxt->input = NULL;
12099
12100 ctxt->spaceNr = 0;
12101 ctxt->spaceTab[0] = -1;
12102 ctxt->space = &ctxt->spaceTab[0];
12103
12104
12105 ctxt->nodeNr = 0;
12106 ctxt->node = NULL;
12107
12108 ctxt->nameNr = 0;
12109 ctxt->name = NULL;
12110
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012111 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012112 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012113 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012114 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012115 DICT_FREE(ctxt->directory);
12116 ctxt->directory = NULL;
12117 DICT_FREE(ctxt->extSubURI);
12118 ctxt->extSubURI = NULL;
12119 DICT_FREE(ctxt->extSubSystem);
12120 ctxt->extSubSystem = NULL;
12121 if (ctxt->myDoc != NULL)
12122 xmlFreeDoc(ctxt->myDoc);
12123 ctxt->myDoc = NULL;
12124
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012125 ctxt->standalone = -1;
12126 ctxt->hasExternalSubset = 0;
12127 ctxt->hasPErefs = 0;
12128 ctxt->html = 0;
12129 ctxt->external = 0;
12130 ctxt->instate = XML_PARSER_START;
12131 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012132
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012133 ctxt->wellFormed = 1;
12134 ctxt->nsWellFormed = 1;
12135 ctxt->valid = 1;
12136 ctxt->vctxt.userData = ctxt;
12137 ctxt->vctxt.error = xmlParserValidityError;
12138 ctxt->vctxt.warning = xmlParserValidityWarning;
12139 ctxt->record_info = 0;
12140 ctxt->nbChars = 0;
12141 ctxt->checkIndex = 0;
12142 ctxt->inSubset = 0;
12143 ctxt->errNo = XML_ERR_OK;
12144 ctxt->depth = 0;
12145 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12146 ctxt->catalogs = NULL;
12147 xmlInitNodeInfoSeq(&ctxt->node_seq);
12148
12149 if (ctxt->attsDefault != NULL) {
12150 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12151 ctxt->attsDefault = NULL;
12152 }
12153 if (ctxt->attsSpecial != NULL) {
12154 xmlHashFree(ctxt->attsSpecial, NULL);
12155 ctxt->attsSpecial = NULL;
12156 }
12157
Daniel Veillard4432df22003-09-28 18:58:27 +000012158#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012159 if (ctxt->catalogs != NULL)
12160 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012161#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012162}
12163
12164/**
12165 * xmlCtxtUseOptions:
12166 * @ctxt: an XML parser context
12167 * @options: a combination of xmlParserOption(s)
12168 *
12169 * Applies the options to the parser context
12170 *
12171 * Returns 0 in case of success, the set of unknown or unimplemented options
12172 * in case of error.
12173 */
12174int
12175xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
12176{
12177 if (options & XML_PARSE_RECOVER) {
12178 ctxt->recovery = 1;
12179 options -= XML_PARSE_RECOVER;
12180 } else
12181 ctxt->recovery = 0;
12182 if (options & XML_PARSE_DTDLOAD) {
12183 ctxt->loadsubset = XML_DETECT_IDS;
12184 options -= XML_PARSE_DTDLOAD;
12185 } else
12186 ctxt->loadsubset = 0;
12187 if (options & XML_PARSE_DTDATTR) {
12188 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
12189 options -= XML_PARSE_DTDATTR;
12190 }
12191 if (options & XML_PARSE_NOENT) {
12192 ctxt->replaceEntities = 1;
12193 /* ctxt->loadsubset |= XML_DETECT_IDS; */
12194 options -= XML_PARSE_NOENT;
12195 } else
12196 ctxt->replaceEntities = 0;
12197 if (options & XML_PARSE_NOWARNING) {
12198 ctxt->sax->warning = NULL;
12199 options -= XML_PARSE_NOWARNING;
12200 }
12201 if (options & XML_PARSE_NOERROR) {
12202 ctxt->sax->error = NULL;
12203 ctxt->sax->fatalError = NULL;
12204 options -= XML_PARSE_NOERROR;
12205 }
12206 if (options & XML_PARSE_PEDANTIC) {
12207 ctxt->pedantic = 1;
12208 options -= XML_PARSE_PEDANTIC;
12209 } else
12210 ctxt->pedantic = 0;
12211 if (options & XML_PARSE_NOBLANKS) {
12212 ctxt->keepBlanks = 0;
12213 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
12214 options -= XML_PARSE_NOBLANKS;
12215 } else
12216 ctxt->keepBlanks = 1;
12217 if (options & XML_PARSE_DTDVALID) {
12218 ctxt->validate = 1;
12219 if (options & XML_PARSE_NOWARNING)
12220 ctxt->vctxt.warning = NULL;
12221 if (options & XML_PARSE_NOERROR)
12222 ctxt->vctxt.error = NULL;
12223 options -= XML_PARSE_DTDVALID;
12224 } else
12225 ctxt->validate = 0;
Daniel Veillard81273902003-09-30 00:43:48 +000012226#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012227 if (options & XML_PARSE_SAX1) {
12228 ctxt->sax->startElement = xmlSAX2StartElement;
12229 ctxt->sax->endElement = xmlSAX2EndElement;
12230 ctxt->sax->startElementNs = NULL;
12231 ctxt->sax->endElementNs = NULL;
12232 ctxt->sax->initialized = 1;
12233 options -= XML_PARSE_SAX1;
12234 }
Daniel Veillard81273902003-09-30 00:43:48 +000012235#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012236 if (options & XML_PARSE_NODICT) {
12237 ctxt->dictNames = 0;
12238 options -= XML_PARSE_NODICT;
12239 } else {
12240 ctxt->dictNames = 1;
12241 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000012242 if (options & XML_PARSE_NOCDATA) {
12243 ctxt->sax->cdataBlock = NULL;
12244 options -= XML_PARSE_NOCDATA;
12245 }
12246 if (options & XML_PARSE_NSCLEAN) {
12247 ctxt->options |= XML_PARSE_NSCLEAN;
12248 options -= XML_PARSE_NSCLEAN;
12249 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012250 return (options);
12251}
12252
12253/**
12254 * xmlDoRead:
12255 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000012256 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012257 * @encoding: the document encoding, or NULL
12258 * @options: a combination of xmlParserOption(s)
12259 * @reuse: keep the context for reuse
12260 *
12261 * Common front-end for the xmlRead functions
12262 *
12263 * Returns the resulting document tree or NULL
12264 */
12265static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012266xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
12267 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012268{
12269 xmlDocPtr ret;
12270
12271 xmlCtxtUseOptions(ctxt, options);
12272 if (encoding != NULL) {
12273 xmlCharEncodingHandlerPtr hdlr;
12274
12275 hdlr = xmlFindCharEncodingHandler(encoding);
12276 if (hdlr != NULL)
12277 xmlSwitchToEncoding(ctxt, hdlr);
12278 }
Daniel Veillard60942de2003-09-25 21:05:58 +000012279 if ((URL != NULL) && (ctxt->input != NULL) &&
12280 (ctxt->input->filename == NULL))
12281 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012282 xmlParseDocument(ctxt);
12283 if ((ctxt->wellFormed) || ctxt->recovery)
12284 ret = ctxt->myDoc;
12285 else {
12286 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012287 if (ctxt->myDoc != NULL) {
Daniel Veillard9d8c1df2003-09-26 23:27:25 +000012288 if ((ctxt->dictNames) &&
12289 (ctxt->myDoc->dict == ctxt->dict))
12290 xmlDictReference(ctxt->dict);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012291 xmlFreeDoc(ctxt->myDoc);
12292 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012293 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012294 ctxt->myDoc = NULL;
12295 if (!reuse) {
12296 if ((ctxt->dictNames) &&
12297 (ret != NULL) &&
12298 (ret->dict == ctxt->dict))
12299 ctxt->dict = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012300 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012301 } else {
12302 /* Must duplicate the reference to the dictionary */
12303 if ((ctxt->dictNames) &&
12304 (ret != NULL) &&
12305 (ret->dict == ctxt->dict))
12306 xmlDictReference(ctxt->dict);
12307 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012308
12309 return (ret);
12310}
12311
12312/**
12313 * xmlReadDoc:
12314 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012315 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012316 * @encoding: the document encoding, or NULL
12317 * @options: a combination of xmlParserOption(s)
12318 *
12319 * parse an XML in-memory document and build a tree.
12320 *
12321 * Returns the resulting document tree
12322 */
12323xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012324xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012325{
12326 xmlParserCtxtPtr ctxt;
12327
12328 if (cur == NULL)
12329 return (NULL);
12330
12331 ctxt = xmlCreateDocParserCtxt(cur);
12332 if (ctxt == NULL)
12333 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012334 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012335}
12336
12337/**
12338 * xmlReadFile:
12339 * @filename: a file or URL
12340 * @encoding: the document encoding, or NULL
12341 * @options: a combination of xmlParserOption(s)
12342 *
12343 * parse an XML file from the filesystem or the network.
12344 *
12345 * Returns the resulting document tree
12346 */
12347xmlDocPtr
12348xmlReadFile(const char *filename, const char *encoding, int options)
12349{
12350 xmlParserCtxtPtr ctxt;
12351
12352 ctxt = xmlCreateFileParserCtxt(filename);
12353 if (ctxt == NULL)
12354 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012355 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012356}
12357
12358/**
12359 * xmlReadMemory:
12360 * @buffer: a pointer to a char array
12361 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012362 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012363 * @encoding: the document encoding, or NULL
12364 * @options: a combination of xmlParserOption(s)
12365 *
12366 * parse an XML in-memory document and build a tree.
12367 *
12368 * Returns the resulting document tree
12369 */
12370xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012371xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012372{
12373 xmlParserCtxtPtr ctxt;
12374
12375 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12376 if (ctxt == NULL)
12377 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012378 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012379}
12380
12381/**
12382 * xmlReadFd:
12383 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012384 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012385 * @encoding: the document encoding, or NULL
12386 * @options: a combination of xmlParserOption(s)
12387 *
12388 * parse an XML from a file descriptor and build a tree.
12389 *
12390 * Returns the resulting document tree
12391 */
12392xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012393xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012394{
12395 xmlParserCtxtPtr ctxt;
12396 xmlParserInputBufferPtr input;
12397 xmlParserInputPtr stream;
12398
12399 if (fd < 0)
12400 return (NULL);
12401
12402 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12403 if (input == NULL)
12404 return (NULL);
12405 ctxt = xmlNewParserCtxt();
12406 if (ctxt == NULL) {
12407 xmlFreeParserInputBuffer(input);
12408 return (NULL);
12409 }
12410 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12411 if (stream == NULL) {
12412 xmlFreeParserInputBuffer(input);
12413 xmlFreeParserCtxt(ctxt);
12414 return (NULL);
12415 }
12416 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012417 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012418}
12419
12420/**
12421 * xmlReadIO:
12422 * @ioread: an I/O read function
12423 * @ioclose: an I/O close function
12424 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012425 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012426 * @encoding: the document encoding, or NULL
12427 * @options: a combination of xmlParserOption(s)
12428 *
12429 * parse an XML document from I/O functions and source and build a tree.
12430 *
12431 * Returns the resulting document tree
12432 */
12433xmlDocPtr
12434xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000012435 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012436{
12437 xmlParserCtxtPtr ctxt;
12438 xmlParserInputBufferPtr input;
12439 xmlParserInputPtr stream;
12440
12441 if (ioread == NULL)
12442 return (NULL);
12443
12444 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12445 XML_CHAR_ENCODING_NONE);
12446 if (input == NULL)
12447 return (NULL);
12448 ctxt = xmlNewParserCtxt();
12449 if (ctxt == NULL) {
12450 xmlFreeParserInputBuffer(input);
12451 return (NULL);
12452 }
12453 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12454 if (stream == NULL) {
12455 xmlFreeParserInputBuffer(input);
12456 xmlFreeParserCtxt(ctxt);
12457 return (NULL);
12458 }
12459 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012460 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012461}
12462
12463/**
12464 * xmlCtxtReadDoc:
12465 * @ctxt: an XML parser context
12466 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012467 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012468 * @encoding: the document encoding, or NULL
12469 * @options: a combination of xmlParserOption(s)
12470 *
12471 * parse an XML in-memory document and build a tree.
12472 * This reuses the existing @ctxt parser context
12473 *
12474 * Returns the resulting document tree
12475 */
12476xmlDocPtr
12477xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000012478 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012479{
12480 xmlParserInputPtr stream;
12481
12482 if (cur == NULL)
12483 return (NULL);
12484 if (ctxt == NULL)
12485 return (NULL);
12486
12487 xmlCtxtReset(ctxt);
12488
12489 stream = xmlNewStringInputStream(ctxt, cur);
12490 if (stream == NULL) {
12491 return (NULL);
12492 }
12493 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012494 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012495}
12496
12497/**
12498 * xmlCtxtReadFile:
12499 * @ctxt: an XML parser context
12500 * @filename: a file or URL
12501 * @encoding: the document encoding, or NULL
12502 * @options: a combination of xmlParserOption(s)
12503 *
12504 * parse an XML file from the filesystem or the network.
12505 * This reuses the existing @ctxt parser context
12506 *
12507 * Returns the resulting document tree
12508 */
12509xmlDocPtr
12510xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
12511 const char *encoding, int options)
12512{
12513 xmlParserInputPtr stream;
12514
12515 if (filename == NULL)
12516 return (NULL);
12517 if (ctxt == NULL)
12518 return (NULL);
12519
12520 xmlCtxtReset(ctxt);
12521
12522 stream = xmlNewInputFromFile(ctxt, filename);
12523 if (stream == NULL) {
12524 return (NULL);
12525 }
12526 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012527 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012528}
12529
12530/**
12531 * xmlCtxtReadMemory:
12532 * @ctxt: an XML parser context
12533 * @buffer: a pointer to a char array
12534 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012535 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012536 * @encoding: the document encoding, or NULL
12537 * @options: a combination of xmlParserOption(s)
12538 *
12539 * parse an XML in-memory document and build a tree.
12540 * This reuses the existing @ctxt parser context
12541 *
12542 * Returns the resulting document tree
12543 */
12544xmlDocPtr
12545xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000012546 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012547{
12548 xmlParserInputBufferPtr input;
12549 xmlParserInputPtr stream;
12550
12551 if (ctxt == NULL)
12552 return (NULL);
12553 if (buffer == NULL)
12554 return (NULL);
12555
12556 xmlCtxtReset(ctxt);
12557
12558 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12559 if (input == NULL) {
12560 return(NULL);
12561 }
12562
12563 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12564 if (stream == NULL) {
12565 xmlFreeParserInputBuffer(input);
12566 return(NULL);
12567 }
12568
12569 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012570 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012571}
12572
12573/**
12574 * xmlCtxtReadFd:
12575 * @ctxt: an XML parser context
12576 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012577 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012578 * @encoding: the document encoding, or NULL
12579 * @options: a combination of xmlParserOption(s)
12580 *
12581 * parse an XML from a file descriptor and build a tree.
12582 * This reuses the existing @ctxt parser context
12583 *
12584 * Returns the resulting document tree
12585 */
12586xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012587xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
12588 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012589{
12590 xmlParserInputBufferPtr input;
12591 xmlParserInputPtr stream;
12592
12593 if (fd < 0)
12594 return (NULL);
12595 if (ctxt == NULL)
12596 return (NULL);
12597
12598 xmlCtxtReset(ctxt);
12599
12600
12601 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12602 if (input == NULL)
12603 return (NULL);
12604 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12605 if (stream == NULL) {
12606 xmlFreeParserInputBuffer(input);
12607 return (NULL);
12608 }
12609 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012610 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012611}
12612
12613/**
12614 * xmlCtxtReadIO:
12615 * @ctxt: an XML parser context
12616 * @ioread: an I/O read function
12617 * @ioclose: an I/O close function
12618 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012619 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012620 * @encoding: the document encoding, or NULL
12621 * @options: a combination of xmlParserOption(s)
12622 *
12623 * parse an XML document from I/O functions and source and build a tree.
12624 * This reuses the existing @ctxt parser context
12625 *
12626 * Returns the resulting document tree
12627 */
12628xmlDocPtr
12629xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
12630 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000012631 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012632 const char *encoding, int options)
12633{
12634 xmlParserInputBufferPtr input;
12635 xmlParserInputPtr stream;
12636
12637 if (ioread == NULL)
12638 return (NULL);
12639 if (ctxt == NULL)
12640 return (NULL);
12641
12642 xmlCtxtReset(ctxt);
12643
12644 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12645 XML_CHAR_ENCODING_NONE);
12646 if (input == NULL)
12647 return (NULL);
12648 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12649 if (stream == NULL) {
12650 xmlFreeParserInputBuffer(input);
12651 return (NULL);
12652 }
12653 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012654 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012655}