blob: 501b55ea1dc4b0906352d5cfaf3ced7efeaa6a50 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
Daniel Veillard3b2e4e12003-02-03 08:52:58 +000079/**
80 * MAX_DEPTH:
81 *
82 * arbitrary depth limit for the XML documents that we allow to
83 * process. This is not a limitation of the parser but a safety
84 * boundary feature.
85 */
86#define MAX_DEPTH 1024
Owen Taylor3473f882001-02-23 17:55:21 +000087
Daniel Veillard0fb18932003-09-07 09:14:37 +000088#define SAX2 1
89
Daniel Veillard21a0f912001-02-25 19:54:14 +000090#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000091#define XML_PARSER_BUFFER_SIZE 100
92
Daniel Veillard5997aca2002-03-18 18:36:20 +000093#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
94
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets that are allowed by W3C specifications.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
103
104/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000105xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
106 const xmlChar **str);
107
Daniel Veillard7d515752003-09-26 19:12:37 +0000108static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000109xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
110 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000111 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000112 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000113
Daniel Veillard8107a222002-01-13 14:10:10 +0000114static void
115xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
116 xmlNodePtr lastNode);
117
Daniel Veillard7d515752003-09-26 19:12:37 +0000118static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000119xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
120 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000121
122/************************************************************************
123 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000124 * Some factorized error routines *
125 * *
126 ************************************************************************/
127
128/**
129 * xmlErrMemory:
130 * @ctxt: an XML parser context
131 * @extra: extra informations
132 *
133 * Handle a redefinition of attribute error
134 */
135static void
136xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
137{
138 if (ctxt != NULL) {
139 ctxt->errNo = XML_ERR_NO_MEMORY;
140 ctxt->instate = XML_PARSER_EOF;
141 ctxt->disableSAX = 1;
142 }
143 if ((ctxt != NULL) && (ctxt->sax != NULL)
144 && (ctxt->sax->error != NULL)) {
145 if (extra)
146 ctxt->sax->error(ctxt->userData,
147 "Memory allocation failed : %s\n", extra);
148 else
149 ctxt->sax->error(ctxt->userData,
150 "Memory allocation failed !\n");
151 } else {
152 if (extra)
153 xmlGenericError(xmlGenericErrorContext,
154 "Memory allocation failed : %s\n", extra);
155 else
156 xmlGenericError(xmlGenericErrorContext,
157 "Memory allocation failed !\n");
158 }
159}
160
161/**
162 * xmlErrAttributeDup:
163 * @ctxt: an XML parser context
164 * @prefix: the attribute prefix
165 * @localname: the attribute localname
166 *
167 * Handle a redefinition of attribute error
168 */
169static void
170xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
171 const xmlChar * localname)
172{
173 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
174 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
175 if (prefix == NULL)
176 ctxt->sax->error(ctxt->userData,
177 "Attribute %s redefined\n", localname);
178 else
179 ctxt->sax->error(ctxt->userData,
180 "Attribute %s:%s redefined\n", prefix,
181 localname);
182 }
183 ctxt->wellFormed = 0;
184 if (ctxt->recovery == 0)
185 ctxt->disableSAX = 1;
186}
187
188/**
189 * xmlFatalErr:
190 * @ctxt: an XML parser context
191 * @error: the error number
192 * @extra: extra information string
193 *
194 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
195 */
196static void
197xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char * info)
198{
199 const char *errmsg;
200
201 if (ctxt == NULL) {
202 xmlGenericError(xmlGenericErrorContext,
203 "xmlFatalErr: no context !\n");
204 return;
205 }
206 if ((ctxt->sax == NULL) || (ctxt->sax->error == NULL))
207 return;
208 switch (error) {
209 case XML_ERR_INVALID_HEX_CHARREF:
210 errmsg = "CharRef: invalid hexadecimal value\n";
211 break;
212 case XML_ERR_INVALID_DEC_CHARREF:
213 errmsg = "CharRef: invalid decimal value\n";
214 break;
215 case XML_ERR_INVALID_CHARREF:
216 errmsg = "CharRef: invalid value\n";
217 break;
218 case XML_ERR_INTERNAL_ERROR:
219 errmsg = "internal error";
220 break;
221 case XML_ERR_PEREF_AT_EOF:
222 errmsg = "PEReference at end of document\n";
223 break;
224 case XML_ERR_PEREF_IN_PROLOG:
225 errmsg = "PEReference in prolog\n";
226 break;
227 case XML_ERR_PEREF_IN_EPILOG:
228 errmsg = "PEReference in epilog\n";
229 break;
230 case XML_ERR_PEREF_NO_NAME:
231 errmsg = "PEReference: no name\n";
232 break;
233 case XML_ERR_PEREF_SEMICOL_MISSING:
234 errmsg = "PEReference: expecting ';'\n";
235 break;
236 case XML_ERR_ENTITY_LOOP:
237 errmsg = "Detected an entity reference loop\n";
238 break;
239 case XML_ERR_ENTITY_NOT_STARTED:
240 errmsg = "EntityValue: \" or ' expected\n";
241 break;
242 case XML_ERR_ENTITY_PE_INTERNAL:
243 errmsg = "PEReferences forbidden in internal subset\n";
244 break;
245 case XML_ERR_ENTITY_NOT_FINISHED:
246 errmsg = "EntityValue: \" or ' expected\n";
247 break;
248 case XML_ERR_ATTRIBUTE_NOT_STARTED:
249 errmsg = "AttValue: \" or ' expected\n";
250 break;
251 case XML_ERR_LT_IN_ATTRIBUTE:
252 errmsg = "Unescaped '<' not allowed in attributes values\n";
253 break;
254 case XML_ERR_LITERAL_NOT_STARTED:
255 errmsg = "SystemLiteral \" or ' expected\n";
256 break;
257 case XML_ERR_LITERAL_NOT_FINISHED:
258 errmsg = "Unfinished System or Public ID \" or ' expected\n";
259 break;
260 case XML_ERR_MISPLACED_CDATA_END:
261 errmsg = "Sequence ']]>' not allowed in content\n";
262 break;
263 case XML_ERR_URI_REQUIRED:
264 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
265 break;
266 case XML_ERR_PUBID_REQUIRED:
267 errmsg = "PUBLIC, the Public Identifier is missing\n";
268 break;
269 case XML_ERR_HYPHEN_IN_COMMENT:
270 errmsg = "Comment must not contain '--' (double-hyphen)\n";
271 break;
272 case XML_ERR_PI_NOT_STARTED:
273 errmsg = "xmlParsePI : no target name\n";
274 break;
275 case XML_ERR_RESERVED_XML_NAME:
276 errmsg = "Invalid PI name\n";
277 break;
278 case XML_ERR_NOTATION_NOT_STARTED:
279 errmsg = "NOTATION: Name expected here\n";
280 break;
281 case XML_ERR_NOTATION_NOT_FINISHED:
282 errmsg = "'>' required to close NOTATION declaration\n";
283 break;
284 case XML_ERR_VALUE_REQUIRED:
285 errmsg = "Entity value required\n";
286 break;
287 case XML_ERR_URI_FRAGMENT:
288 errmsg = "Fragment not allowed";
289 break;
290 case XML_ERR_ATTLIST_NOT_STARTED:
291 errmsg = "'(' required to start ATTLIST enumeration\n";
292 break;
293 case XML_ERR_NMTOKEN_REQUIRED:
294 errmsg = "NmToken expected in ATTLIST enumeration\n";
295 break;
296 case XML_ERR_ATTLIST_NOT_FINISHED:
297 errmsg = "')' required to finish ATTLIST enumeration\n";
298 break;
299 case XML_ERR_MIXED_NOT_STARTED:
300 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
301 break;
302 case XML_ERR_PCDATA_REQUIRED:
303 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
304 break;
305 case XML_ERR_ELEMCONTENT_NOT_STARTED:
306 errmsg = "ContentDecl : Name or '(' expected\n";
307 break;
308 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
309 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
310 break;
311 case XML_ERR_PEREF_IN_INT_SUBSET:
312 errmsg = "PEReference: forbidden within markup decl in internal subset\n";
313 break;
314 case XML_ERR_GT_REQUIRED:
315 errmsg = "expected '>'\n";
316 break;
317 case XML_ERR_CONDSEC_INVALID:
318 errmsg = "XML conditional section '[' expected\n";
319 break;
320 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
321 errmsg = "Content error in the external subset\n";
322 break;
323 case XML_ERR_CONDSEC_INVALID_KEYWORD:
324 errmsg = "conditional section INCLUDE or IGNORE keyword expected\n";
325 break;
326 case XML_ERR_CONDSEC_NOT_FINISHED:
327 errmsg = "XML conditional section not closed\n";
328 break;
329 case XML_ERR_XMLDECL_NOT_STARTED:
330 errmsg = "Text declaration '<?xml' required\n";
331 break;
332 case XML_ERR_XMLDECL_NOT_FINISHED:
333 errmsg = "parsing XML declaration: '?>' expected\n";
334 break;
335 case XML_ERR_EXT_ENTITY_STANDALONE:
336 errmsg = "external parsed entities cannot be standalone\n";
337 break;
338 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
339 errmsg = "EntityRef: expecting ';'\n";
340 break;
341 case XML_ERR_DOCTYPE_NOT_FINISHED:
342 errmsg = "DOCTYPE improperly terminated\n";
343 break;
344 case XML_ERR_LTSLASH_REQUIRED:
345 errmsg = "EndTag: '</' not found\n";
346 break;
347 case XML_ERR_EQUAL_REQUIRED:
348 errmsg = "expected '='\n";
349 break;
350 case XML_ERR_STRING_NOT_CLOSED:
351 errmsg = "String not closed expecting \" or '\n";
352 break;
353 case XML_ERR_STRING_NOT_STARTED:
354 errmsg = "String not started expecting ' or \"\n";
355 break;
356 case XML_ERR_ENCODING_NAME:
357 errmsg = "Invalid XML encoding name\n";
358 break;
359 case XML_ERR_STANDALONE_VALUE:
360 errmsg = "standalone accepts only 'yes' or 'no'\n";
361 break;
362 case XML_ERR_DOCUMENT_EMPTY:
363 errmsg = "Document is empty\n";
364 break;
365 case XML_ERR_DOCUMENT_END:
366 errmsg = "Extra content at the end of the document\n";
367 break;
368 case XML_ERR_NOT_WELL_BALANCED:
369 errmsg = "chunk is not well balanced\n";
370 break;
371 case XML_ERR_EXTRA_CONTENT:
372 errmsg = "extra content at the end of well balanced chunk\n";
373 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000374 case XML_ERR_VERSION_MISSING:
375 errmsg = "Malformed declaration expecting version\n";
376 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000377#if 0
378 case :
379 errmsg = "\n";
380 break;
381#endif
382 default:
383 errmsg = "Unregistered error message\n";
384 }
385 ctxt->errNo = error;
386 if (info == NULL) {
387 ctxt->sax->error(ctxt->userData, errmsg);
388 } else {
389 ctxt->sax->error(ctxt->userData, "%s: %s", errmsg, info);
390 }
391 ctxt->wellFormed = 0;
392 if (ctxt->recovery == 0)
393 ctxt->disableSAX = 1;
394}
395
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000396/**
397 * xmlFatalErrMsg:
398 * @ctxt: an XML parser context
399 * @error: the error number
400 * @msg: the error message
401 *
402 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
403 */
404static void
405xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *msg)
406{
407 if (ctxt == NULL) {
408 xmlGenericError(xmlGenericErrorContext,
409 "xmlFatalErr: no context !\n");
410 return;
411 }
412 ctxt->errNo = error;
413 if ((ctxt->sax == NULL) || (ctxt->sax->error == NULL))
414 return;
415 ctxt->sax->error(ctxt->userData, msg);
416 ctxt->wellFormed = 0;
417 if (ctxt->recovery == 0)
418 ctxt->disableSAX = 1;
419}
420
421/**
422 * xmlFatalErrMsgInt:
423 * @ctxt: an XML parser context
424 * @error: the error number
425 * @msg: the error message
426 * @val: an integer value
427 *
428 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
429 */
430static void
431xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
432 const char *msg, int val)
433{
434 if (ctxt == NULL) {
435 xmlGenericError(xmlGenericErrorContext,
436 "xmlFatalErr: no context !\n");
437 return;
438 }
439 ctxt->errNo = error;
440 if ((ctxt->sax == NULL) || (ctxt->sax->error == NULL))
441 return;
442 ctxt->sax->error(ctxt->userData, msg, val);
443 ctxt->wellFormed = 0;
444 if (ctxt->recovery == 0)
445 ctxt->disableSAX = 1;
446}
447
448/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000449 * xmlFatalErrMsgStr:
450 * @ctxt: an XML parser context
451 * @error: the error number
452 * @msg: the error message
453 * @val: a string value
454 *
455 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
456 */
457static void
458xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
459 const char *msg, const xmlChar *val)
460{
461 if (ctxt == NULL) {
462 xmlGenericError(xmlGenericErrorContext,
463 "xmlFatalErr: no context !\n");
464 return;
465 }
466 ctxt->errNo = error;
467 if ((ctxt->sax == NULL) || (ctxt->sax->error == NULL))
468 return;
469 ctxt->sax->error(ctxt->userData, msg, val);
470 ctxt->wellFormed = 0;
471 if (ctxt->recovery == 0)
472 ctxt->disableSAX = 1;
473}
474
475/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000476 * xmlNsErr:
477 * @ctxt: an XML parser context
478 * @error: the error number
479 * @msg: the message
480 * @info1: extra information string
481 * @info2: extra information string
482 *
483 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
484 */
485static void
486xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
487 const char *msg,
488 const xmlChar *info1, const xmlChar *info2, const xmlChar *info3)
489{
490 if (ctxt == NULL)
491 return;
492 if ((ctxt->sax == NULL) || (ctxt->sax->error == NULL))
493 return;
494
495 ctxt->errNo = error;
496 if (info1 == NULL) {
497 ctxt->sax->error(ctxt->userData, msg);
498 } else if (info2 == NULL) {
499 ctxt->sax->error(ctxt->userData, msg, info1);
500 } else if (info3 == NULL) {
501 ctxt->sax->error(ctxt->userData, msg, info1, info2);
502 } else {
503 ctxt->sax->error(ctxt->userData, msg, info1, info2, info3);
504 }
505 ctxt->nsWellFormed = 0;
506}
507
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000508/************************************************************************
509 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000510 * SAX2 defaulted attributes handling *
511 * *
512 ************************************************************************/
513
514/**
515 * xmlDetectSAX2:
516 * @ctxt: an XML parser context
517 *
518 * Do the SAX2 detection and specific intialization
519 */
520static void
521xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
522 if (ctxt == NULL) return;
523 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
524 ((ctxt->sax->startElementNs != NULL) ||
525 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
526
527 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
528 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
529 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
530}
531
#ifdef SAX2
/*
 * Set of defaulted attributes declared for one element.
 * Each attribute uses 4 consecutive slots of @values: localname, prefix,
 * value and a pointer to the end of the value. The array is declared
 * with 4 slots but the structure is over-allocated by xmlAddDefAttrs()
 * so that it can hold maxAttrs attributes.
 */
typedef struct _xmlDefAttrs xmlDefAttrs, *xmlDefAttrsPtr;
struct _xmlDefAttrs {
    int nbAttrs;               /* number of defaulted attributes on that element */
    int maxAttrs;              /* the size of the array */
    const xmlChar *values[4];  /* array of localname/prefix/values */
};
#endif
541
542/**
543 * xmlAddDefAttrs:
544 * @ctxt: an XML parser context
545 * @fullname: the element fullname
546 * @fullattr: the attribute fullname
547 * @value: the attribute value
548 *
549 * Add a defaulted attribute for an element
550 */
551static void
552xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
553 const xmlChar *fullname,
554 const xmlChar *fullattr,
555 const xmlChar *value) {
556 xmlDefAttrsPtr defaults;
557 int len;
558 const xmlChar *name;
559 const xmlChar *prefix;
560
561 if (ctxt->attsDefault == NULL) {
562 ctxt->attsDefault = xmlHashCreate(10);
563 if (ctxt->attsDefault == NULL)
564 goto mem_error;
565 }
566
567 /*
568 * plit the element name into prefix:localname , the string found
569 * are within the DTD and hen not associated to namespace names.
570 */
571 name = xmlSplitQName3(fullname, &len);
572 if (name == NULL) {
573 name = xmlDictLookup(ctxt->dict, fullname, -1);
574 prefix = NULL;
575 } else {
576 name = xmlDictLookup(ctxt->dict, name, -1);
577 prefix = xmlDictLookup(ctxt->dict, fullname, len);
578 }
579
580 /*
581 * make sure there is some storage
582 */
583 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
584 if (defaults == NULL) {
585 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
586 12 * sizeof(const xmlChar *));
587 if (defaults == NULL)
588 goto mem_error;
589 defaults->maxAttrs = 4;
590 defaults->nbAttrs = 0;
591 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
592 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
593 defaults = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
594 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
595 if (defaults == NULL)
596 goto mem_error;
597 defaults->maxAttrs *= 2;
598 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
599 }
600
601 /*
602 * plit the element name into prefix:localname , the string found
603 * are within the DTD and hen not associated to namespace names.
604 */
605 name = xmlSplitQName3(fullattr, &len);
606 if (name == NULL) {
607 name = xmlDictLookup(ctxt->dict, fullattr, -1);
608 prefix = NULL;
609 } else {
610 name = xmlDictLookup(ctxt->dict, name, -1);
611 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
612 }
613
614 defaults->values[4 * defaults->nbAttrs] = name;
615 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
616 /* intern the string and precompute the end */
617 len = xmlStrlen(value);
618 value = xmlDictLookup(ctxt->dict, value, len);
619 defaults->values[4 * defaults->nbAttrs + 2] = value;
620 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
621 defaults->nbAttrs++;
622
623 return;
624
625mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000626 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000627 return;
628}
629
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000630/**
631 * xmlAddSpecialAttr:
632 * @ctxt: an XML parser context
633 * @fullname: the element fullname
634 * @fullattr: the attribute fullname
635 * @type: the attribute type
636 *
637 * Register that this attribute is not CDATA
638 */
639static void
640xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
641 const xmlChar *fullname,
642 const xmlChar *fullattr,
643 int type)
644{
645 if (ctxt->attsSpecial == NULL) {
646 ctxt->attsSpecial = xmlHashCreate(10);
647 if (ctxt->attsSpecial == NULL)
648 goto mem_error;
649 }
650
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000651 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
652 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000653 return;
654
655mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000656 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000657 return;
658}
659
Owen Taylor3473f882001-02-23 17:55:21 +0000660/************************************************************************
661 * *
662 * Parser stacks related functions and macros *
663 * *
664 ************************************************************************/
665
666xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
667 const xmlChar ** str);
668
Daniel Veillard0fb18932003-09-07 09:14:37 +0000669#ifdef SAX2
670/**
671 * nsPush:
672 * @ctxt: an XML parser context
673 * @prefix: the namespace prefix or NULL
674 * @URL: the namespace name
675 *
676 * Pushes a new parser namespace on top of the ns stack
677 *
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000678 * Returns -1 in case of error, the index in the stack otherwise,
679 * and -2 if the namespace should be discarded.
Daniel Veillard0fb18932003-09-07 09:14:37 +0000680 */
681static int
682nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
683{
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000684 if (ctxt->options & XML_PARSE_NSCLEAN) {
685 int i;
686 for (i = 0;i < ctxt->nsNr;i += 2) {
687 if (ctxt->nsTab[i] == prefix) {
688 /* in scope */
689 if (ctxt->nsTab[i + 1] == URL)
690 return(-2);
691 /* out of scope keep it */
692 break;
693 }
694 }
695 }
Daniel Veillard0fb18932003-09-07 09:14:37 +0000696 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
697 ctxt->nsMax = 10;
698 ctxt->nsNr = 0;
699 ctxt->nsTab = (const xmlChar **)
700 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
701 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000702 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000703 ctxt->nsMax = 0;
704 return (-1);
705 }
706 } else if (ctxt->nsNr >= ctxt->nsMax) {
707 ctxt->nsMax *= 2;
708 ctxt->nsTab = (const xmlChar **)
709 xmlRealloc(ctxt->nsTab,
710 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
711 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000712 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000713 ctxt->nsMax /= 2;
714 return (-1);
715 }
716 }
717 ctxt->nsTab[ctxt->nsNr++] = prefix;
718 ctxt->nsTab[ctxt->nsNr++] = URL;
719 return (ctxt->nsNr);
720}
721/**
722 * nsPop:
723 * @ctxt: an XML parser context
724 * @nr: the number to pop
725 *
726 * Pops the top @nr parser prefix/namespace from the ns stack
727 *
728 * Returns the number of namespaces removed
729 */
730static int
731nsPop(xmlParserCtxtPtr ctxt, int nr)
732{
733 int i;
734
735 if (ctxt->nsTab == NULL) return(0);
736 if (ctxt->nsNr < nr) {
737 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
738 nr = ctxt->nsNr;
739 }
740 if (ctxt->nsNr <= 0)
741 return (0);
742
743 for (i = 0;i < nr;i++) {
744 ctxt->nsNr--;
745 ctxt->nsTab[ctxt->nsNr] = NULL;
746 }
747 return(nr);
748}
749#endif
750
751static int
752xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
753 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000754 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000755 int maxatts;
756
757 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000758 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000759 atts = (const xmlChar **)
760 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000761 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000762 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000763 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
764 if (attallocs == NULL) goto mem_error;
765 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000766 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000767 } else if (nr + 5 > ctxt->maxatts) {
768 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000769 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
770 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000771 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000772 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000773 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
774 (maxatts / 5) * sizeof(int));
775 if (attallocs == NULL) goto mem_error;
776 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000777 ctxt->maxatts = maxatts;
778 }
779 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000780mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000781 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000782 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000783}
784
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000785/**
786 * inputPush:
787 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000788 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000789 *
790 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000791 *
792 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000793 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000794extern int
795inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
796{
797 if (ctxt->inputNr >= ctxt->inputMax) {
798 ctxt->inputMax *= 2;
799 ctxt->inputTab =
800 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
801 ctxt->inputMax *
802 sizeof(ctxt->inputTab[0]));
803 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000804 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000805 return (0);
806 }
807 }
808 ctxt->inputTab[ctxt->inputNr] = value;
809 ctxt->input = value;
810 return (ctxt->inputNr++);
811}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000812/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000813 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000814 * @ctxt: an XML parser context
815 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000816 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000817 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000818 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000819 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000820extern xmlParserInputPtr
821inputPop(xmlParserCtxtPtr ctxt)
822{
823 xmlParserInputPtr ret;
824
825 if (ctxt->inputNr <= 0)
826 return (0);
827 ctxt->inputNr--;
828 if (ctxt->inputNr > 0)
829 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
830 else
831 ctxt->input = NULL;
832 ret = ctxt->inputTab[ctxt->inputNr];
833 ctxt->inputTab[ctxt->inputNr] = 0;
834 return (ret);
835}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000836/**
837 * nodePush:
838 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000839 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000840 *
841 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000842 *
843 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000844 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000845extern int
846nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
847{
848 if (ctxt->nodeNr >= ctxt->nodeMax) {
849 ctxt->nodeMax *= 2;
850 ctxt->nodeTab =
851 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
852 ctxt->nodeMax *
853 sizeof(ctxt->nodeTab[0]));
854 if (ctxt->nodeTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000855 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000856 return (0);
857 }
858 }
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000859#ifdef MAX_DEPTH
860 if (ctxt->nodeNr > MAX_DEPTH) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000861 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000862 "Excessive depth in document: change MAX_DEPTH = %d\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000863 MAX_DEPTH);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000864 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000865 return(0);
866 }
867#endif
Daniel Veillard1c732d22002-11-30 11:22:59 +0000868 ctxt->nodeTab[ctxt->nodeNr] = value;
869 ctxt->node = value;
870 return (ctxt->nodeNr++);
871}
872/**
873 * nodePop:
874 * @ctxt: an XML parser context
875 *
876 * Pops the top element node from the node stack
877 *
878 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +0000879 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000880extern xmlNodePtr
881nodePop(xmlParserCtxtPtr ctxt)
882{
883 xmlNodePtr ret;
884
885 if (ctxt->nodeNr <= 0)
886 return (0);
887 ctxt->nodeNr--;
888 if (ctxt->nodeNr > 0)
889 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
890 else
891 ctxt->node = NULL;
892 ret = ctxt->nodeTab[ctxt->nodeNr];
893 ctxt->nodeTab[ctxt->nodeNr] = 0;
894 return (ret);
895}
896/**
Daniel Veillarde57ec792003-09-10 10:50:59 +0000897 * nameNsPush:
898 * @ctxt: an XML parser context
899 * @value: the element name
900 * @prefix: the element prefix
901 * @URI: the element namespace name
902 *
903 * Pushes a new element name/prefix/URL on top of the name stack
904 *
905 * Returns -1 in case of error, the index in the stack otherwise
906 */
907static int
908nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
909 const xmlChar *prefix, const xmlChar *URI, int nsNr)
910{
911 if (ctxt->nameNr >= ctxt->nameMax) {
912 const xmlChar * *tmp;
913 void **tmp2;
914 ctxt->nameMax *= 2;
915 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
916 ctxt->nameMax *
917 sizeof(ctxt->nameTab[0]));
918 if (tmp == NULL) {
919 ctxt->nameMax /= 2;
920 goto mem_error;
921 }
922 ctxt->nameTab = tmp;
923 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
924 ctxt->nameMax * 3 *
925 sizeof(ctxt->pushTab[0]));
926 if (tmp2 == NULL) {
927 ctxt->nameMax /= 2;
928 goto mem_error;
929 }
930 ctxt->pushTab = tmp2;
931 }
932 ctxt->nameTab[ctxt->nameNr] = value;
933 ctxt->name = value;
934 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
935 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000936 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000937 return (ctxt->nameNr++);
938mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000939 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000940 return (-1);
941}
942/**
943 * nameNsPop:
944 * @ctxt: an XML parser context
945 *
946 * Pops the top element/prefix/URI name from the name stack
947 *
948 * Returns the name just removed
949 */
950static const xmlChar *
951nameNsPop(xmlParserCtxtPtr ctxt)
952{
953 const xmlChar *ret;
954
955 if (ctxt->nameNr <= 0)
956 return (0);
957 ctxt->nameNr--;
958 if (ctxt->nameNr > 0)
959 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
960 else
961 ctxt->name = NULL;
962 ret = ctxt->nameTab[ctxt->nameNr];
963 ctxt->nameTab[ctxt->nameNr] = NULL;
964 return (ret);
965}
966
967/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000968 * namePush:
969 * @ctxt: an XML parser context
970 * @value: the element name
971 *
972 * Pushes a new element name on top of the name stack
973 *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000974 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +0000975 */
976extern int
Daniel Veillard2fdbd322003-08-18 12:15:38 +0000977namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +0000978{
979 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000980 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +0000981 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000982 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +0000983 ctxt->nameMax *
984 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000985 if (tmp == NULL) {
986 ctxt->nameMax /= 2;
987 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +0000988 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000989 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +0000990 }
991 ctxt->nameTab[ctxt->nameNr] = value;
992 ctxt->name = value;
993 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000994mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000995 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000996 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000997}
998/**
999 * namePop:
1000 * @ctxt: an XML parser context
1001 *
1002 * Pops the top element name from the name stack
1003 *
1004 * Returns the name just removed
1005 */
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001006extern const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001007namePop(xmlParserCtxtPtr ctxt)
1008{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001009 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001010
1011 if (ctxt->nameNr <= 0)
1012 return (0);
1013 ctxt->nameNr--;
1014 if (ctxt->nameNr > 0)
1015 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1016 else
1017 ctxt->name = NULL;
1018 ret = ctxt->nameTab[ctxt->nameNr];
1019 ctxt->nameTab[ctxt->nameNr] = 0;
1020 return (ret);
1021}
Owen Taylor3473f882001-02-23 17:55:21 +00001022
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001023static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001024 if (ctxt->spaceNr >= ctxt->spaceMax) {
1025 ctxt->spaceMax *= 2;
1026 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1027 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1028 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001029 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001030 return(0);
1031 }
1032 }
1033 ctxt->spaceTab[ctxt->spaceNr] = val;
1034 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1035 return(ctxt->spaceNr++);
1036}
1037
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001038static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001039 int ret;
1040 if (ctxt->spaceNr <= 0) return(0);
1041 ctxt->spaceNr--;
1042 if (ctxt->spaceNr > 0)
1043 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1044 else
1045 ctxt->space = NULL;
1046 ret = ctxt->spaceTab[ctxt->spaceNr];
1047 ctxt->spaceTab[ctxt->spaceNr] = -1;
1048 return(ret);
1049}
1050
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1085
/*
 * Raw accessors on the current input. They all expect a variable named
 * `ctxt' (the parser context) to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * SKIP(val): advance the input cursor, the column and the character
 * counter by val xmlChars, then handle a parameter-entity reference if
 * the cursor landed on '%', and pop the input if it is exhausted and
 * cannot be grown.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
1098
Daniel Veillarda880b122003-04-21 21:36:41 +00001099#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001100 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1101 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001102 xmlSHRINK (ctxt);
1103
1104static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1105 xmlParserInputShrink(ctxt->input);
1106 if ((*ctxt->input->cur == 0) &&
1107 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1108 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001109 }
Owen Taylor3473f882001-02-23 17:55:21 +00001110
Daniel Veillarda880b122003-04-21 21:36:41 +00001111#define GROW if ((ctxt->progressive == 0) && \
1112 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001113 xmlGROW (ctxt);
1114
1115static void xmlGROW (xmlParserCtxtPtr ctxt) {
1116 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1117 if ((*ctxt->input->cur == 0) &&
1118 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1119 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001120}
Owen Taylor3473f882001-02-23 17:55:21 +00001121
/* Skip blanks at the current position; see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Advance by one (possibly multi-byte) character. */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one xmlChar, updating the column and the
 * character counter, and try to grow the input when a 0 byte is reached.
 * Expands to a brace-enclosed block.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l xmlChars long,
 * maintaining line/column information, then handle a parameter-entity
 * reference if the cursor landed on '%'.
 * The argument is parenthesized so that an expression can be passed.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Current character of the input (resp. of string s); l receives its length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i; a single store when l == 1, a multi-byte encoding otherwise.
 * Expands to an if/else statement WITHOUT a trailing ';'.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00001148
1149/**
1150 * xmlSkipBlankChars:
1151 * @ctxt: the XML parser context
1152 *
1153 * skip all blanks character found at that point in the input streams.
1154 * It pops up finished entities in the process if allowable at that point.
1155 *
1156 * Returns the number of space chars skipped
1157 */
1158
1159int
1160xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001161 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001162
1163 /*
1164 * It's Okay to use CUR/NEXT here since all the blanks are on
1165 * the ASCII range.
1166 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001167 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1168 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001169 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001170 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001171 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001172 cur = ctxt->input->cur;
1173 while (IS_BLANK(*cur)) {
1174 if (*cur == '\n') {
1175 ctxt->input->line++; ctxt->input->col = 1;
1176 }
1177 cur++;
1178 res++;
1179 if (*cur == 0) {
1180 ctxt->input->cur = cur;
1181 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1182 cur = ctxt->input->cur;
1183 }
1184 }
1185 ctxt->input->cur = cur;
1186 } else {
1187 int cur;
1188 do {
1189 cur = CUR;
1190 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
1191 NEXT;
1192 cur = CUR;
1193 res++;
1194 }
1195 while ((cur == 0) && (ctxt->inputNr > 1) &&
1196 (ctxt->instate != XML_PARSER_COMMENT)) {
1197 xmlPopInput(ctxt);
1198 cur = CUR;
1199 }
1200 /*
1201 * Need to handle support of entities branching here
1202 */
1203 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1204 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1205 }
Owen Taylor3473f882001-02-23 17:55:21 +00001206 return(res);
1207}
1208
1209/************************************************************************
1210 * *
1211 * Commodity functions to handle entities *
1212 * *
1213 ************************************************************************/
1214
1215/**
1216 * xmlPopInput:
1217 * @ctxt: an XML parser context
1218 *
1219 * xmlPopInput: the current input pointed by ctxt->input came to an end
1220 * pop it and return the next char.
1221 *
1222 * Returns the current xmlChar in the parser context
1223 */
1224xmlChar
1225xmlPopInput(xmlParserCtxtPtr ctxt) {
1226 if (ctxt->inputNr == 1) return(0); /* End of main Input */
1227 if (xmlParserDebugEntities)
1228 xmlGenericError(xmlGenericErrorContext,
1229 "Popping input %d\n", ctxt->inputNr);
1230 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001231 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001232 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1233 return(xmlPopInput(ctxt));
1234 return(CUR);
1235}
1236
1237/**
1238 * xmlPushInput:
1239 * @ctxt: an XML parser context
1240 * @input: an XML parser input fragment (entity, XML fragment ...).
1241 *
1242 * xmlPushInput: switch to a new input stream which is stacked on top
1243 * of the previous one(s).
1244 */
1245void
1246xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1247 if (input == NULL) return;
1248
1249 if (xmlParserDebugEntities) {
1250 if ((ctxt->input != NULL) && (ctxt->input->filename))
1251 xmlGenericError(xmlGenericErrorContext,
1252 "%s(%d): ", ctxt->input->filename,
1253 ctxt->input->line);
1254 xmlGenericError(xmlGenericErrorContext,
1255 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1256 }
1257 inputPush(ctxt, input);
1258 GROW;
1259}
1260
1261/**
1262 * xmlParseCharRef:
1263 * @ctxt: an XML parser context
1264 *
1265 * parse Reference declarations
1266 *
1267 * [66] CharRef ::= '&#' [0-9]+ ';' |
1268 * '&#x' [0-9a-fA-F]+ ';'
1269 *
1270 * [ WFC: Legal Character ]
1271 * Characters referred to using character references must match the
1272 * production for Char.
1273 *
1274 * Returns the value parsed (as an int), 0 in case of error
1275 */
1276int
1277xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001278 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001279 int count = 0;
1280
Owen Taylor3473f882001-02-23 17:55:21 +00001281 /*
1282 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1283 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001284 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001285 (NXT(2) == 'x')) {
1286 SKIP(3);
1287 GROW;
1288 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001289 if (count++ > 20) {
1290 count = 0;
1291 GROW;
1292 }
1293 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001294 val = val * 16 + (CUR - '0');
1295 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1296 val = val * 16 + (CUR - 'a') + 10;
1297 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1298 val = val * 16 + (CUR - 'A') + 10;
1299 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001300 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001301 val = 0;
1302 break;
1303 }
1304 NEXT;
1305 count++;
1306 }
1307 if (RAW == ';') {
1308 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001309 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001310 ctxt->nbChars ++;
1311 ctxt->input->cur++;
1312 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001313 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001314 SKIP(2);
1315 GROW;
1316 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001317 if (count++ > 20) {
1318 count = 0;
1319 GROW;
1320 }
1321 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001322 val = val * 10 + (CUR - '0');
1323 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001324 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001325 val = 0;
1326 break;
1327 }
1328 NEXT;
1329 count++;
1330 }
1331 if (RAW == ';') {
1332 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001333 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001334 ctxt->nbChars ++;
1335 ctxt->input->cur++;
1336 }
1337 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001338 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001339 }
1340
1341 /*
1342 * [ WFC: Legal Character ]
1343 * Characters referred to using character references must match the
1344 * production for Char.
1345 */
1346 if (IS_CHAR(val)) {
1347 return(val);
1348 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001349 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1350 "xmlParseCharRef: invalid xmlChar value %d\n",
1351 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001352 }
1353 return(0);
1354}
1355
1356/**
1357 * xmlParseStringCharRef:
1358 * @ctxt: an XML parser context
1359 * @str: a pointer to an index in the string
1360 *
1361 * parse Reference declarations, variant parsing from a string rather
1362 * than an an input flow.
1363 *
1364 * [66] CharRef ::= '&#' [0-9]+ ';' |
1365 * '&#x' [0-9a-fA-F]+ ';'
1366 *
1367 * [ WFC: Legal Character ]
1368 * Characters referred to using character references must match the
1369 * production for Char.
1370 *
1371 * Returns the value parsed (as an int), 0 in case of error, str will be
1372 * updated to the current value of the index
1373 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001374static int
Owen Taylor3473f882001-02-23 17:55:21 +00001375xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1376 const xmlChar *ptr;
1377 xmlChar cur;
1378 int val = 0;
1379
1380 if ((str == NULL) || (*str == NULL)) return(0);
1381 ptr = *str;
1382 cur = *ptr;
1383 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1384 ptr += 3;
1385 cur = *ptr;
1386 while (cur != ';') { /* Non input consuming loop */
1387 if ((cur >= '0') && (cur <= '9'))
1388 val = val * 16 + (cur - '0');
1389 else if ((cur >= 'a') && (cur <= 'f'))
1390 val = val * 16 + (cur - 'a') + 10;
1391 else if ((cur >= 'A') && (cur <= 'F'))
1392 val = val * 16 + (cur - 'A') + 10;
1393 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001394 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001395 val = 0;
1396 break;
1397 }
1398 ptr++;
1399 cur = *ptr;
1400 }
1401 if (cur == ';')
1402 ptr++;
1403 } else if ((cur == '&') && (ptr[1] == '#')){
1404 ptr += 2;
1405 cur = *ptr;
1406 while (cur != ';') { /* Non input consuming loops */
1407 if ((cur >= '0') && (cur <= '9'))
1408 val = val * 10 + (cur - '0');
1409 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001410 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001411 val = 0;
1412 break;
1413 }
1414 ptr++;
1415 cur = *ptr;
1416 }
1417 if (cur == ';')
1418 ptr++;
1419 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001420 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001421 return(0);
1422 }
1423 *str = ptr;
1424
1425 /*
1426 * [ WFC: Legal Character ]
1427 * Characters referred to using character references must match the
1428 * production for Char.
1429 */
1430 if (IS_CHAR(val)) {
1431 return(val);
1432 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001433 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1434 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1435 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001436 }
1437 return(0);
1438}
1439
1440/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001441 * xmlNewBlanksWrapperInputStream:
1442 * @ctxt: an XML parser context
1443 * @entity: an Entity pointer
1444 *
1445 * Create a new input stream for wrapping
1446 * blanks around a PEReference
1447 *
1448 * Returns the new input stream or NULL
1449 */
1450
1451static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1452
Daniel Veillardf4862f02002-09-10 11:13:43 +00001453static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001454xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1455 xmlParserInputPtr input;
1456 xmlChar *buffer;
1457 size_t length;
1458 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001459 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1460 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001461 return(NULL);
1462 }
1463 if (xmlParserDebugEntities)
1464 xmlGenericError(xmlGenericErrorContext,
1465 "new blanks wrapper for entity: %s\n", entity->name);
1466 input = xmlNewInputStream(ctxt);
1467 if (input == NULL) {
1468 return(NULL);
1469 }
1470 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001471 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001472 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001473 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001474 return(NULL);
1475 }
1476 buffer [0] = ' ';
1477 buffer [1] = '%';
1478 buffer [length-3] = ';';
1479 buffer [length-2] = ' ';
1480 buffer [length-1] = 0;
1481 memcpy(buffer + 2, entity->name, length - 5);
1482 input->free = deallocblankswrapper;
1483 input->base = buffer;
1484 input->cur = buffer;
1485 input->length = length;
1486 input->end = &buffer[length];
1487 return(input);
1488}
1489
1490/**
Owen Taylor3473f882001-02-23 17:55:21 +00001491 * xmlParserHandlePEReference:
1492 * @ctxt: the parser context
1493 *
1494 * [69] PEReference ::= '%' Name ';'
1495 *
1496 * [ WFC: No Recursion ]
1497 * A parsed entity must not contain a recursive
1498 * reference to itself, either directly or indirectly.
1499 *
1500 * [ WFC: Entity Declared ]
1501 * In a document without any DTD, a document with only an internal DTD
1502 * subset which contains no parameter entity references, or a document
1503 * with "standalone='yes'", ... ... The declaration of a parameter
1504 * entity must precede any reference to it...
1505 *
1506 * [ VC: Entity Declared ]
1507 * In a document with an external subset or external parameter entities
1508 * with "standalone='no'", ... ... The declaration of a parameter entity
1509 * must precede any reference to it...
1510 *
1511 * [ WFC: In DTD ]
1512 * Parameter-entity references may only appear in the DTD.
1513 * NOTE: misleading but this is handled.
1514 *
1515 * A PEReference may have been detected in the current input stream
1516 * the handling is done accordingly to
1517 * http://www.w3.org/TR/REC-xml#entproc
1518 * i.e.
1519 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001520 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001521 */
1522void
1523xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001524 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001525 xmlEntityPtr entity = NULL;
1526 xmlParserInputPtr input;
1527
Owen Taylor3473f882001-02-23 17:55:21 +00001528 if (RAW != '%') return;
1529 switch(ctxt->instate) {
1530 case XML_PARSER_CDATA_SECTION:
1531 return;
1532 case XML_PARSER_COMMENT:
1533 return;
1534 case XML_PARSER_START_TAG:
1535 return;
1536 case XML_PARSER_END_TAG:
1537 return;
1538 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001539 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001540 return;
1541 case XML_PARSER_PROLOG:
1542 case XML_PARSER_START:
1543 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001544 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001545 return;
1546 case XML_PARSER_ENTITY_DECL:
1547 case XML_PARSER_CONTENT:
1548 case XML_PARSER_ATTRIBUTE_VALUE:
1549 case XML_PARSER_PI:
1550 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001551 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001552 /* we just ignore it there */
1553 return;
1554 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001555 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001556 return;
1557 case XML_PARSER_ENTITY_VALUE:
1558 /*
1559 * NOTE: in the case of entity values, we don't do the
1560 * substitution here since we need the literal
1561 * entity value to be able to save the internal
1562 * subset of the document.
1563 * This will be handled by xmlStringDecodeEntities
1564 */
1565 return;
1566 case XML_PARSER_DTD:
1567 /*
1568 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1569 * In the internal DTD subset, parameter-entity references
1570 * can occur only where markup declarations can occur, not
1571 * within markup declarations.
1572 * In that case this is handled in xmlParseMarkupDecl
1573 */
1574 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1575 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +00001576 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
1577 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001578 break;
1579 case XML_PARSER_IGNORE:
1580 return;
1581 }
1582
1583 NEXT;
1584 name = xmlParseName(ctxt);
1585 if (xmlParserDebugEntities)
1586 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001587 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001588 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001589 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001590 } else {
1591 if (RAW == ';') {
1592 NEXT;
1593 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1594 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1595 if (entity == NULL) {
1596
1597 /*
1598 * [ WFC: Entity Declared ]
1599 * In a document without any DTD, a document with only an
1600 * internal DTD subset which contains no parameter entity
1601 * references, or a document with "standalone='yes'", ...
1602 * ... The declaration of a parameter entity must precede
1603 * any reference to it...
1604 */
1605 if ((ctxt->standalone == 1) ||
1606 ((ctxt->hasExternalSubset == 0) &&
1607 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001608 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00001609 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001610 } else {
1611 /*
1612 * [ VC: Entity Declared ]
1613 * In a document with an external subset or external
1614 * parameter entities with "standalone='no'", ...
1615 * ... The declaration of a parameter entity must precede
1616 * any reference to it...
1617 */
1618 if ((!ctxt->disableSAX) &&
1619 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1620 ctxt->vctxt.error(ctxt->vctxt.userData,
1621 "PEReference: %%%s; not found\n", name);
1622 } else if ((!ctxt->disableSAX) &&
1623 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1624 ctxt->sax->warning(ctxt->userData,
1625 "PEReference: %%%s; not found\n", name);
1626 ctxt->valid = 0;
1627 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00001628 } else if (ctxt->input->free != deallocblankswrapper) {
1629 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
1630 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001631 } else {
1632 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1633 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001634 xmlChar start[4];
1635 xmlCharEncoding enc;
1636
Owen Taylor3473f882001-02-23 17:55:21 +00001637 /*
1638 * handle the extra spaces added before and after
1639 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001640 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00001641 */
1642 input = xmlNewEntityInputStream(ctxt, entity);
1643 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00001644
1645 /*
1646 * Get the 4 first bytes and decode the charset
1647 * if enc != XML_CHAR_ENCODING_NONE
1648 * plug some encoding conversion routines.
1649 */
1650 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +00001651 if (entity->length >= 4) {
1652 start[0] = RAW;
1653 start[1] = NXT(1);
1654 start[2] = NXT(2);
1655 start[3] = NXT(3);
1656 enc = xmlDetectCharEncoding(start, 4);
1657 if (enc != XML_CHAR_ENCODING_NONE) {
1658 xmlSwitchEncoding(ctxt, enc);
1659 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001660 }
1661
Owen Taylor3473f882001-02-23 17:55:21 +00001662 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
1663 (RAW == '<') && (NXT(1) == '?') &&
1664 (NXT(2) == 'x') && (NXT(3) == 'm') &&
1665 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1666 xmlParseTextDecl(ctxt);
1667 }
Owen Taylor3473f882001-02-23 17:55:21 +00001668 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001669 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
1670 "PEReference: %s is not a parameter entity\n",
1671 name);
Owen Taylor3473f882001-02-23 17:55:21 +00001672 }
1673 }
1674 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001675 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001676 }
Owen Taylor3473f882001-02-23 17:55:21 +00001677 }
1678}
1679
/*
 * Macro used to grow the current buffer.
 *
 * Doubles <buffer>_size and reallocates <buffer> accordingly. It expects
 * a variable named <buffer>_size and a label `mem_error' to exist in the
 * calling function. The result of the reallocation goes through a
 * temporary so that, on failure, <buffer> still points to the original
 * (still allocated) block when control reaches mem_error.
 */
#define growBuffer(buffer) {						\
    xmlChar *tmp;							\
    buffer##_size *= 2;							\
    tmp = (xmlChar *)							\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
}
1689
1690/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00001691 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00001692 * @ctxt: the parser context
1693 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001694 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001695 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1696 * @end: an end marker xmlChar, 0 if none
1697 * @end2: an end marker xmlChar, 0 if none
1698 * @end3: an end marker xmlChar, 0 if none
1699 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001700 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001701 *
1702 * [67] Reference ::= EntityRef | CharRef
1703 *
1704 * [69] PEReference ::= '%' Name ';'
1705 *
1706 * Returns A newly allocated string with the substitution done. The caller
1707 * must deallocate it !
1708 */
1709xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001710xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1711 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001712 xmlChar *buffer = NULL;
1713 int buffer_size = 0;
1714
1715 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001716 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001717 xmlEntityPtr ent;
1718 int c,l;
1719 int nbchars = 0;
1720
Daniel Veillarde57ec792003-09-10 10:50:59 +00001721 if ((str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001722 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001723 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001724
1725 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001726 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001727 return(NULL);
1728 }
1729
1730 /*
1731 * allocate a translation buffer.
1732 */
1733 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001734 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001735 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001736
1737 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001738 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001739 * we are operating on already parsed values.
1740 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001741 if (str < last)
1742 c = CUR_SCHAR(str, l);
1743 else
1744 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001745 while ((c != 0) && (c != end) && /* non input consuming loop */
1746 (c != end2) && (c != end3)) {
1747
1748 if (c == 0) break;
1749 if ((c == '&') && (str[1] == '#')) {
1750 int val = xmlParseStringCharRef(ctxt, &str);
1751 if (val != 0) {
1752 COPY_BUF(0,buffer,nbchars,val);
1753 }
1754 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1755 if (xmlParserDebugEntities)
1756 xmlGenericError(xmlGenericErrorContext,
1757 "String decoding Entity Reference: %.30s\n",
1758 str);
1759 ent = xmlParseStringEntityRef(ctxt, &str);
1760 if ((ent != NULL) &&
1761 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1762 if (ent->content != NULL) {
1763 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1764 } else {
1765 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1766 ctxt->sax->error(ctxt->userData,
1767 "internal error entity has no content\n");
1768 }
1769 } else if ((ent != NULL) && (ent->content != NULL)) {
1770 xmlChar *rep;
1771
1772 ctxt->depth++;
1773 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1774 0, 0, 0);
1775 ctxt->depth--;
1776 if (rep != NULL) {
1777 current = rep;
1778 while (*current != 0) { /* non input consuming loop */
1779 buffer[nbchars++] = *current++;
1780 if (nbchars >
1781 buffer_size - XML_PARSER_BUFFER_SIZE) {
1782 growBuffer(buffer);
1783 }
1784 }
1785 xmlFree(rep);
1786 }
1787 } else if (ent != NULL) {
1788 int i = xmlStrlen(ent->name);
1789 const xmlChar *cur = ent->name;
1790
1791 buffer[nbchars++] = '&';
1792 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1793 growBuffer(buffer);
1794 }
1795 for (;i > 0;i--)
1796 buffer[nbchars++] = *cur++;
1797 buffer[nbchars++] = ';';
1798 }
1799 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1800 if (xmlParserDebugEntities)
1801 xmlGenericError(xmlGenericErrorContext,
1802 "String decoding PE Reference: %.30s\n", str);
1803 ent = xmlParseStringPEReference(ctxt, &str);
1804 if (ent != NULL) {
1805 xmlChar *rep;
1806
1807 ctxt->depth++;
1808 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1809 0, 0, 0);
1810 ctxt->depth--;
1811 if (rep != NULL) {
1812 current = rep;
1813 while (*current != 0) { /* non input consuming loop */
1814 buffer[nbchars++] = *current++;
1815 if (nbchars >
1816 buffer_size - XML_PARSER_BUFFER_SIZE) {
1817 growBuffer(buffer);
1818 }
1819 }
1820 xmlFree(rep);
1821 }
1822 }
1823 } else {
1824 COPY_BUF(l,buffer,nbchars,c);
1825 str += l;
1826 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1827 growBuffer(buffer);
1828 }
1829 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001830 if (str < last)
1831 c = CUR_SCHAR(str, l);
1832 else
1833 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001834 }
1835 buffer[nbchars++] = 0;
1836 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001837
1838mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001839 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001840 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001841}
1842
Daniel Veillarde57ec792003-09-10 10:50:59 +00001843/**
1844 * xmlStringDecodeEntities:
1845 * @ctxt: the parser context
1846 * @str: the input string
1847 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1848 * @end: an end marker xmlChar, 0 if none
1849 * @end2: an end marker xmlChar, 0 if none
1850 * @end3: an end marker xmlChar, 0 if none
1851 *
1852 * Takes a entity string content and process to do the adequate substitutions.
1853 *
1854 * [67] Reference ::= EntityRef | CharRef
1855 *
1856 * [69] PEReference ::= '%' Name ';'
1857 *
1858 * Returns A newly allocated string with the substitution done. The caller
1859 * must deallocate it !
1860 */
1861xmlChar *
1862xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1863 xmlChar end, xmlChar end2, xmlChar end3) {
1864 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
1865 end, end2, end3));
1866}
Owen Taylor3473f882001-02-23 17:55:21 +00001867
1868/************************************************************************
1869 * *
1870 * Commodity functions to handle xmlChars *
1871 * *
1872 ************************************************************************/
1873
1874/**
1875 * xmlStrndup:
1876 * @cur: the input xmlChar *
1877 * @len: the len of @cur
1878 *
1879 * a strndup for array of xmlChar's
1880 *
1881 * Returns a new xmlChar * or NULL
1882 */
1883xmlChar *
1884xmlStrndup(const xmlChar *cur, int len) {
1885 xmlChar *ret;
1886
1887 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001888 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001889 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001890 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001891 return(NULL);
1892 }
1893 memcpy(ret, cur, len * sizeof(xmlChar));
1894 ret[len] = 0;
1895 return(ret);
1896}
1897
1898/**
1899 * xmlStrdup:
1900 * @cur: the input xmlChar *
1901 *
1902 * a strdup for array of xmlChar's. Since they are supposed to be
1903 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1904 * a termination mark of '0'.
1905 *
1906 * Returns a new xmlChar * or NULL
1907 */
1908xmlChar *
1909xmlStrdup(const xmlChar *cur) {
1910 const xmlChar *p = cur;
1911
1912 if (cur == NULL) return(NULL);
1913 while (*p != 0) p++; /* non input consuming */
1914 return(xmlStrndup(cur, p - cur));
1915}
1916
1917/**
1918 * xmlCharStrndup:
1919 * @cur: the input char *
1920 * @len: the len of @cur
1921 *
1922 * a strndup for char's to xmlChar's
1923 *
1924 * Returns a new xmlChar * or NULL
1925 */
1926
1927xmlChar *
1928xmlCharStrndup(const char *cur, int len) {
1929 int i;
1930 xmlChar *ret;
1931
1932 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001933 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001934 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001935 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001936 return(NULL);
1937 }
1938 for (i = 0;i < len;i++)
1939 ret[i] = (xmlChar) cur[i];
1940 ret[len] = 0;
1941 return(ret);
1942}
1943
1944/**
1945 * xmlCharStrdup:
1946 * @cur: the input char *
Owen Taylor3473f882001-02-23 17:55:21 +00001947 *
1948 * a strdup for char's to xmlChar's
1949 *
1950 * Returns a new xmlChar * or NULL
1951 */
1952
1953xmlChar *
1954xmlCharStrdup(const char *cur) {
1955 const char *p = cur;
1956
1957 if (cur == NULL) return(NULL);
1958 while (*p != '\0') p++; /* non input consuming */
1959 return(xmlCharStrndup(cur, p - cur));
1960}
1961
1962/**
1963 * xmlStrcmp:
1964 * @str1: the first xmlChar *
1965 * @str2: the second xmlChar *
1966 *
1967 * a strcmp for xmlChar's
1968 *
1969 * Returns the integer result of the comparison
1970 */
1971
1972int
1973xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1974 register int tmp;
1975
1976 if (str1 == str2) return(0);
1977 if (str1 == NULL) return(-1);
1978 if (str2 == NULL) return(1);
1979 do {
1980 tmp = *str1++ - *str2;
1981 if (tmp != 0) return(tmp);
1982 } while (*str2++ != 0);
1983 return 0;
1984}
1985
1986/**
1987 * xmlStrEqual:
1988 * @str1: the first xmlChar *
1989 * @str2: the second xmlChar *
1990 *
1991 * Check if both string are equal of have same content
1992 * Should be a bit more readable and faster than xmlStrEqual()
1993 *
1994 * Returns 1 if they are equal, 0 if they are different
1995 */
1996
1997int
1998xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1999 if (str1 == str2) return(1);
2000 if (str1 == NULL) return(0);
2001 if (str2 == NULL) return(0);
2002 do {
2003 if (*str1++ != *str2) return(0);
2004 } while (*str2++);
2005 return(1);
2006}
2007
2008/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00002009 * xmlStrQEqual:
2010 * @pref: the prefix of the QName
2011 * @name: the localname of the QName
2012 * @str: the second xmlChar *
2013 *
2014 * Check if a QName is Equal to a given string
2015 *
2016 * Returns 1 if they are equal, 0 if they are different
2017 */
2018
2019int
2020xmlStrQEqual(const xmlChar *pref, const xmlChar *name, const xmlChar *str) {
2021 if (pref == NULL) return(xmlStrEqual(name, str));
2022 if (name == NULL) return(0);
2023 if (str == NULL) return(0);
2024
2025 do {
2026 if (*pref++ != *str) return(0);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002027 } while ((*str++) && (*pref));
Daniel Veillarde57ec792003-09-10 10:50:59 +00002028 if (*str++ != ':') return(0);
2029 do {
2030 if (*name++ != *str) return(0);
2031 } while (*str++);
2032 return(1);
2033}
2034
2035/**
Owen Taylor3473f882001-02-23 17:55:21 +00002036 * xmlStrncmp:
2037 * @str1: the first xmlChar *
2038 * @str2: the second xmlChar *
2039 * @len: the max comparison length
2040 *
2041 * a strncmp for xmlChar's
2042 *
2043 * Returns the integer result of the comparison
2044 */
2045
2046int
2047xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
2048 register int tmp;
2049
2050 if (len <= 0) return(0);
2051 if (str1 == str2) return(0);
2052 if (str1 == NULL) return(-1);
2053 if (str2 == NULL) return(1);
2054 do {
2055 tmp = *str1++ - *str2;
2056 if (tmp != 0 || --len == 0) return(tmp);
2057 } while (*str2++ != 0);
2058 return 0;
2059}
2060
Daniel Veillardb44025c2001-10-11 22:55:55 +00002061static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00002062 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
2063 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
2064 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
2065 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
2066 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
2067 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
2068 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
2069 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
2070 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
2071 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
2072 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
2073 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
2074 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
2075 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
2076 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
2077 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
2078 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
2079 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
2080 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
2081 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
2082 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
2083 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
2084 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
2085 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
2086 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
2087 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
2088 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
2089 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
2090 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
2091 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
2092 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
2093 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
2094};
2095
2096/**
2097 * xmlStrcasecmp:
2098 * @str1: the first xmlChar *
2099 * @str2: the second xmlChar *
2100 *
2101 * a strcasecmp for xmlChar's
2102 *
2103 * Returns the integer result of the comparison
2104 */
2105
2106int
2107xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
2108 register int tmp;
2109
2110 if (str1 == str2) return(0);
2111 if (str1 == NULL) return(-1);
2112 if (str2 == NULL) return(1);
2113 do {
2114 tmp = casemap[*str1++] - casemap[*str2];
2115 if (tmp != 0) return(tmp);
2116 } while (*str2++ != 0);
2117 return 0;
2118}
2119
2120/**
2121 * xmlStrncasecmp:
2122 * @str1: the first xmlChar *
2123 * @str2: the second xmlChar *
2124 * @len: the max comparison length
2125 *
2126 * a strncasecmp for xmlChar's
2127 *
2128 * Returns the integer result of the comparison
2129 */
2130
2131int
2132xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
2133 register int tmp;
2134
2135 if (len <= 0) return(0);
2136 if (str1 == str2) return(0);
2137 if (str1 == NULL) return(-1);
2138 if (str2 == NULL) return(1);
2139 do {
2140 tmp = casemap[*str1++] - casemap[*str2];
2141 if (tmp != 0 || --len == 0) return(tmp);
2142 } while (*str2++ != 0);
2143 return 0;
2144}
2145
2146/**
2147 * xmlStrchr:
2148 * @str: the xmlChar * array
2149 * @val: the xmlChar to search
2150 *
2151 * a strchr for xmlChar's
2152 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002153 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002154 */
2155
2156const xmlChar *
2157xmlStrchr(const xmlChar *str, xmlChar val) {
2158 if (str == NULL) return(NULL);
2159 while (*str != 0) { /* non input consuming */
2160 if (*str == val) return((xmlChar *) str);
2161 str++;
2162 }
2163 return(NULL);
2164}
2165
2166/**
2167 * xmlStrstr:
2168 * @str: the xmlChar * array (haystack)
2169 * @val: the xmlChar to search (needle)
2170 *
2171 * a strstr for xmlChar's
2172 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002173 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002174 */
2175
2176const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00002177xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00002178 int n;
2179
2180 if (str == NULL) return(NULL);
2181 if (val == NULL) return(NULL);
2182 n = xmlStrlen(val);
2183
2184 if (n == 0) return(str);
2185 while (*str != 0) { /* non input consuming */
2186 if (*str == *val) {
2187 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
2188 }
2189 str++;
2190 }
2191 return(NULL);
2192}
2193
2194/**
2195 * xmlStrcasestr:
2196 * @str: the xmlChar * array (haystack)
2197 * @val: the xmlChar to search (needle)
2198 *
2199 * a case-ignoring strstr for xmlChar's
2200 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002201 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002202 */
2203
2204const xmlChar *
2205xmlStrcasestr(const xmlChar *str, xmlChar *val) {
2206 int n;
2207
2208 if (str == NULL) return(NULL);
2209 if (val == NULL) return(NULL);
2210 n = xmlStrlen(val);
2211
2212 if (n == 0) return(str);
2213 while (*str != 0) { /* non input consuming */
2214 if (casemap[*str] == casemap[*val])
2215 if (!xmlStrncasecmp(str, val, n)) return(str);
2216 str++;
2217 }
2218 return(NULL);
2219}
2220
2221/**
2222 * xmlStrsub:
2223 * @str: the xmlChar * array (haystack)
2224 * @start: the index of the first char (zero based)
2225 * @len: the length of the substring
2226 *
2227 * Extract a substring of a given string
2228 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002229 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002230 */
2231
2232xmlChar *
2233xmlStrsub(const xmlChar *str, int start, int len) {
2234 int i;
2235
2236 if (str == NULL) return(NULL);
2237 if (start < 0) return(NULL);
2238 if (len < 0) return(NULL);
2239
2240 for (i = 0;i < start;i++) {
2241 if (*str == 0) return(NULL);
2242 str++;
2243 }
2244 if (*str == 0) return(NULL);
2245 return(xmlStrndup(str, len));
2246}
2247
2248/**
2249 * xmlStrlen:
2250 * @str: the xmlChar * array
2251 *
2252 * length of a xmlChar's string
2253 *
2254 * Returns the number of xmlChar contained in the ARRAY.
2255 */
2256
2257int
2258xmlStrlen(const xmlChar *str) {
2259 int len = 0;
2260
2261 if (str == NULL) return(0);
2262 while (*str != 0) { /* non input consuming */
2263 str++;
2264 len++;
2265 }
2266 return(len);
2267}
2268
2269/**
2270 * xmlStrncat:
2271 * @cur: the original xmlChar * array
2272 * @add: the xmlChar * array added
2273 * @len: the length of @add
2274 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002275 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00002276 * first bytes of @add.
2277 *
2278 * Returns a new xmlChar *, the original @cur is reallocated if needed
2279 * and should not be freed
2280 */
2281
2282xmlChar *
2283xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
2284 int size;
2285 xmlChar *ret;
2286
2287 if ((add == NULL) || (len == 0))
2288 return(cur);
2289 if (cur == NULL)
2290 return(xmlStrndup(add, len));
2291
2292 size = xmlStrlen(cur);
2293 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
2294 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002295 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002296 return(cur);
2297 }
2298 memcpy(&ret[size], add, len * sizeof(xmlChar));
2299 ret[size + len] = 0;
2300 return(ret);
2301}
2302
2303/**
2304 * xmlStrcat:
2305 * @cur: the original xmlChar * array
2306 * @add: the xmlChar * array added
2307 *
2308 * a strcat for array of xmlChar's. Since they are supposed to be
2309 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2310 * a termination mark of '0'.
2311 *
2312 * Returns a new xmlChar * containing the concatenated string.
2313 */
2314xmlChar *
2315xmlStrcat(xmlChar *cur, const xmlChar *add) {
2316 const xmlChar *p = add;
2317
2318 if (add == NULL) return(cur);
2319 if (cur == NULL)
2320 return(xmlStrdup(add));
2321
2322 while (*p != 0) p++; /* non input consuming */
2323 return(xmlStrncat(cur, add, p - add));
2324}
2325
2326/************************************************************************
2327 * *
2328 * Commodity functions, cleanup needed ? *
2329 * *
2330 ************************************************************************/
2331
2332/**
2333 * areBlanks:
2334 * @ctxt: an XML parser context
2335 * @str: a xmlChar *
2336 * @len: the size of @str
2337 *
2338 * Is this a sequence of blank chars that one can ignore ?
2339 *
2340 * Returns 1 if ignorable 0 otherwise.
2341 */
2342
2343static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
2344 int i, ret;
2345 xmlNodePtr lastChild;
2346
Daniel Veillard05c13a22001-09-09 08:38:09 +00002347 /*
2348 * Don't spend time trying to differentiate them, the same callback is
2349 * used !
2350 */
2351 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002352 return(0);
2353
Owen Taylor3473f882001-02-23 17:55:21 +00002354 /*
2355 * Check for xml:space value.
2356 */
2357 if (*(ctxt->space) == 1)
2358 return(0);
2359
2360 /*
2361 * Check that the string is made of blanks
2362 */
2363 for (i = 0;i < len;i++)
2364 if (!(IS_BLANK(str[i]))) return(0);
2365
2366 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002367 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002368 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002369 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002370 if (ctxt->myDoc != NULL) {
2371 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2372 if (ret == 0) return(1);
2373 if (ret == 1) return(0);
2374 }
2375
2376 /*
2377 * Otherwise, heuristic :-\
2378 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002379 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002380 if ((ctxt->node->children == NULL) &&
2381 (RAW == '<') && (NXT(1) == '/')) return(0);
2382
2383 lastChild = xmlGetLastChild(ctxt->node);
2384 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002385 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2386 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002387 } else if (xmlNodeIsText(lastChild))
2388 return(0);
2389 else if ((ctxt->node->children != NULL) &&
2390 (xmlNodeIsText(ctxt->node->children)))
2391 return(0);
2392 return(1);
2393}
2394
Owen Taylor3473f882001-02-23 17:55:21 +00002395/************************************************************************
2396 * *
2397 * Extra stuff for namespace support *
2398 * Relates to http://www.w3.org/TR/WD-xml-names *
2399 * *
2400 ************************************************************************/
2401
2402/**
2403 * xmlSplitQName:
2404 * @ctxt: an XML parser context
2405 * @name: an XML parser context
2406 * @prefix: a xmlChar **
2407 *
2408 * parse an UTF8 encoded XML qualified name string
2409 *
2410 * [NS 5] QName ::= (Prefix ':')? LocalPart
2411 *
2412 * [NS 6] Prefix ::= NCName
2413 *
2414 * [NS 7] LocalPart ::= NCName
2415 *
2416 * Returns the local part, and prefix is updated
2417 * to get the Prefix if any.
2418 */
2419
2420xmlChar *
2421xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2422 xmlChar buf[XML_MAX_NAMELEN + 5];
2423 xmlChar *buffer = NULL;
2424 int len = 0;
2425 int max = XML_MAX_NAMELEN;
2426 xmlChar *ret = NULL;
2427 const xmlChar *cur = name;
2428 int c;
2429
2430 *prefix = NULL;
2431
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002432 if (cur == NULL) return(NULL);
2433
Owen Taylor3473f882001-02-23 17:55:21 +00002434#ifndef XML_XML_NAMESPACE
2435 /* xml: prefix is not really a namespace */
2436 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2437 (cur[2] == 'l') && (cur[3] == ':'))
2438 return(xmlStrdup(name));
2439#endif
2440
Daniel Veillard597bc482003-07-24 16:08:28 +00002441 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002442 if (cur[0] == ':')
2443 return(xmlStrdup(name));
2444
2445 c = *cur++;
2446 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2447 buf[len++] = c;
2448 c = *cur++;
2449 }
2450 if (len >= max) {
2451 /*
2452 * Okay someone managed to make a huge name, so he's ready to pay
2453 * for the processing speed.
2454 */
2455 max = len * 2;
2456
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002457 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002458 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002459 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002460 return(NULL);
2461 }
2462 memcpy(buffer, buf, len);
2463 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2464 if (len + 10 > max) {
2465 max *= 2;
2466 buffer = (xmlChar *) xmlRealloc(buffer,
2467 max * sizeof(xmlChar));
2468 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002469 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002470 return(NULL);
2471 }
2472 }
2473 buffer[len++] = c;
2474 c = *cur++;
2475 }
2476 buffer[len] = 0;
2477 }
2478
Daniel Veillard597bc482003-07-24 16:08:28 +00002479 /* nasty but well=formed
2480 if ((c == ':') && (*cur == 0)) {
2481 return(xmlStrdup(name));
2482 } */
2483
Owen Taylor3473f882001-02-23 17:55:21 +00002484 if (buffer == NULL)
2485 ret = xmlStrndup(buf, len);
2486 else {
2487 ret = buffer;
2488 buffer = NULL;
2489 max = XML_MAX_NAMELEN;
2490 }
2491
2492
2493 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002494 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002495 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002496 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002497 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002498 }
Owen Taylor3473f882001-02-23 17:55:21 +00002499 len = 0;
2500
Daniel Veillardbb284f42002-10-16 18:02:47 +00002501 /*
2502 * Check that the first character is proper to start
2503 * a new name
2504 */
2505 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2506 ((c >= 0x41) && (c <= 0x5A)) ||
2507 (c == '_') || (c == ':'))) {
2508 int l;
2509 int first = CUR_SCHAR(cur, l);
2510
2511 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002512 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002513 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002514 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002515 }
2516 }
2517 cur++;
2518
Owen Taylor3473f882001-02-23 17:55:21 +00002519 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2520 buf[len++] = c;
2521 c = *cur++;
2522 }
2523 if (len >= max) {
2524 /*
2525 * Okay someone managed to make a huge name, so he's ready to pay
2526 * for the processing speed.
2527 */
2528 max = len * 2;
2529
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002530 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002531 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002532 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002533 return(NULL);
2534 }
2535 memcpy(buffer, buf, len);
2536 while (c != 0) { /* tested bigname2.xml */
2537 if (len + 10 > max) {
2538 max *= 2;
2539 buffer = (xmlChar *) xmlRealloc(buffer,
2540 max * sizeof(xmlChar));
2541 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002542 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002543 return(NULL);
2544 }
2545 }
2546 buffer[len++] = c;
2547 c = *cur++;
2548 }
2549 buffer[len] = 0;
2550 }
2551
2552 if (buffer == NULL)
2553 ret = xmlStrndup(buf, len);
2554 else {
2555 ret = buffer;
2556 }
2557 }
2558
2559 return(ret);
2560}
2561
2562/************************************************************************
2563 * *
2564 * The parser itself *
2565 * Relates to http://www.w3.org/TR/REC-xml *
2566 * *
2567 ************************************************************************/
2568
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002569static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002570static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002571 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002572
Owen Taylor3473f882001-02-23 17:55:21 +00002573/**
2574 * xmlParseName:
2575 * @ctxt: an XML parser context
2576 *
2577 * parse an XML name.
2578 *
2579 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2580 * CombiningChar | Extender
2581 *
2582 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2583 *
2584 * [6] Names ::= Name (S Name)*
2585 *
2586 * Returns the Name parsed or NULL
2587 */
2588
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002589const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002590xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002591 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002592 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002593 int count = 0;
2594
2595 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002596
2597 /*
2598 * Accelerator for simple ASCII names
2599 */
2600 in = ctxt->input->cur;
2601 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2602 ((*in >= 0x41) && (*in <= 0x5A)) ||
2603 (*in == '_') || (*in == ':')) {
2604 in++;
2605 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2606 ((*in >= 0x41) && (*in <= 0x5A)) ||
2607 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002608 (*in == '_') || (*in == '-') ||
2609 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002610 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002611 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002612 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002613 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002614 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002615 ctxt->nbChars += count;
2616 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002617 if (ret == NULL)
2618 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002619 return(ret);
2620 }
2621 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002622 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002623}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002624
Daniel Veillard46de64e2002-05-29 08:21:33 +00002625/**
2626 * xmlParseNameAndCompare:
2627 * @ctxt: an XML parser context
2628 *
2629 * parse an XML name and compares for match
2630 * (specialized for endtag parsing)
2631 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002632 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2633 * and the name for mismatch
2634 */
2635
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002636static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002637xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
2638 const xmlChar *cmp = other;
2639 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002640 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002641
2642 GROW;
2643
2644 in = ctxt->input->cur;
2645 while (*in != 0 && *in == *cmp) {
2646 ++in;
2647 ++cmp;
2648 }
2649 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
2650 /* success */
2651 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002652 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002653 }
2654 /* failure (or end of input buffer), check with full function */
2655 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002656 /* strings coming from the dictionnary direct compare possible */
2657 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002658 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002659 }
2660 return ret;
2661}
2662
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002663static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002664xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002665 int len = 0, l;
2666 int c;
2667 int count = 0;
2668
2669 /*
2670 * Handler for more complex cases
2671 */
2672 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002673 c = CUR_CHAR(l);
2674 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2675 (!IS_LETTER(c) && (c != '_') &&
2676 (c != ':'))) {
2677 return(NULL);
2678 }
2679
2680 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
2681 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2682 (c == '.') || (c == '-') ||
2683 (c == '_') || (c == ':') ||
2684 (IS_COMBINING(c)) ||
2685 (IS_EXTENDER(c)))) {
2686 if (count++ > 100) {
2687 count = 0;
2688 GROW;
2689 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002690 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002691 NEXTL(l);
2692 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002693 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002694 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002695}
2696
2697/**
2698 * xmlParseStringName:
2699 * @ctxt: an XML parser context
2700 * @str: a pointer to the string pointer (IN/OUT)
2701 *
2702 * parse an XML name.
2703 *
2704 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2705 * CombiningChar | Extender
2706 *
2707 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2708 *
2709 * [6] Names ::= Name (S Name)*
2710 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002711 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002712 * is updated to the current location in the string.
2713 */
2714
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002715static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002716xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2717 xmlChar buf[XML_MAX_NAMELEN + 5];
2718 const xmlChar *cur = *str;
2719 int len = 0, l;
2720 int c;
2721
2722 c = CUR_SCHAR(cur, l);
2723 if (!IS_LETTER(c) && (c != '_') &&
2724 (c != ':')) {
2725 return(NULL);
2726 }
2727
2728 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2729 (c == '.') || (c == '-') ||
2730 (c == '_') || (c == ':') ||
2731 (IS_COMBINING(c)) ||
2732 (IS_EXTENDER(c))) {
2733 COPY_BUF(l,buf,len,c);
2734 cur += l;
2735 c = CUR_SCHAR(cur, l);
2736 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2737 /*
2738 * Okay someone managed to make a huge name, so he's ready to pay
2739 * for the processing speed.
2740 */
2741 xmlChar *buffer;
2742 int max = len * 2;
2743
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002744 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002745 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002746 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002747 return(NULL);
2748 }
2749 memcpy(buffer, buf, len);
2750 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2751 (c == '.') || (c == '-') ||
2752 (c == '_') || (c == ':') ||
2753 (IS_COMBINING(c)) ||
2754 (IS_EXTENDER(c))) {
2755 if (len + 10 > max) {
2756 max *= 2;
2757 buffer = (xmlChar *) xmlRealloc(buffer,
2758 max * sizeof(xmlChar));
2759 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002760 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002761 return(NULL);
2762 }
2763 }
2764 COPY_BUF(l,buffer,len,c);
2765 cur += l;
2766 c = CUR_SCHAR(cur, l);
2767 }
2768 buffer[len] = 0;
2769 *str = cur;
2770 return(buffer);
2771 }
2772 }
2773 *str = cur;
2774 return(xmlStrndup(buf, len));
2775}
2776
2777/**
2778 * xmlParseNmtoken:
2779 * @ctxt: an XML parser context
2780 *
2781 * parse an XML Nmtoken.
2782 *
2783 * [7] Nmtoken ::= (NameChar)+
2784 *
2785 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2786 *
2787 * Returns the Nmtoken parsed or NULL
2788 */
2789
2790xmlChar *
2791xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2792 xmlChar buf[XML_MAX_NAMELEN + 5];
2793 int len = 0, l;
2794 int c;
2795 int count = 0;
2796
2797 GROW;
2798 c = CUR_CHAR(l);
2799
2800 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2801 (c == '.') || (c == '-') ||
2802 (c == '_') || (c == ':') ||
2803 (IS_COMBINING(c)) ||
2804 (IS_EXTENDER(c))) {
2805 if (count++ > 100) {
2806 count = 0;
2807 GROW;
2808 }
2809 COPY_BUF(l,buf,len,c);
2810 NEXTL(l);
2811 c = CUR_CHAR(l);
2812 if (len >= XML_MAX_NAMELEN) {
2813 /*
2814 * Okay someone managed to make a huge token, so he's ready to pay
2815 * for the processing speed.
2816 */
2817 xmlChar *buffer;
2818 int max = len * 2;
2819
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002820 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002821 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002822 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002823 return(NULL);
2824 }
2825 memcpy(buffer, buf, len);
2826 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2827 (c == '.') || (c == '-') ||
2828 (c == '_') || (c == ':') ||
2829 (IS_COMBINING(c)) ||
2830 (IS_EXTENDER(c))) {
2831 if (count++ > 100) {
2832 count = 0;
2833 GROW;
2834 }
2835 if (len + 10 > max) {
2836 max *= 2;
2837 buffer = (xmlChar *) xmlRealloc(buffer,
2838 max * sizeof(xmlChar));
2839 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002840 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002841 return(NULL);
2842 }
2843 }
2844 COPY_BUF(l,buffer,len,c);
2845 NEXTL(l);
2846 c = CUR_CHAR(l);
2847 }
2848 buffer[len] = 0;
2849 return(buffer);
2850 }
2851 }
2852 if (len == 0)
2853 return(NULL);
2854 return(xmlStrndup(buf, len));
2855}
2856
2857/**
2858 * xmlParseEntityValue:
2859 * @ctxt: an XML parser context
2860 * @orig: if non-NULL store a copy of the original entity value
2861 *
2862 * parse a value for ENTITY declarations
2863 *
2864 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2865 * "'" ([^%&'] | PEReference | Reference)* "'"
2866 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002867 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002868 */
2869
2870xmlChar *
2871xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2872 xmlChar *buf = NULL;
2873 int len = 0;
2874 int size = XML_PARSER_BUFFER_SIZE;
2875 int c, l;
2876 xmlChar stop;
2877 xmlChar *ret = NULL;
2878 const xmlChar *cur = NULL;
2879 xmlParserInputPtr input;
2880
2881 if (RAW == '"') stop = '"';
2882 else if (RAW == '\'') stop = '\'';
2883 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002884 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002885 return(NULL);
2886 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002887 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002888 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002889 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002890 return(NULL);
2891 }
2892
2893 /*
2894 * The content of the entity definition is copied in a buffer.
2895 */
2896
2897 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2898 input = ctxt->input;
2899 GROW;
2900 NEXT;
2901 c = CUR_CHAR(l);
2902 /*
2903 * NOTE: 4.4.5 Included in Literal
2904 * When a parameter entity reference appears in a literal entity
2905 * value, ... a single or double quote character in the replacement
2906 * text is always treated as a normal data character and will not
2907 * terminate the literal.
2908 * In practice it means we stop the loop only when back at parsing
2909 * the initial entity and the quote is found
2910 */
2911 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2912 (ctxt->input != input))) {
2913 if (len + 5 >= size) {
2914 size *= 2;
2915 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2916 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002917 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002918 return(NULL);
2919 }
2920 }
2921 COPY_BUF(l,buf,len,c);
2922 NEXTL(l);
2923 /*
2924 * Pop-up of finished entities.
2925 */
2926 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2927 xmlPopInput(ctxt);
2928
2929 GROW;
2930 c = CUR_CHAR(l);
2931 if (c == 0) {
2932 GROW;
2933 c = CUR_CHAR(l);
2934 }
2935 }
2936 buf[len] = 0;
2937
2938 /*
2939 * Raise problem w.r.t. '&' and '%' being used in non-entities
2940 * reference constructs. Note Charref will be handled in
2941 * xmlStringDecodeEntities()
2942 */
2943 cur = buf;
2944 while (*cur != 0) { /* non input consuming */
2945 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2946 xmlChar *name;
2947 xmlChar tmp = *cur;
2948
2949 cur++;
2950 name = xmlParseStringName(ctxt, &cur);
2951 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002952 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002953 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002954 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002955 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002956 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2957 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002958 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002959 }
2960 if (name != NULL)
2961 xmlFree(name);
2962 }
2963 cur++;
2964 }
2965
2966 /*
2967 * Then PEReference entities are substituted.
2968 */
2969 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002970 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002971 xmlFree(buf);
2972 } else {
2973 NEXT;
2974 /*
2975 * NOTE: 4.4.7 Bypassed
2976 * When a general entity reference appears in the EntityValue in
2977 * an entity declaration, it is bypassed and left as is.
2978 * so XML_SUBSTITUTE_REF is not set here.
2979 */
2980 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2981 0, 0, 0);
2982 if (orig != NULL)
2983 *orig = buf;
2984 else
2985 xmlFree(buf);
2986 }
2987
2988 return(ret);
2989}
2990
2991/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00002992 * xmlParseAttValueComplex:
2993 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00002994 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002995 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00002996 *
2997 * parse a value for an attribute, this is the fallback function
2998 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002999 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003000 *
3001 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3002 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003003static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003004xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003005 xmlChar limit = 0;
3006 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003007 int len = 0;
3008 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003009 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003010 xmlChar *current = NULL;
3011 xmlEntityPtr ent;
3012
Owen Taylor3473f882001-02-23 17:55:21 +00003013 if (NXT(0) == '"') {
3014 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3015 limit = '"';
3016 NEXT;
3017 } else if (NXT(0) == '\'') {
3018 limit = '\'';
3019 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3020 NEXT;
3021 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003022 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003023 return(NULL);
3024 }
3025
3026 /*
3027 * allocate a translation buffer.
3028 */
3029 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003030 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003031 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003032
3033 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003034 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003035 */
3036 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003037 while ((NXT(0) != limit) && /* checked */
3038 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003039 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003040 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003041 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003042 if (NXT(1) == '#') {
3043 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003044
Owen Taylor3473f882001-02-23 17:55:21 +00003045 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003046 if (ctxt->replaceEntities) {
3047 if (len > buf_size - 10) {
3048 growBuffer(buf);
3049 }
3050 buf[len++] = '&';
3051 } else {
3052 /*
3053 * The reparsing will be done in xmlStringGetNodeList()
3054 * called by the attribute() function in SAX.c
3055 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003056 if (len > buf_size - 10) {
3057 growBuffer(buf);
3058 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003059 buf[len++] = '&';
3060 buf[len++] = '#';
3061 buf[len++] = '3';
3062 buf[len++] = '8';
3063 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003064 }
3065 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003066 if (len > buf_size - 10) {
3067 growBuffer(buf);
3068 }
Owen Taylor3473f882001-02-23 17:55:21 +00003069 len += xmlCopyChar(0, &buf[len], val);
3070 }
3071 } else {
3072 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003073 if ((ent != NULL) &&
3074 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3075 if (len > buf_size - 10) {
3076 growBuffer(buf);
3077 }
3078 if ((ctxt->replaceEntities == 0) &&
3079 (ent->content[0] == '&')) {
3080 buf[len++] = '&';
3081 buf[len++] = '#';
3082 buf[len++] = '3';
3083 buf[len++] = '8';
3084 buf[len++] = ';';
3085 } else {
3086 buf[len++] = ent->content[0];
3087 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003088 } else if ((ent != NULL) &&
3089 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003090 xmlChar *rep;
3091
3092 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3093 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003094 XML_SUBSTITUTE_REF,
3095 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003096 if (rep != NULL) {
3097 current = rep;
3098 while (*current != 0) { /* non input consuming */
3099 buf[len++] = *current++;
3100 if (len > buf_size - 10) {
3101 growBuffer(buf);
3102 }
3103 }
3104 xmlFree(rep);
3105 }
3106 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003107 if (len > buf_size - 10) {
3108 growBuffer(buf);
3109 }
Owen Taylor3473f882001-02-23 17:55:21 +00003110 if (ent->content != NULL)
3111 buf[len++] = ent->content[0];
3112 }
3113 } else if (ent != NULL) {
3114 int i = xmlStrlen(ent->name);
3115 const xmlChar *cur = ent->name;
3116
3117 /*
3118 * This may look absurd but is needed to detect
3119 * entities problems
3120 */
3121 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3122 (ent->content != NULL)) {
3123 xmlChar *rep;
3124 rep = xmlStringDecodeEntities(ctxt, ent->content,
3125 XML_SUBSTITUTE_REF, 0, 0, 0);
3126 if (rep != NULL)
3127 xmlFree(rep);
3128 }
3129
3130 /*
3131 * Just output the reference
3132 */
3133 buf[len++] = '&';
3134 if (len > buf_size - i - 10) {
3135 growBuffer(buf);
3136 }
3137 for (;i > 0;i--)
3138 buf[len++] = *cur++;
3139 buf[len++] = ';';
3140 }
3141 }
3142 } else {
3143 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003144 if ((len != 0) || (!normalize)) {
3145 if ((!normalize) || (!in_space)) {
3146 COPY_BUF(l,buf,len,0x20);
3147 if (len > buf_size - 10) {
3148 growBuffer(buf);
3149 }
3150 }
3151 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003152 }
3153 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003154 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003155 COPY_BUF(l,buf,len,c);
3156 if (len > buf_size - 10) {
3157 growBuffer(buf);
3158 }
3159 }
3160 NEXTL(l);
3161 }
3162 GROW;
3163 c = CUR_CHAR(l);
3164 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003165 if ((in_space) && (normalize)) {
3166 while (buf[len - 1] == 0x20) len--;
3167 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003168 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003169 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003170 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003171 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003172 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3173 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003174 } else
3175 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003176 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003177 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003178
3179mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003180 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003181 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003182}
3183
3184/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003185 * xmlParseAttValue:
3186 * @ctxt: an XML parser context
3187 *
3188 * parse a value for an attribute
3189 * Note: the parser won't do substitution of entities here, this
3190 * will be handled later in xmlStringGetNodeList
3191 *
3192 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3193 * "'" ([^<&'] | Reference)* "'"
3194 *
3195 * 3.3.3 Attribute-Value Normalization:
3196 * Before the value of an attribute is passed to the application or
3197 * checked for validity, the XML processor must normalize it as follows:
3198 * - a character reference is processed by appending the referenced
3199 * character to the attribute value
3200 * - an entity reference is processed by recursively processing the
3201 * replacement text of the entity
3202 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3203 * appending #x20 to the normalized value, except that only a single
3204 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3205 * parsed entity or the literal entity value of an internal parsed entity
3206 * - other characters are processed by appending them to the normalized value
3207 * If the declared value is not CDATA, then the XML processor must further
3208 * process the normalized attribute value by discarding any leading and
3209 * trailing space (#x20) characters, and by replacing sequences of space
3210 * (#x20) characters by a single space (#x20) character.
3211 * All attributes for which no declaration has been read should be treated
3212 * by a non-validating parser as if declared CDATA.
3213 *
3214 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3215 */
3216
3217
3218xmlChar *
3219xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003220 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003221}
3222
3223/**
Owen Taylor3473f882001-02-23 17:55:21 +00003224 * xmlParseSystemLiteral:
3225 * @ctxt: an XML parser context
3226 *
3227 * parse an XML Literal
3228 *
3229 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3230 *
3231 * Returns the SystemLiteral parsed or NULL
3232 */
3233
3234xmlChar *
3235xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3236 xmlChar *buf = NULL;
3237 int len = 0;
3238 int size = XML_PARSER_BUFFER_SIZE;
3239 int cur, l;
3240 xmlChar stop;
3241 int state = ctxt->instate;
3242 int count = 0;
3243
3244 SHRINK;
3245 if (RAW == '"') {
3246 NEXT;
3247 stop = '"';
3248 } else if (RAW == '\'') {
3249 NEXT;
3250 stop = '\'';
3251 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003252 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003253 return(NULL);
3254 }
3255
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003256 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003257 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003258 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003259 return(NULL);
3260 }
3261 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3262 cur = CUR_CHAR(l);
3263 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
3264 if (len + 5 >= size) {
3265 size *= 2;
3266 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3267 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003268 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003269 ctxt->instate = (xmlParserInputState) state;
3270 return(NULL);
3271 }
3272 }
3273 count++;
3274 if (count > 50) {
3275 GROW;
3276 count = 0;
3277 }
3278 COPY_BUF(l,buf,len,cur);
3279 NEXTL(l);
3280 cur = CUR_CHAR(l);
3281 if (cur == 0) {
3282 GROW;
3283 SHRINK;
3284 cur = CUR_CHAR(l);
3285 }
3286 }
3287 buf[len] = 0;
3288 ctxt->instate = (xmlParserInputState) state;
3289 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003290 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003291 } else {
3292 NEXT;
3293 }
3294 return(buf);
3295}
3296
3297/**
3298 * xmlParsePubidLiteral:
3299 * @ctxt: an XML parser context
3300 *
3301 * parse an XML public literal
3302 *
3303 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3304 *
3305 * Returns the PubidLiteral parsed or NULL.
3306 */
3307
3308xmlChar *
3309xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3310 xmlChar *buf = NULL;
3311 int len = 0;
3312 int size = XML_PARSER_BUFFER_SIZE;
3313 xmlChar cur;
3314 xmlChar stop;
3315 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003316 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003317
3318 SHRINK;
3319 if (RAW == '"') {
3320 NEXT;
3321 stop = '"';
3322 } else if (RAW == '\'') {
3323 NEXT;
3324 stop = '\'';
3325 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003326 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003327 return(NULL);
3328 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003329 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003330 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003331 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003332 return(NULL);
3333 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003334 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003335 cur = CUR;
3336 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
3337 if (len + 1 >= size) {
3338 size *= 2;
3339 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3340 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003341 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003342 return(NULL);
3343 }
3344 }
3345 buf[len++] = cur;
3346 count++;
3347 if (count > 50) {
3348 GROW;
3349 count = 0;
3350 }
3351 NEXT;
3352 cur = CUR;
3353 if (cur == 0) {
3354 GROW;
3355 SHRINK;
3356 cur = CUR;
3357 }
3358 }
3359 buf[len] = 0;
3360 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003361 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003362 } else {
3363 NEXT;
3364 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003365 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003366 return(buf);
3367}
3368
Daniel Veillard48b2f892001-02-25 16:11:03 +00003369void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00003370/**
3371 * xmlParseCharData:
3372 * @ctxt: an XML parser context
3373 * @cdata: int indicating whether we are within a CDATA section
3374 *
3375 * parse a CharData section.
3376 * if we are within a CDATA section ']]>' marks an end of section.
3377 *
3378 * The right angle bracket (>) may be represented using the string "&gt;",
3379 * and must, for compatibility, be escaped using "&gt;" or a character
3380 * reference when it appears in the string "]]>" in content, when that
3381 * string is not marking the end of a CDATA section.
3382 *
3383 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3384 */
3385
3386void
3387xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003388 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003389 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003390 int line = ctxt->input->line;
3391 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003392
3393 SHRINK;
3394 GROW;
3395 /*
3396 * Accelerated common case where input don't need to be
3397 * modified before passing it to the handler.
3398 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003399 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003400 in = ctxt->input->cur;
3401 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003402get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00003403 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
3404 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003405 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003406 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003407 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003408 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003409 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003410 ctxt->input->line++;
3411 in++;
3412 }
3413 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003414 }
3415 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003416 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003417 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003418 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003419 return;
3420 }
3421 in++;
3422 goto get_more;
3423 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003424 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003425 if (nbchar > 0) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003426 if ((ctxt->sax->ignorableWhitespace !=
3427 ctxt->sax->characters) &&
3428 (IS_BLANK(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003429 const xmlChar *tmp = ctxt->input->cur;
3430 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003431
Daniel Veillarda7374592001-05-10 14:17:55 +00003432 if (areBlanks(ctxt, tmp, nbchar)) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003433 ctxt->sax->ignorableWhitespace(ctxt->userData,
3434 tmp, nbchar);
3435 } else if (ctxt->sax->characters != NULL)
3436 ctxt->sax->characters(ctxt->userData,
3437 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003438 line = ctxt->input->line;
3439 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003440 } else {
3441 if (ctxt->sax->characters != NULL)
3442 ctxt->sax->characters(ctxt->userData,
3443 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003444 line = ctxt->input->line;
3445 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003446 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003447 }
3448 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003449 if (*in == 0xD) {
3450 in++;
3451 if (*in == 0xA) {
3452 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003453 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003454 ctxt->input->line++;
3455 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003456 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003457 in--;
3458 }
3459 if (*in == '<') {
3460 return;
3461 }
3462 if (*in == '&') {
3463 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003464 }
3465 SHRINK;
3466 GROW;
3467 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003468 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003469 nbchar = 0;
3470 }
Daniel Veillard50582112001-03-26 22:52:16 +00003471 ctxt->input->line = line;
3472 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003473 xmlParseCharDataComplex(ctxt, cdata);
3474}
3475
Daniel Veillard01c13b52002-12-10 15:19:08 +00003476/**
3477 * xmlParseCharDataComplex:
3478 * @ctxt: an XML parser context
3479 * @cdata: int indicating whether we are within a CDATA section
3480 *
3481 * parse a CharData section.this is the fallback function
3482 * of xmlParseCharData() when the parsing requires handling
3483 * of non-ASCII characters.
3484 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003485void
3486xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003487 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3488 int nbchar = 0;
3489 int cur, l;
3490 int count = 0;
3491
3492 SHRINK;
3493 GROW;
3494 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003495 while ((cur != '<') && /* checked */
3496 (cur != '&') &&
3497 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003498 if ((cur == ']') && (NXT(1) == ']') &&
3499 (NXT(2) == '>')) {
3500 if (cdata) break;
3501 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003502 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003503 }
3504 }
3505 COPY_BUF(l,buf,nbchar,cur);
3506 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003507 buf[nbchar] = 0;
3508
Owen Taylor3473f882001-02-23 17:55:21 +00003509 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003510 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003511 */
3512 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3513 if (areBlanks(ctxt, buf, nbchar)) {
3514 if (ctxt->sax->ignorableWhitespace != NULL)
3515 ctxt->sax->ignorableWhitespace(ctxt->userData,
3516 buf, nbchar);
3517 } else {
3518 if (ctxt->sax->characters != NULL)
3519 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3520 }
3521 }
3522 nbchar = 0;
3523 }
3524 count++;
3525 if (count > 50) {
3526 GROW;
3527 count = 0;
3528 }
3529 NEXTL(l);
3530 cur = CUR_CHAR(l);
3531 }
3532 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003533 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003534 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003535 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003536 */
3537 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3538 if (areBlanks(ctxt, buf, nbchar)) {
3539 if (ctxt->sax->ignorableWhitespace != NULL)
3540 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3541 } else {
3542 if (ctxt->sax->characters != NULL)
3543 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3544 }
3545 }
3546 }
3547}
3548
3549/**
3550 * xmlParseExternalID:
3551 * @ctxt: an XML parser context
3552 * @publicID: a xmlChar** receiving PubidLiteral
3553 * @strict: indicate whether we should restrict parsing to only
3554 * production [75], see NOTE below
3555 *
3556 * Parse an External ID or a Public ID
3557 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003558 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003559 * 'PUBLIC' S PubidLiteral S SystemLiteral
3560 *
3561 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3562 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3563 *
3564 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3565 *
3566 * Returns the function returns SystemLiteral and in the second
3567 * case publicID receives PubidLiteral, is strict is off
3568 * it is possible to return NULL and have publicID set.
3569 */
3570
3571xmlChar *
3572xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3573 xmlChar *URI = NULL;
3574
3575 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003576
3577 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003578 if ((RAW == 'S') && (NXT(1) == 'Y') &&
3579 (NXT(2) == 'S') && (NXT(3) == 'T') &&
3580 (NXT(4) == 'E') && (NXT(5) == 'M')) {
3581 SKIP(6);
3582 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003583 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3584 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003585 }
3586 SKIP_BLANKS;
3587 URI = xmlParseSystemLiteral(ctxt);
3588 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003589 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003590 }
3591 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
3592 (NXT(2) == 'B') && (NXT(3) == 'L') &&
3593 (NXT(4) == 'I') && (NXT(5) == 'C')) {
3594 SKIP(6);
3595 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003596 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003597 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003598 }
3599 SKIP_BLANKS;
3600 *publicID = xmlParsePubidLiteral(ctxt);
3601 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003602 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003603 }
3604 if (strict) {
3605 /*
3606 * We don't handle [83] so "S SystemLiteral" is required.
3607 */
3608 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003609 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003610 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003611 }
3612 } else {
3613 /*
3614 * We handle [83] so we return immediately, if
3615 * "S SystemLiteral" is not detected. From a purely parsing
3616 * point of view that's a nice mess.
3617 */
3618 const xmlChar *ptr;
3619 GROW;
3620
3621 ptr = CUR_PTR;
3622 if (!IS_BLANK(*ptr)) return(NULL);
3623
3624 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3625 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3626 }
3627 SKIP_BLANKS;
3628 URI = xmlParseSystemLiteral(ctxt);
3629 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003630 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003631 }
3632 }
3633 return(URI);
3634}
3635
3636/**
3637 * xmlParseComment:
3638 * @ctxt: an XML parser context
3639 *
3640 * Skip an XML (SGML) comment <!-- .... -->
3641 * The spec says that "For compatibility, the string "--" (double-hyphen)
3642 * must not occur within comments. "
3643 *
3644 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3645 */
3646void
3647xmlParseComment(xmlParserCtxtPtr ctxt) {
3648 xmlChar *buf = NULL;
3649 int len;
3650 int size = XML_PARSER_BUFFER_SIZE;
3651 int q, ql;
3652 int r, rl;
3653 int cur, l;
3654 xmlParserInputState state;
3655 xmlParserInputPtr input = ctxt->input;
3656 int count = 0;
3657
3658 /*
3659 * Check that there is a comment right here.
3660 */
3661 if ((RAW != '<') || (NXT(1) != '!') ||
3662 (NXT(2) != '-') || (NXT(3) != '-')) return;
3663
3664 state = ctxt->instate;
3665 ctxt->instate = XML_PARSER_COMMENT;
3666 SHRINK;
3667 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003668 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003669 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003670 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003671 ctxt->instate = state;
3672 return;
3673 }
3674 q = CUR_CHAR(ql);
3675 NEXTL(ql);
3676 r = CUR_CHAR(rl);
3677 NEXTL(rl);
3678 cur = CUR_CHAR(l);
3679 len = 0;
3680 while (IS_CHAR(cur) && /* checked */
3681 ((cur != '>') ||
3682 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003683 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003684 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003685 }
3686 if (len + 5 >= size) {
3687 size *= 2;
3688 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3689 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003690 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003691 ctxt->instate = state;
3692 return;
3693 }
3694 }
3695 COPY_BUF(ql,buf,len,q);
3696 q = r;
3697 ql = rl;
3698 r = cur;
3699 rl = l;
3700
3701 count++;
3702 if (count > 50) {
3703 GROW;
3704 count = 0;
3705 }
3706 NEXTL(l);
3707 cur = CUR_CHAR(l);
3708 if (cur == 0) {
3709 SHRINK;
3710 GROW;
3711 cur = CUR_CHAR(l);
3712 }
3713 }
3714 buf[len] = 0;
3715 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003716 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003717 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003718 xmlFree(buf);
3719 } else {
3720 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003721 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3722 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003723 }
3724 NEXT;
3725 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3726 (!ctxt->disableSAX))
3727 ctxt->sax->comment(ctxt->userData, buf);
3728 xmlFree(buf);
3729 }
3730 ctxt->instate = state;
3731}
3732
3733/**
3734 * xmlParsePITarget:
3735 * @ctxt: an XML parser context
3736 *
3737 * parse the name of a PI
3738 *
3739 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3740 *
3741 * Returns the PITarget name or NULL
3742 */
3743
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003744const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003745xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003746 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003747
3748 name = xmlParseName(ctxt);
3749 if ((name != NULL) &&
3750 ((name[0] == 'x') || (name[0] == 'X')) &&
3751 ((name[1] == 'm') || (name[1] == 'M')) &&
3752 ((name[2] == 'l') || (name[2] == 'L'))) {
3753 int i;
3754 if ((name[0] == 'x') && (name[1] == 'm') &&
3755 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003756 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003757 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003758 return(name);
3759 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003760 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003761 return(name);
3762 }
3763 for (i = 0;;i++) {
3764 if (xmlW3CPIs[i] == NULL) break;
3765 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3766 return(name);
3767 }
3768 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3769 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3770 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003771 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003772 }
3773 }
3774 return(name);
3775}
3776
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction, e.g.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * The quoted value of the catalog pseudo-attribute is added to the
 * catalogs of the parsing context, to be used if a resolution is needed
 * further down in the document. Syntax errors are reported through the
 * SAX warning callback, except for a missing '=' which is ignored
 * silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *uri = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* Expect the pseudo-attribute name. */
    while (IS_BLANK(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* Then '=', possibly surrounded by blanks. */
    while (IS_BLANK(*p))
        p++;
    if (*p != '=')
        return;
    p++;
    while (IS_BLANK(*p))
        p++;

    /* Then a value delimited by matching single or double quotes. */
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    uri = xmlStrndup(start, p - start);
    p++;

    /* Only blanks may follow the closing quote. */
    while (IS_BLANK(*p))
        p++;
    if (*p != 0)
        goto error;

    if (uri != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, uri);
        xmlFree(uri);
    }
    return;

error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (uri != NULL)
        xmlFree(uri);
}
#endif
3839
Owen Taylor3473f882001-02-23 17:55:21 +00003840/**
3841 * xmlParsePI:
3842 * @ctxt: an XML parser context
3843 *
3844 * parse an XML Processing Instruction.
3845 *
3846 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3847 *
3848 * The processing is transfered to SAX once parsed.
3849 */
3850
3851void
3852xmlParsePI(xmlParserCtxtPtr ctxt) {
3853 xmlChar *buf = NULL;
3854 int len = 0;
3855 int size = XML_PARSER_BUFFER_SIZE;
3856 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003857 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00003858 xmlParserInputState state;
3859 int count = 0;
3860
3861 if ((RAW == '<') && (NXT(1) == '?')) {
3862 xmlParserInputPtr input = ctxt->input;
3863 state = ctxt->instate;
3864 ctxt->instate = XML_PARSER_PI;
3865 /*
3866 * this is a Processing Instruction.
3867 */
3868 SKIP(2);
3869 SHRINK;
3870
3871 /*
3872 * Parse the target name and check for special support like
3873 * namespace.
3874 */
3875 target = xmlParsePITarget(ctxt);
3876 if (target != NULL) {
3877 if ((RAW == '?') && (NXT(1) == '>')) {
3878 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003879 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3880 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003881 }
3882 SKIP(2);
3883
3884 /*
3885 * SAX: PI detected.
3886 */
3887 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3888 (ctxt->sax->processingInstruction != NULL))
3889 ctxt->sax->processingInstruction(ctxt->userData,
3890 target, NULL);
3891 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00003892 return;
3893 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003894 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003895 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003896 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003897 ctxt->instate = state;
3898 return;
3899 }
3900 cur = CUR;
3901 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003902 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
3903 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003904 }
3905 SKIP_BLANKS;
3906 cur = CUR_CHAR(l);
3907 while (IS_CHAR(cur) && /* checked */
3908 ((cur != '?') || (NXT(1) != '>'))) {
3909 if (len + 5 >= size) {
3910 size *= 2;
3911 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3912 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003913 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003914 ctxt->instate = state;
3915 return;
3916 }
3917 }
3918 count++;
3919 if (count > 50) {
3920 GROW;
3921 count = 0;
3922 }
3923 COPY_BUF(l,buf,len,cur);
3924 NEXTL(l);
3925 cur = CUR_CHAR(l);
3926 if (cur == 0) {
3927 SHRINK;
3928 GROW;
3929 cur = CUR_CHAR(l);
3930 }
3931 }
3932 buf[len] = 0;
3933 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003934 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
3935 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003936 } else {
3937 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003938 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3939 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003940 }
3941 SKIP(2);
3942
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003943#ifdef LIBXML_CATALOG_ENABLED
3944 if (((state == XML_PARSER_MISC) ||
3945 (state == XML_PARSER_START)) &&
3946 (xmlStrEqual(target, XML_CATALOG_PI))) {
3947 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3948 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3949 (allow == XML_CATA_ALLOW_ALL))
3950 xmlParseCatalogPI(ctxt, buf);
3951 }
3952#endif
3953
3954
Owen Taylor3473f882001-02-23 17:55:21 +00003955 /*
3956 * SAX: PI detected.
3957 */
3958 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3959 (ctxt->sax->processingInstruction != NULL))
3960 ctxt->sax->processingInstruction(ctxt->userData,
3961 target, buf);
3962 }
3963 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003964 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003965 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003966 }
3967 ctxt->instate = state;
3968 }
3969}
3970
3971/**
3972 * xmlParseNotationDecl:
3973 * @ctxt: an XML parser context
3974 *
3975 * parse a notation declaration
3976 *
3977 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3978 *
3979 * Hence there is actually 3 choices:
3980 * 'PUBLIC' S PubidLiteral
3981 * 'PUBLIC' S PubidLiteral S SystemLiteral
3982 * and 'SYSTEM' S SystemLiteral
3983 *
3984 * See the NOTE on xmlParseExternalID().
3985 */
3986
3987void
3988xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003989 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003990 xmlChar *Pubid;
3991 xmlChar *Systemid;
3992
3993 if ((RAW == '<') && (NXT(1) == '!') &&
3994 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3995 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3996 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3997 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3998 xmlParserInputPtr input = ctxt->input;
3999 SHRINK;
4000 SKIP(10);
4001 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004002 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4003 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004004 return;
4005 }
4006 SKIP_BLANKS;
4007
Daniel Veillard76d66f42001-05-16 21:05:17 +00004008 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004009 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004010 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004011 return;
4012 }
4013 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004014 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004015 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004016 return;
4017 }
4018 SKIP_BLANKS;
4019
4020 /*
4021 * Parse the IDs.
4022 */
4023 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4024 SKIP_BLANKS;
4025
4026 if (RAW == '>') {
4027 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004028 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4029 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004030 }
4031 NEXT;
4032 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4033 (ctxt->sax->notationDecl != NULL))
4034 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4035 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004036 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004037 }
Owen Taylor3473f882001-02-23 17:55:21 +00004038 if (Systemid != NULL) xmlFree(Systemid);
4039 if (Pubid != NULL) xmlFree(Pubid);
4040 }
4041}
4042
4043/**
4044 * xmlParseEntityDecl:
4045 * @ctxt: an XML parser context
4046 *
4047 * parse <!ENTITY declarations
4048 *
4049 * [70] EntityDecl ::= GEDecl | PEDecl
4050 *
4051 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4052 *
4053 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4054 *
4055 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4056 *
4057 * [74] PEDef ::= EntityValue | ExternalID
4058 *
4059 * [76] NDataDecl ::= S 'NDATA' S Name
4060 *
4061 * [ VC: Notation Declared ]
4062 * The Name must match the declared name of a notation.
4063 */
4064
4065void
4066xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004067 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004068 xmlChar *value = NULL;
4069 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004070 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004071 int isParameter = 0;
4072 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004073 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004074
4075 GROW;
4076 if ((RAW == '<') && (NXT(1) == '!') &&
4077 (NXT(2) == 'E') && (NXT(3) == 'N') &&
4078 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4079 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
4080 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004081 SHRINK;
4082 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004083 skipped = SKIP_BLANKS;
4084 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004085 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4086 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004087 }
Owen Taylor3473f882001-02-23 17:55:21 +00004088
4089 if (RAW == '%') {
4090 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004091 skipped = SKIP_BLANKS;
4092 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004093 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4094 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004095 }
Owen Taylor3473f882001-02-23 17:55:21 +00004096 isParameter = 1;
4097 }
4098
Daniel Veillard76d66f42001-05-16 21:05:17 +00004099 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004100 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004101 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4102 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004103 return;
4104 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004105 skipped = SKIP_BLANKS;
4106 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004107 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4108 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004109 }
Owen Taylor3473f882001-02-23 17:55:21 +00004110
Daniel Veillardf5582f12002-06-11 10:08:16 +00004111 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004112 /*
4113 * handle the various case of definitions...
4114 */
4115 if (isParameter) {
4116 if ((RAW == '"') || (RAW == '\'')) {
4117 value = xmlParseEntityValue(ctxt, &orig);
4118 if (value) {
4119 if ((ctxt->sax != NULL) &&
4120 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4121 ctxt->sax->entityDecl(ctxt->userData, name,
4122 XML_INTERNAL_PARAMETER_ENTITY,
4123 NULL, NULL, value);
4124 }
4125 } else {
4126 URI = xmlParseExternalID(ctxt, &literal, 1);
4127 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004128 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004129 }
4130 if (URI) {
4131 xmlURIPtr uri;
4132
4133 uri = xmlParseURI((const char *) URI);
4134 if (uri == NULL) {
4135 ctxt->errNo = XML_ERR_INVALID_URI;
4136 if ((ctxt->sax != NULL) &&
4137 (!ctxt->disableSAX) &&
4138 (ctxt->sax->error != NULL))
4139 ctxt->sax->error(ctxt->userData,
4140 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004141 /*
4142 * This really ought to be a well formedness error
4143 * but the XML Core WG decided otherwise c.f. issue
4144 * E26 of the XML erratas.
4145 */
Owen Taylor3473f882001-02-23 17:55:21 +00004146 } else {
4147 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004148 /*
4149 * Okay this is foolish to block those but not
4150 * invalid URIs.
4151 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004152 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004153 } else {
4154 if ((ctxt->sax != NULL) &&
4155 (!ctxt->disableSAX) &&
4156 (ctxt->sax->entityDecl != NULL))
4157 ctxt->sax->entityDecl(ctxt->userData, name,
4158 XML_EXTERNAL_PARAMETER_ENTITY,
4159 literal, URI, NULL);
4160 }
4161 xmlFreeURI(uri);
4162 }
4163 }
4164 }
4165 } else {
4166 if ((RAW == '"') || (RAW == '\'')) {
4167 value = xmlParseEntityValue(ctxt, &orig);
4168 if ((ctxt->sax != NULL) &&
4169 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4170 ctxt->sax->entityDecl(ctxt->userData, name,
4171 XML_INTERNAL_GENERAL_ENTITY,
4172 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004173 /*
4174 * For expat compatibility in SAX mode.
4175 */
4176 if ((ctxt->myDoc == NULL) ||
4177 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4178 if (ctxt->myDoc == NULL) {
4179 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4180 }
4181 if (ctxt->myDoc->intSubset == NULL)
4182 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4183 BAD_CAST "fake", NULL, NULL);
4184
Daniel Veillard1af9a412003-08-20 22:54:39 +00004185 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4186 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004187 }
Owen Taylor3473f882001-02-23 17:55:21 +00004188 } else {
4189 URI = xmlParseExternalID(ctxt, &literal, 1);
4190 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004191 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004192 }
4193 if (URI) {
4194 xmlURIPtr uri;
4195
4196 uri = xmlParseURI((const char *)URI);
4197 if (uri == NULL) {
4198 ctxt->errNo = XML_ERR_INVALID_URI;
4199 if ((ctxt->sax != NULL) &&
4200 (!ctxt->disableSAX) &&
4201 (ctxt->sax->error != NULL))
4202 ctxt->sax->error(ctxt->userData,
4203 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004204 /*
4205 * This really ought to be a well formedness error
4206 * but the XML Core WG decided otherwise c.f. issue
4207 * E26 of the XML erratas.
4208 */
Owen Taylor3473f882001-02-23 17:55:21 +00004209 } else {
4210 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004211 /*
4212 * Okay this is foolish to block those but not
4213 * invalid URIs.
4214 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004215 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004216 }
4217 xmlFreeURI(uri);
4218 }
4219 }
4220 if ((RAW != '>') && (!IS_BLANK(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004221 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4222 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004223 }
4224 SKIP_BLANKS;
4225 if ((RAW == 'N') && (NXT(1) == 'D') &&
4226 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4227 (NXT(4) == 'A')) {
4228 SKIP(5);
4229 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004230 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4231 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004232 }
4233 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004234 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004235 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4236 (ctxt->sax->unparsedEntityDecl != NULL))
4237 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4238 literal, URI, ndata);
4239 } else {
4240 if ((ctxt->sax != NULL) &&
4241 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4242 ctxt->sax->entityDecl(ctxt->userData, name,
4243 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4244 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004245 /*
4246 * For expat compatibility in SAX mode.
4247 * assuming the entity repalcement was asked for
4248 */
4249 if ((ctxt->replaceEntities != 0) &&
4250 ((ctxt->myDoc == NULL) ||
4251 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4252 if (ctxt->myDoc == NULL) {
4253 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4254 }
4255
4256 if (ctxt->myDoc->intSubset == NULL)
4257 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4258 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004259 xmlSAX2EntityDecl(ctxt, name,
4260 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4261 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004262 }
Owen Taylor3473f882001-02-23 17:55:21 +00004263 }
4264 }
4265 }
4266 SKIP_BLANKS;
4267 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004268 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004269 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004270 } else {
4271 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004272 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4273 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004274 }
4275 NEXT;
4276 }
4277 if (orig != NULL) {
4278 /*
4279 * Ugly mechanism to save the raw entity value.
4280 */
4281 xmlEntityPtr cur = NULL;
4282
4283 if (isParameter) {
4284 if ((ctxt->sax != NULL) &&
4285 (ctxt->sax->getParameterEntity != NULL))
4286 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4287 } else {
4288 if ((ctxt->sax != NULL) &&
4289 (ctxt->sax->getEntity != NULL))
4290 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004291 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004292 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004293 }
Owen Taylor3473f882001-02-23 17:55:21 +00004294 }
4295 if (cur != NULL) {
4296 if (cur->orig != NULL)
4297 xmlFree(orig);
4298 else
4299 cur->orig = orig;
4300 } else
4301 xmlFree(orig);
4302 }
Owen Taylor3473f882001-02-23 17:55:21 +00004303 if (value != NULL) xmlFree(value);
4304 if (URI != NULL) xmlFree(URI);
4305 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004306 }
4307}
4308
4309/**
4310 * xmlParseDefaultDecl:
4311 * @ctxt: an XML parser context
4312 * @value: Receive a possible fixed default value for the attribute
4313 *
4314 * Parse an attribute default declaration
4315 *
4316 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4317 *
4318 * [ VC: Required Attribute ]
4319 * if the default declaration is the keyword #REQUIRED, then the
4320 * attribute must be specified for all elements of the type in the
4321 * attribute-list declaration.
4322 *
4323 * [ VC: Attribute Default Legal ]
4324 * The declared default value must meet the lexical constraints of
4325 * the declared attribute type c.f. xmlValidateAttributeDecl()
4326 *
4327 * [ VC: Fixed Attribute Default ]
4328 * if an attribute has a default value declared with the #FIXED
4329 * keyword, instances of that attribute must match the default value.
4330 *
4331 * [ WFC: No < in Attribute Values ]
4332 * handled in xmlParseAttValue()
4333 *
4334 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4335 * or XML_ATTRIBUTE_FIXED.
4336 */
4337
4338int
4339xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4340 int val;
4341 xmlChar *ret;
4342
4343 *value = NULL;
4344 if ((RAW == '#') && (NXT(1) == 'R') &&
4345 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
4346 (NXT(4) == 'U') && (NXT(5) == 'I') &&
4347 (NXT(6) == 'R') && (NXT(7) == 'E') &&
4348 (NXT(8) == 'D')) {
4349 SKIP(9);
4350 return(XML_ATTRIBUTE_REQUIRED);
4351 }
4352 if ((RAW == '#') && (NXT(1) == 'I') &&
4353 (NXT(2) == 'M') && (NXT(3) == 'P') &&
4354 (NXT(4) == 'L') && (NXT(5) == 'I') &&
4355 (NXT(6) == 'E') && (NXT(7) == 'D')) {
4356 SKIP(8);
4357 return(XML_ATTRIBUTE_IMPLIED);
4358 }
4359 val = XML_ATTRIBUTE_NONE;
4360 if ((RAW == '#') && (NXT(1) == 'F') &&
4361 (NXT(2) == 'I') && (NXT(3) == 'X') &&
4362 (NXT(4) == 'E') && (NXT(5) == 'D')) {
4363 SKIP(6);
4364 val = XML_ATTRIBUTE_FIXED;
4365 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004366 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4367 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004368 }
4369 SKIP_BLANKS;
4370 }
4371 ret = xmlParseAttValue(ctxt);
4372 ctxt->instate = XML_PARSER_DTD;
4373 if (ret == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004374 xmlFatalErrMsg(ctxt, ctxt->errNo,
4375 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004376 } else
4377 *value = ret;
4378 return(val);
4379}
4380
4381/**
4382 * xmlParseNotationType:
4383 * @ctxt: an XML parser context
4384 *
4385 * parse an Notation attribute type.
4386 *
4387 * Note: the leading 'NOTATION' S part has already being parsed...
4388 *
4389 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4390 *
4391 * [ VC: Notation Attributes ]
4392 * Values of this type must match one of the notation names included
4393 * in the declaration; all notation names in the declaration must be declared.
4394 *
4395 * Returns: the notation attribute tree built while parsing
4396 */
4397
4398xmlEnumerationPtr
4399xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004400 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004401 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4402
4403 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004404 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004405 return(NULL);
4406 }
4407 SHRINK;
4408 do {
4409 NEXT;
4410 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004411 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004412 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004413 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4414 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004415 return(ret);
4416 }
4417 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004418 if (cur == NULL) return(ret);
4419 if (last == NULL) ret = last = cur;
4420 else {
4421 last->next = cur;
4422 last = cur;
4423 }
4424 SKIP_BLANKS;
4425 } while (RAW == '|');
4426 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004427 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004428 if ((last != NULL) && (last != ret))
4429 xmlFreeEnumeration(last);
4430 return(ret);
4431 }
4432 NEXT;
4433 return(ret);
4434}
4435
4436/**
4437 * xmlParseEnumerationType:
4438 * @ctxt: an XML parser context
4439 *
4440 * parse an Enumeration attribute type.
4441 *
4442 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4443 *
4444 * [ VC: Enumeration ]
4445 * Values of this type must match one of the Nmtoken tokens in
4446 * the declaration
4447 *
4448 * Returns: the enumeration attribute tree built while parsing
4449 */
4450
4451xmlEnumerationPtr
4452xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4453 xmlChar *name;
4454 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4455
4456 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004457 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004458 return(NULL);
4459 }
4460 SHRINK;
4461 do {
4462 NEXT;
4463 SKIP_BLANKS;
4464 name = xmlParseNmtoken(ctxt);
4465 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004466 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004467 return(ret);
4468 }
4469 cur = xmlCreateEnumeration(name);
4470 xmlFree(name);
4471 if (cur == NULL) return(ret);
4472 if (last == NULL) ret = last = cur;
4473 else {
4474 last->next = cur;
4475 last = cur;
4476 }
4477 SKIP_BLANKS;
4478 } while (RAW == '|');
4479 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004480 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004481 return(ret);
4482 }
4483 NEXT;
4484 return(ret);
4485}
4486
4487/**
4488 * xmlParseEnumeratedType:
4489 * @ctxt: an XML parser context
4490 * @tree: the enumeration tree built while parsing
4491 *
4492 * parse an Enumerated attribute type.
4493 *
4494 * [57] EnumeratedType ::= NotationType | Enumeration
4495 *
4496 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4497 *
4498 *
4499 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4500 */
4501
4502int
4503xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4504 if ((RAW == 'N') && (NXT(1) == 'O') &&
4505 (NXT(2) == 'T') && (NXT(3) == 'A') &&
4506 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4507 (NXT(6) == 'O') && (NXT(7) == 'N')) {
4508 SKIP(8);
4509 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004510 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4511 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004512 return(0);
4513 }
4514 SKIP_BLANKS;
4515 *tree = xmlParseNotationType(ctxt);
4516 if (*tree == NULL) return(0);
4517 return(XML_ATTRIBUTE_NOTATION);
4518 }
4519 *tree = xmlParseEnumerationType(ctxt);
4520 if (*tree == NULL) return(0);
4521 return(XML_ATTRIBUTE_ENUMERATION);
4522}
4523
4524/**
4525 * xmlParseAttributeType:
4526 * @ctxt: an XML parser context
4527 * @tree: the enumeration tree built while parsing
4528 *
4529 * parse the Attribute list def for an element
4530 *
4531 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4532 *
4533 * [55] StringType ::= 'CDATA'
4534 *
4535 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4536 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4537 *
4538 * Validity constraints for attribute values syntax are checked in
4539 * xmlValidateAttributeValue()
4540 *
4541 * [ VC: ID ]
4542 * Values of type ID must match the Name production. A name must not
4543 * appear more than once in an XML document as a value of this type;
4544 * i.e., ID values must uniquely identify the elements which bear them.
4545 *
4546 * [ VC: One ID per Element Type ]
4547 * No element type may have more than one ID attribute specified.
4548 *
4549 * [ VC: ID Attribute Default ]
4550 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4551 *
4552 * [ VC: IDREF ]
4553 * Values of type IDREF must match the Name production, and values
4554 * of type IDREFS must match Names; each IDREF Name must match the value
4555 * of an ID attribute on some element in the XML document; i.e. IDREF
4556 * values must match the value of some ID attribute.
4557 *
4558 * [ VC: Entity Name ]
4559 * Values of type ENTITY must match the Name production, values
4560 * of type ENTITIES must match Names; each Entity Name must match the
4561 * name of an unparsed entity declared in the DTD.
4562 *
4563 * [ VC: Name Token ]
4564 * Values of type NMTOKEN must match the Nmtoken production; values
4565 * of type NMTOKENS must match Nmtokens.
4566 *
4567 * Returns the attribute type
4568 */
4569int
4570xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4571 SHRINK;
4572 if ((RAW == 'C') && (NXT(1) == 'D') &&
4573 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4574 (NXT(4) == 'A')) {
4575 SKIP(5);
4576 return(XML_ATTRIBUTE_CDATA);
4577 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4578 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4579 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4580 SKIP(6);
4581 return(XML_ATTRIBUTE_IDREFS);
4582 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4583 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4584 (NXT(4) == 'F')) {
4585 SKIP(5);
4586 return(XML_ATTRIBUTE_IDREF);
4587 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4588 SKIP(2);
4589 return(XML_ATTRIBUTE_ID);
4590 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4591 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4592 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4593 SKIP(6);
4594 return(XML_ATTRIBUTE_ENTITY);
4595 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4596 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4597 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4598 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4599 SKIP(8);
4600 return(XML_ATTRIBUTE_ENTITIES);
4601 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4602 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4603 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4604 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4605 SKIP(8);
4606 return(XML_ATTRIBUTE_NMTOKENS);
4607 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4608 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4609 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4610 (NXT(6) == 'N')) {
4611 SKIP(7);
4612 return(XML_ATTRIBUTE_NMTOKEN);
4613 }
4614 return(xmlParseEnumeratedType(ctxt, tree));
4615}
4616
4617/**
4618 * xmlParseAttributeListDecl:
4619 * @ctxt: an XML parser context
4620 *
4621 * : parse the Attribute list def for an element
4622 *
4623 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4624 *
4625 * [53] AttDef ::= S Name S AttType S DefaultDecl
4626 *
4627 */
4628void
4629xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004630 const xmlChar *elemName;
4631 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004632 xmlEnumerationPtr tree;
4633
4634 if ((RAW == '<') && (NXT(1) == '!') &&
4635 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4636 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4637 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4638 (NXT(8) == 'T')) {
4639 xmlParserInputPtr input = ctxt->input;
4640
4641 SKIP(9);
4642 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004643 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004644 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004645 }
4646 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004647 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004648 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004649 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4650 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004651 return;
4652 }
4653 SKIP_BLANKS;
4654 GROW;
4655 while (RAW != '>') {
4656 const xmlChar *check = CUR_PTR;
4657 int type;
4658 int def;
4659 xmlChar *defaultValue = NULL;
4660
4661 GROW;
4662 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004663 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004664 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004665 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4666 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004667 break;
4668 }
4669 GROW;
4670 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004671 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004672 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004673 if (defaultValue != NULL)
4674 xmlFree(defaultValue);
4675 break;
4676 }
4677 SKIP_BLANKS;
4678
4679 type = xmlParseAttributeType(ctxt, &tree);
4680 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004681 if (defaultValue != NULL)
4682 xmlFree(defaultValue);
4683 break;
4684 }
4685
4686 GROW;
4687 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004688 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4689 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004690 if (defaultValue != NULL)
4691 xmlFree(defaultValue);
4692 if (tree != NULL)
4693 xmlFreeEnumeration(tree);
4694 break;
4695 }
4696 SKIP_BLANKS;
4697
4698 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4699 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004700 if (defaultValue != NULL)
4701 xmlFree(defaultValue);
4702 if (tree != NULL)
4703 xmlFreeEnumeration(tree);
4704 break;
4705 }
4706
4707 GROW;
4708 if (RAW != '>') {
4709 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004710 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004711 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004712 if (defaultValue != NULL)
4713 xmlFree(defaultValue);
4714 if (tree != NULL)
4715 xmlFreeEnumeration(tree);
4716 break;
4717 }
4718 SKIP_BLANKS;
4719 }
4720 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004721 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4722 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004723 if (defaultValue != NULL)
4724 xmlFree(defaultValue);
4725 if (tree != NULL)
4726 xmlFreeEnumeration(tree);
4727 break;
4728 }
4729 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4730 (ctxt->sax->attributeDecl != NULL))
4731 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4732 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004733 else if (tree != NULL)
4734 xmlFreeEnumeration(tree);
4735
4736 if ((ctxt->sax2) && (defaultValue != NULL) &&
4737 (def != XML_ATTRIBUTE_IMPLIED) &&
4738 (def != XML_ATTRIBUTE_REQUIRED)) {
4739 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4740 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004741 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4742 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4743 }
Owen Taylor3473f882001-02-23 17:55:21 +00004744 if (defaultValue != NULL)
4745 xmlFree(defaultValue);
4746 GROW;
4747 }
4748 if (RAW == '>') {
4749 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004750 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4751 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004752 }
4753 NEXT;
4754 }
Owen Taylor3473f882001-02-23 17:55:21 +00004755 }
4756}
4757
4758/**
4759 * xmlParseElementMixedContentDecl:
4760 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004761 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004762 *
4763 * parse the declaration for a Mixed Element content
4764 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4765 *
4766 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4767 * '(' S? '#PCDATA' S? ')'
4768 *
4769 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4770 *
4771 * [ VC: No Duplicate Types ]
4772 * The same name must not appear more than once in a single
4773 * mixed-content declaration.
4774 *
4775 * returns: the list of the xmlElementContentPtr describing the element choices
4776 */
4777xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004778xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004779 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004780 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004781
4782 GROW;
4783 if ((RAW == '#') && (NXT(1) == 'P') &&
4784 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4785 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4786 (NXT(6) == 'A')) {
4787 SKIP(7);
4788 SKIP_BLANKS;
4789 SHRINK;
4790 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004791 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004792 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4793 if (ctxt->vctxt.error != NULL)
4794 ctxt->vctxt.error(ctxt->vctxt.userData,
4795"Element content declaration doesn't start and stop in the same entity\n");
4796 ctxt->valid = 0;
4797 }
Owen Taylor3473f882001-02-23 17:55:21 +00004798 NEXT;
4799 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4800 if (RAW == '*') {
4801 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4802 NEXT;
4803 }
4804 return(ret);
4805 }
4806 if ((RAW == '(') || (RAW == '|')) {
4807 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4808 if (ret == NULL) return(NULL);
4809 }
4810 while (RAW == '|') {
4811 NEXT;
4812 if (elem == NULL) {
4813 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4814 if (ret == NULL) return(NULL);
4815 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004816 if (cur != NULL)
4817 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004818 cur = ret;
4819 } else {
4820 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4821 if (n == NULL) return(NULL);
4822 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004823 if (n->c1 != NULL)
4824 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004825 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004826 if (n != NULL)
4827 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004828 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004829 }
4830 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004831 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004832 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004833 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004834 "xmlParseElementMixedContentDecl : Name expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004835 xmlFreeElementContent(cur);
4836 return(NULL);
4837 }
4838 SKIP_BLANKS;
4839 GROW;
4840 }
4841 if ((RAW == ')') && (NXT(1) == '*')) {
4842 if (elem != NULL) {
4843 cur->c2 = xmlNewElementContent(elem,
4844 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004845 if (cur->c2 != NULL)
4846 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004847 }
4848 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004849 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004850 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4851 if (ctxt->vctxt.error != NULL)
4852 ctxt->vctxt.error(ctxt->vctxt.userData,
4853"Element content declaration doesn't start and stop in the same entity\n");
4854 ctxt->valid = 0;
4855 }
Owen Taylor3473f882001-02-23 17:55:21 +00004856 SKIP(2);
4857 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00004858 xmlFreeElementContent(ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004859 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004860 return(NULL);
4861 }
4862
4863 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004864 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004865 }
4866 return(ret);
4867}
4868
4869/**
4870 * xmlParseElementChildrenContentDecl:
4871 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004872 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004873 *
4874 * parse the declaration for a Mixed Element content
4875 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4876 *
4877 *
4878 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4879 *
4880 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4881 *
4882 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4883 *
4884 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4885 *
4886 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4887 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004888 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004889 * opening or closing parentheses in a choice, seq, or Mixed
4890 * construct is contained in the replacement text for a parameter
4891 * entity, both must be contained in the same replacement text. For
4892 * interoperability, if a parameter-entity reference appears in a
4893 * choice, seq, or Mixed construct, its replacement text should not
4894 * be empty, and neither the first nor last non-blank character of
4895 * the replacement text should be a connector (| or ,).
4896 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004897 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004898 * hierarchy.
4899 */
4900xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004901xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004902 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004903 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00004904 xmlChar type = 0;
4905
4906 SKIP_BLANKS;
4907 GROW;
4908 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004909 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004910
Owen Taylor3473f882001-02-23 17:55:21 +00004911 /* Recurse on first child */
4912 NEXT;
4913 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004914 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004915 SKIP_BLANKS;
4916 GROW;
4917 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004918 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004919 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004920 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004921 return(NULL);
4922 }
4923 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004924 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004925 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004926 return(NULL);
4927 }
Owen Taylor3473f882001-02-23 17:55:21 +00004928 GROW;
4929 if (RAW == '?') {
4930 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4931 NEXT;
4932 } else if (RAW == '*') {
4933 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4934 NEXT;
4935 } else if (RAW == '+') {
4936 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4937 NEXT;
4938 } else {
4939 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4940 }
Owen Taylor3473f882001-02-23 17:55:21 +00004941 GROW;
4942 }
4943 SKIP_BLANKS;
4944 SHRINK;
4945 while (RAW != ')') {
4946 /*
4947 * Each loop we parse one separator and one element.
4948 */
4949 if (RAW == ',') {
4950 if (type == 0) type = CUR;
4951
4952 /*
4953 * Detect "Name | Name , Name" error
4954 */
4955 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004956 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004957 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004958 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004959 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004960 xmlFreeElementContent(last);
4961 if (ret != NULL)
4962 xmlFreeElementContent(ret);
4963 return(NULL);
4964 }
4965 NEXT;
4966
4967 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4968 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004969 if ((last != NULL) && (last != ret))
4970 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004971 xmlFreeElementContent(ret);
4972 return(NULL);
4973 }
4974 if (last == NULL) {
4975 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004976 if (ret != NULL)
4977 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004978 ret = cur = op;
4979 } else {
4980 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004981 if (op != NULL)
4982 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004983 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004984 if (last != NULL)
4985 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004986 cur =op;
4987 last = NULL;
4988 }
4989 } else if (RAW == '|') {
4990 if (type == 0) type = CUR;
4991
4992 /*
4993 * Detect "Name , Name | Name" error
4994 */
4995 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004996 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004997 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004998 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004999 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005000 xmlFreeElementContent(last);
5001 if (ret != NULL)
5002 xmlFreeElementContent(ret);
5003 return(NULL);
5004 }
5005 NEXT;
5006
5007 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5008 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005009 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005010 xmlFreeElementContent(last);
5011 if (ret != NULL)
5012 xmlFreeElementContent(ret);
5013 return(NULL);
5014 }
5015 if (last == NULL) {
5016 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005017 if (ret != NULL)
5018 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005019 ret = cur = op;
5020 } else {
5021 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005022 if (op != NULL)
5023 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005024 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005025 if (last != NULL)
5026 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005027 cur =op;
5028 last = NULL;
5029 }
5030 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005031 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005032 if (ret != NULL)
5033 xmlFreeElementContent(ret);
5034 return(NULL);
5035 }
5036 GROW;
5037 SKIP_BLANKS;
5038 GROW;
5039 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005040 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005041 /* Recurse on second child */
5042 NEXT;
5043 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005044 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005045 SKIP_BLANKS;
5046 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005047 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005048 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005049 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005050 if (ret != NULL)
5051 xmlFreeElementContent(ret);
5052 return(NULL);
5053 }
5054 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005055 if (RAW == '?') {
5056 last->ocur = XML_ELEMENT_CONTENT_OPT;
5057 NEXT;
5058 } else if (RAW == '*') {
5059 last->ocur = XML_ELEMENT_CONTENT_MULT;
5060 NEXT;
5061 } else if (RAW == '+') {
5062 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5063 NEXT;
5064 } else {
5065 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5066 }
5067 }
5068 SKIP_BLANKS;
5069 GROW;
5070 }
5071 if ((cur != NULL) && (last != NULL)) {
5072 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005073 if (last != NULL)
5074 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005075 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005076 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005077 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5078 if (ctxt->vctxt.error != NULL)
5079 ctxt->vctxt.error(ctxt->vctxt.userData,
5080"Element content declaration doesn't start and stop in the same entity\n");
5081 ctxt->valid = 0;
5082 }
Owen Taylor3473f882001-02-23 17:55:21 +00005083 NEXT;
5084 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00005085 if (ret != NULL)
5086 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00005087 NEXT;
5088 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005089 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005090 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005091 cur = ret;
5092 /*
5093 * Some normalization:
5094 * (a | b* | c?)* == (a | b | c)*
5095 */
5096 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5097 if ((cur->c1 != NULL) &&
5098 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5099 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5100 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5101 if ((cur->c2 != NULL) &&
5102 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5103 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5104 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5105 cur = cur->c2;
5106 }
5107 }
Owen Taylor3473f882001-02-23 17:55:21 +00005108 NEXT;
5109 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005110 if (ret != NULL) {
5111 int found = 0;
5112
Daniel Veillarde470df72001-04-18 21:41:07 +00005113 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005114 /*
5115 * Some normalization:
5116 * (a | b*)+ == (a | b)*
5117 * (a | b?)+ == (a | b)*
5118 */
5119 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5120 if ((cur->c1 != NULL) &&
5121 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5122 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5123 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5124 found = 1;
5125 }
5126 if ((cur->c2 != NULL) &&
5127 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5128 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5129 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5130 found = 1;
5131 }
5132 cur = cur->c2;
5133 }
5134 if (found)
5135 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5136 }
Owen Taylor3473f882001-02-23 17:55:21 +00005137 NEXT;
5138 }
5139 return(ret);
5140}
5141
5142/**
5143 * xmlParseElementContentDecl:
5144 * @ctxt: an XML parser context
5145 * @name: the name of the element being defined.
5146 * @result: the Element Content pointer will be stored here if any
5147 *
5148 * parse the declaration for an Element content either Mixed or Children,
5149 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5150 *
5151 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5152 *
5153 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5154 */
5155
5156int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005157xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005158 xmlElementContentPtr *result) {
5159
5160 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005161 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005162 int res;
5163
5164 *result = NULL;
5165
5166 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005167 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005168 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005169 return(-1);
5170 }
5171 NEXT;
5172 GROW;
5173 SKIP_BLANKS;
5174 if ((RAW == '#') && (NXT(1) == 'P') &&
5175 (NXT(2) == 'C') && (NXT(3) == 'D') &&
5176 (NXT(4) == 'A') && (NXT(5) == 'T') &&
5177 (NXT(6) == 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005178 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005179 res = XML_ELEMENT_TYPE_MIXED;
5180 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005181 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005182 res = XML_ELEMENT_TYPE_ELEMENT;
5183 }
Owen Taylor3473f882001-02-23 17:55:21 +00005184 SKIP_BLANKS;
5185 *result = tree;
5186 return(res);
5187}
5188
5189/**
5190 * xmlParseElementDecl:
5191 * @ctxt: an XML parser context
5192 *
5193 * parse an Element declaration.
5194 *
5195 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5196 *
5197 * [ VC: Unique Element Type Declaration ]
5198 * No element type may be declared more than once
5199 *
5200 * Returns the type of the element, or -1 in case of error
5201 */
5202int
5203xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005204 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005205 int ret = -1;
5206 xmlElementContentPtr content = NULL;
5207
5208 GROW;
5209 if ((RAW == '<') && (NXT(1) == '!') &&
5210 (NXT(2) == 'E') && (NXT(3) == 'L') &&
5211 (NXT(4) == 'E') && (NXT(5) == 'M') &&
5212 (NXT(6) == 'E') && (NXT(7) == 'N') &&
5213 (NXT(8) == 'T')) {
5214 xmlParserInputPtr input = ctxt->input;
5215
5216 SKIP(9);
5217 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005218 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5219 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005220 }
5221 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005222 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005223 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005224 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5225 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005226 return(-1);
5227 }
5228 while ((RAW == 0) && (ctxt->inputNr > 1))
5229 xmlPopInput(ctxt);
5230 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005231 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5232 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005233 }
5234 SKIP_BLANKS;
5235 if ((RAW == 'E') && (NXT(1) == 'M') &&
5236 (NXT(2) == 'P') && (NXT(3) == 'T') &&
5237 (NXT(4) == 'Y')) {
5238 SKIP(5);
5239 /*
5240 * Element must always be empty.
5241 */
5242 ret = XML_ELEMENT_TYPE_EMPTY;
5243 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5244 (NXT(2) == 'Y')) {
5245 SKIP(3);
5246 /*
5247 * Element is a generic container.
5248 */
5249 ret = XML_ELEMENT_TYPE_ANY;
5250 } else if (RAW == '(') {
5251 ret = xmlParseElementContentDecl(ctxt, name, &content);
5252 } else {
5253 /*
5254 * [ WFC: PEs in Internal Subset ] error handling.
5255 */
5256 if ((RAW == '%') && (ctxt->external == 0) &&
5257 (ctxt->inputNr == 1)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005258 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
5259 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5260 ctxt->sax->error(ctxt->userData,
5261 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005262 } else {
5263 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
5264 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5265 ctxt->sax->error(ctxt->userData,
5266 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5267 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005268 ctxt->wellFormed = 0;
5269 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005270 return(-1);
5271 }
5272
5273 SKIP_BLANKS;
5274 /*
5275 * Pop-up of finished entities.
5276 */
5277 while ((RAW == 0) && (ctxt->inputNr > 1))
5278 xmlPopInput(ctxt);
5279 SKIP_BLANKS;
5280
5281 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005282 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005283 } else {
5284 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005285 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5286 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005287 }
5288
5289 NEXT;
5290 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5291 (ctxt->sax->elementDecl != NULL))
5292 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5293 content);
5294 }
5295 if (content != NULL) {
5296 xmlFreeElementContent(content);
5297 }
Owen Taylor3473f882001-02-23 17:55:21 +00005298 }
5299 return(ret);
5300}
5301
5302/**
Owen Taylor3473f882001-02-23 17:55:21 +00005303 * xmlParseConditionalSections
5304 * @ctxt: an XML parser context
5305 *
5306 * [61] conditionalSect ::= includeSect | ignoreSect
5307 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5308 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5309 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5310 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5311 */
5312
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005313static void
Owen Taylor3473f882001-02-23 17:55:21 +00005314xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5315 SKIP(3);
5316 SKIP_BLANKS;
5317 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
5318 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
5319 (NXT(6) == 'E')) {
5320 SKIP(7);
5321 SKIP_BLANKS;
5322 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005323 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005324 } else {
5325 NEXT;
5326 }
5327 if (xmlParserDebugEntities) {
5328 if ((ctxt->input != NULL) && (ctxt->input->filename))
5329 xmlGenericError(xmlGenericErrorContext,
5330 "%s(%d): ", ctxt->input->filename,
5331 ctxt->input->line);
5332 xmlGenericError(xmlGenericErrorContext,
5333 "Entering INCLUDE Conditional Section\n");
5334 }
5335
5336 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5337 (NXT(2) != '>'))) {
5338 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005339 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005340
5341 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5342 xmlParseConditionalSections(ctxt);
5343 } else if (IS_BLANK(CUR)) {
5344 NEXT;
5345 } else if (RAW == '%') {
5346 xmlParsePEReference(ctxt);
5347 } else
5348 xmlParseMarkupDecl(ctxt);
5349
5350 /*
5351 * Pop-up of finished entities.
5352 */
5353 while ((RAW == 0) && (ctxt->inputNr > 1))
5354 xmlPopInput(ctxt);
5355
Daniel Veillardfdc91562002-07-01 21:52:03 +00005356 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005357 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005358 break;
5359 }
5360 }
5361 if (xmlParserDebugEntities) {
5362 if ((ctxt->input != NULL) && (ctxt->input->filename))
5363 xmlGenericError(xmlGenericErrorContext,
5364 "%s(%d): ", ctxt->input->filename,
5365 ctxt->input->line);
5366 xmlGenericError(xmlGenericErrorContext,
5367 "Leaving INCLUDE Conditional Section\n");
5368 }
5369
5370 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
5371 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
5372 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005373 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005374 int depth = 0;
5375
5376 SKIP(6);
5377 SKIP_BLANKS;
5378 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005379 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005380 } else {
5381 NEXT;
5382 }
5383 if (xmlParserDebugEntities) {
5384 if ((ctxt->input != NULL) && (ctxt->input->filename))
5385 xmlGenericError(xmlGenericErrorContext,
5386 "%s(%d): ", ctxt->input->filename,
5387 ctxt->input->line);
5388 xmlGenericError(xmlGenericErrorContext,
5389 "Entering IGNORE Conditional Section\n");
5390 }
5391
5392 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005393 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005394 * But disable SAX event generating DTD building in the meantime
5395 */
5396 state = ctxt->disableSAX;
5397 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005398 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005399 ctxt->instate = XML_PARSER_IGNORE;
5400
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005401 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005402 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5403 depth++;
5404 SKIP(3);
5405 continue;
5406 }
5407 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5408 if (--depth >= 0) SKIP(3);
5409 continue;
5410 }
5411 NEXT;
5412 continue;
5413 }
5414
5415 ctxt->disableSAX = state;
5416 ctxt->instate = instate;
5417
5418 if (xmlParserDebugEntities) {
5419 if ((ctxt->input != NULL) && (ctxt->input->filename))
5420 xmlGenericError(xmlGenericErrorContext,
5421 "%s(%d): ", ctxt->input->filename,
5422 ctxt->input->line);
5423 xmlGenericError(xmlGenericErrorContext,
5424 "Leaving IGNORE Conditional Section\n");
5425 }
5426
5427 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005428 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005429 }
5430
5431 if (RAW == 0)
5432 SHRINK;
5433
5434 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005435 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005436 } else {
5437 SKIP(3);
5438 }
5439}
5440
5441/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005442 * xmlParseMarkupDecl:
5443 * @ctxt: an XML parser context
5444 *
5445 * parse Markup declarations
5446 *
5447 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5448 * NotationDecl | PI | Comment
5449 *
5450 * [ VC: Proper Declaration/PE Nesting ]
5451 * Parameter-entity replacement text must be properly nested with
5452 * markup declarations. That is to say, if either the first character
5453 * or the last character of a markup declaration (markupdecl above) is
5454 * contained in the replacement text for a parameter-entity reference,
5455 * both must be contained in the same replacement text.
5456 *
5457 * [ WFC: PEs in Internal Subset ]
5458 * In the internal DTD subset, parameter-entity references can occur
5459 * only where markup declarations can occur, not within markup declarations.
5460 * (This does not apply to references that occur in external parameter
5461 * entities or to the external subset.)
5462 */
5463void
5464xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5465 GROW;
5466 xmlParseElementDecl(ctxt);
5467 xmlParseAttributeListDecl(ctxt);
5468 xmlParseEntityDecl(ctxt);
5469 xmlParseNotationDecl(ctxt);
5470 xmlParsePI(ctxt);
5471 xmlParseComment(ctxt);
5472 /*
5473 * This is only for internal subset. On external entities,
5474 * the replacement is done before parsing stage
5475 */
5476 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5477 xmlParsePEReference(ctxt);
5478
5479 /*
5480 * Conditional sections are allowed from entities included
5481 * by PE References in the internal subset.
5482 */
5483 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5484 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5485 xmlParseConditionalSections(ctxt);
5486 }
5487 }
5488
5489 ctxt->instate = XML_PARSER_DTD;
5490}
5491
5492/**
5493 * xmlParseTextDecl:
5494 * @ctxt: an XML parser context
5495 *
5496 * parse an XML declaration header for external entities
5497 *
5498 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5499 *
5500 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5501 */
5502
5503void
5504xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5505 xmlChar *version;
5506
5507 /*
5508 * We know that '<?xml' is here.
5509 */
5510 if ((RAW == '<') && (NXT(1) == '?') &&
5511 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5512 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5513 SKIP(5);
5514 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005515 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005516 return;
5517 }
5518
5519 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005520 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5521 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005522 }
5523 SKIP_BLANKS;
5524
5525 /*
5526 * We may have the VersionInfo here.
5527 */
5528 version = xmlParseVersionInfo(ctxt);
5529 if (version == NULL)
5530 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005531 else {
5532 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005533 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5534 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005535 }
5536 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005537 ctxt->input->version = version;
5538
5539 /*
5540 * We must have the encoding declaration
5541 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005542 xmlParseEncodingDecl(ctxt);
5543 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5544 /*
5545 * The XML REC instructs us to stop parsing right here
5546 */
5547 return;
5548 }
5549
5550 SKIP_BLANKS;
5551 if ((RAW == '?') && (NXT(1) == '>')) {
5552 SKIP(2);
5553 } else if (RAW == '>') {
5554 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005555 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005556 NEXT;
5557 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005558 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005559 MOVETO_ENDTAG(CUR_PTR);
5560 NEXT;
5561 }
5562}
5563
5564/**
Owen Taylor3473f882001-02-23 17:55:21 +00005565 * xmlParseExternalSubset:
5566 * @ctxt: an XML parser context
5567 * @ExternalID: the external identifier
5568 * @SystemID: the system identifier (or URL)
5569 *
5570 * parse Markup declarations from an external subset
5571 *
5572 * [30] extSubset ::= textDecl? extSubsetDecl
5573 *
5574 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5575 */
5576void
5577xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5578 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005579 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005580 GROW;
5581 if ((RAW == '<') && (NXT(1) == '?') &&
5582 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5583 (NXT(4) == 'l')) {
5584 xmlParseTextDecl(ctxt);
5585 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5586 /*
5587 * The XML REC instructs us to stop parsing right here
5588 */
5589 ctxt->instate = XML_PARSER_EOF;
5590 return;
5591 }
5592 }
5593 if (ctxt->myDoc == NULL) {
5594 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5595 }
5596 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5597 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5598
5599 ctxt->instate = XML_PARSER_DTD;
5600 ctxt->external = 1;
5601 while (((RAW == '<') && (NXT(1) == '?')) ||
5602 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005603 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005604 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005605 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005606
5607 GROW;
5608 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5609 xmlParseConditionalSections(ctxt);
5610 } else if (IS_BLANK(CUR)) {
5611 NEXT;
5612 } else if (RAW == '%') {
5613 xmlParsePEReference(ctxt);
5614 } else
5615 xmlParseMarkupDecl(ctxt);
5616
5617 /*
5618 * Pop-up of finished entities.
5619 */
5620 while ((RAW == 0) && (ctxt->inputNr > 1))
5621 xmlPopInput(ctxt);
5622
Daniel Veillardfdc91562002-07-01 21:52:03 +00005623 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005624 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005625 break;
5626 }
5627 }
5628
5629 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005630 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005631 }
5632
5633}
5634
5635/**
5636 * xmlParseReference:
5637 * @ctxt: an XML parser context
5638 *
5639 * parse and handle entity references in content, depending on the SAX
5640 * interface, this may end-up in a call to character() if this is a
5641 * CharRef, a predefined entity, if there is no reference() callback.
5642 * or if the parser was asked to switch to that mode.
5643 *
5644 * [67] Reference ::= EntityRef | CharRef
5645 */
5646void
5647xmlParseReference(xmlParserCtxtPtr ctxt) {
5648 xmlEntityPtr ent;
5649 xmlChar *val;
5650 if (RAW != '&') return;
5651
5652 if (NXT(1) == '#') {
5653 int i = 0;
5654 xmlChar out[10];
5655 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005656 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005657
5658 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5659 /*
5660 * So we are using non-UTF-8 buffers
5661 * Check that the char fit on 8bits, if not
5662 * generate a CharRef.
5663 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005664 if (value <= 0xFF) {
5665 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005666 out[1] = 0;
5667 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5668 (!ctxt->disableSAX))
5669 ctxt->sax->characters(ctxt->userData, out, 1);
5670 } else {
5671 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005672 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005673 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005674 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005675 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5676 (!ctxt->disableSAX))
5677 ctxt->sax->reference(ctxt->userData, out);
5678 }
5679 } else {
5680 /*
5681 * Just encode the value in UTF-8
5682 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005683 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005684 out[i] = 0;
5685 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5686 (!ctxt->disableSAX))
5687 ctxt->sax->characters(ctxt->userData, out, i);
5688 }
5689 } else {
5690 ent = xmlParseEntityRef(ctxt);
5691 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005692 if (!ctxt->wellFormed)
5693 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005694 if ((ent->name != NULL) &&
5695 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5696 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005697 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005698
5699
5700 /*
5701 * The first reference to the entity trigger a parsing phase
5702 * where the ent->children is filled with the result from
5703 * the parsing.
5704 */
5705 if (ent->children == NULL) {
5706 xmlChar *value;
5707 value = ent->content;
5708
5709 /*
5710 * Check that this entity is well formed
5711 */
5712 if ((value != NULL) &&
5713 (value[1] == 0) && (value[0] == '<') &&
5714 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5715 /*
5716 * DONE: get definite answer on this !!!
5717 * Lots of entity decls are used to declare a single
5718 * char
5719 * <!ENTITY lt "<">
5720 * Which seems to be valid since
5721 * 2.4: The ampersand character (&) and the left angle
5722 * bracket (<) may appear in their literal form only
5723 * when used ... They are also legal within the literal
5724 * entity value of an internal entity declaration;i
5725 * see "4.3.2 Well-Formed Parsed Entities".
5726 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5727 * Looking at the OASIS test suite and James Clark
5728 * tests, this is broken. However the XML REC uses
5729 * it. Is the XML REC not well-formed ????
5730 * This is a hack to avoid this problem
5731 *
5732 * ANSWER: since lt gt amp .. are already defined,
5733 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005734 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005735 * is lousy but acceptable.
5736 */
5737 list = xmlNewDocText(ctxt->myDoc, value);
5738 if (list != NULL) {
5739 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5740 (ent->children == NULL)) {
5741 ent->children = list;
5742 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005743 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005744 list->parent = (xmlNodePtr) ent;
5745 } else {
5746 xmlFreeNodeList(list);
5747 }
5748 } else if (list != NULL) {
5749 xmlFreeNodeList(list);
5750 }
5751 } else {
5752 /*
5753 * 4.3.2: An internal general parsed entity is well-formed
5754 * if its replacement text matches the production labeled
5755 * content.
5756 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005757
5758 void *user_data;
5759 /*
5760 * This is a bit hackish but this seems the best
5761 * way to make sure both SAX and DOM entity support
5762 * behaves okay.
5763 */
5764 if (ctxt->userData == ctxt)
5765 user_data = NULL;
5766 else
5767 user_data = ctxt->userData;
5768
Owen Taylor3473f882001-02-23 17:55:21 +00005769 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5770 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005771 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5772 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005773 ctxt->depth--;
5774 } else if (ent->etype ==
5775 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5776 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005777 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005778 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005779 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005780 ctxt->depth--;
5781 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005782 ret = XML_ERR_ENTITY_PE_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00005783 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5784 ctxt->sax->error(ctxt->userData,
5785 "Internal: invalid entity type\n");
5786 }
5787 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005788 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005789 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005790 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005791 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5792 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005793 (ent->children == NULL)) {
5794 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005795 if (ctxt->replaceEntities) {
5796 /*
5797 * Prune it directly in the generated document
5798 * except for single text nodes.
5799 */
5800 if ((list->type == XML_TEXT_NODE) &&
5801 (list->next == NULL)) {
5802 list->parent = (xmlNodePtr) ent;
5803 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005804 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005805 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005806 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005807 while (list != NULL) {
5808 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005809 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005810 if (list->next == NULL)
5811 ent->last = list;
5812 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005813 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005814 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005815 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5816 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005817 }
5818 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005819 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005820 while (list != NULL) {
5821 list->parent = (xmlNodePtr) ent;
5822 if (list->next == NULL)
5823 ent->last = list;
5824 list = list->next;
5825 }
Owen Taylor3473f882001-02-23 17:55:21 +00005826 }
5827 } else {
5828 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005829 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005830 }
5831 } else if (ret > 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005832 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005833 } else if (list != NULL) {
5834 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005835 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005836 }
5837 }
5838 }
5839 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5840 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5841 /*
5842 * Create a node.
5843 */
5844 ctxt->sax->reference(ctxt->userData, ent->name);
5845 return;
5846 } else if (ctxt->replaceEntities) {
5847 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5848 /*
5849 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005850 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005851 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005852 */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005853 if ((list == NULL) && (ent->owner == 0)) {
5854 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005855 cur = ent->children;
5856 while (cur != NULL) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005857 nw = xmlCopyNode(cur, 1);
5858 if (nw != NULL) {
5859 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005860 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005861 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005862 }
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005863 xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005864 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005865 if (cur == ent->last)
5866 break;
5867 cur = cur->next;
5868 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005869 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005870 xmlAddEntityReference(ent, firstChild, nw);
5871 } else if (list == NULL) {
5872 xmlNodePtr nw = NULL, cur, next, last,
5873 firstChild = NULL;
5874 /*
5875 * Copy the entity child list and make it the new
5876 * entity child list. The goal is to make sure any
5877 * ID or REF referenced will be the one from the
5878 * document content and not the entity copy.
5879 */
5880 cur = ent->children;
5881 ent->children = NULL;
5882 last = ent->last;
5883 ent->last = NULL;
5884 while (cur != NULL) {
5885 next = cur->next;
5886 cur->next = NULL;
5887 cur->parent = NULL;
5888 nw = xmlCopyNode(cur, 1);
5889 if (nw != NULL) {
5890 nw->_private = cur->_private;
5891 if (firstChild == NULL){
5892 firstChild = cur;
5893 }
5894 xmlAddChild((xmlNodePtr) ent, nw);
5895 xmlAddChild(ctxt->node, cur);
5896 }
5897 if (cur == last)
5898 break;
5899 cur = next;
5900 }
5901 ent->owner = 1;
5902 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5903 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005904 } else {
5905 /*
5906 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005907 * node with a possible previous text one which
5908 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005909 */
5910 if (ent->children->type == XML_TEXT_NODE)
5911 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5912 if ((ent->last != ent->children) &&
5913 (ent->last->type == XML_TEXT_NODE))
5914 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5915 xmlAddChildList(ctxt->node, ent->children);
5916 }
5917
Owen Taylor3473f882001-02-23 17:55:21 +00005918 /*
5919 * This is to avoid a nasty side effect, see
5920 * characters() in SAX.c
5921 */
5922 ctxt->nodemem = 0;
5923 ctxt->nodelen = 0;
5924 return;
5925 } else {
5926 /*
5927 * Probably running in SAX mode
5928 */
5929 xmlParserInputPtr input;
5930
5931 input = xmlNewEntityInputStream(ctxt, ent);
5932 xmlPushInput(ctxt, input);
5933 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5934 (RAW == '<') && (NXT(1) == '?') &&
5935 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5936 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5937 xmlParseTextDecl(ctxt);
5938 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5939 /*
5940 * The XML REC instructs us to stop parsing right here
5941 */
5942 ctxt->instate = XML_PARSER_EOF;
5943 return;
5944 }
5945 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005946 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
5947 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005948 }
5949 }
5950 return;
5951 }
5952 }
5953 } else {
5954 val = ent->content;
5955 if (val == NULL) return;
5956 /*
5957 * inline the entity.
5958 */
5959 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5960 (!ctxt->disableSAX))
5961 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5962 }
5963 }
5964}
5965
5966/**
5967 * xmlParseEntityRef:
5968 * @ctxt: an XML parser context
5969 *
5970 * parse ENTITY references declarations
5971 *
5972 * [68] EntityRef ::= '&' Name ';'
5973 *
5974 * [ WFC: Entity Declared ]
5975 * In a document without any DTD, a document with only an internal DTD
5976 * subset which contains no parameter entity references, or a document
5977 * with "standalone='yes'", the Name given in the entity reference
5978 * must match that in an entity declaration, except that well-formed
5979 * documents need not declare any of the following entities: amp, lt,
5980 * gt, apos, quot. The declaration of a parameter entity must precede
5981 * any reference to it. Similarly, the declaration of a general entity
5982 * must precede any reference to it which appears in a default value in an
5983 * attribute-list declaration. Note that if entities are declared in the
5984 * external subset or in external parameter entities, a non-validating
5985 * processor is not obligated to read and process their declarations;
5986 * for such documents, the rule that an entity must be declared is a
5987 * well-formedness constraint only if standalone='yes'.
5988 *
5989 * [ WFC: Parsed Entity ]
5990 * An entity reference must not contain the name of an unparsed entity
5991 *
5992 * Returns the xmlEntityPtr if found, or NULL otherwise.
5993 */
5994xmlEntityPtr
5995xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005996 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005997 xmlEntityPtr ent = NULL;
5998
5999 GROW;
6000
6001 if (RAW == '&') {
6002 NEXT;
6003 name = xmlParseName(ctxt);
6004 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006005 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6006 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006007 } else {
6008 if (RAW == ';') {
6009 NEXT;
6010 /*
6011 * Ask first SAX for entity resolution, otherwise try the
6012 * predefined set.
6013 */
6014 if (ctxt->sax != NULL) {
6015 if (ctxt->sax->getEntity != NULL)
6016 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006017 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006018 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006019 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6020 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006021 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006022 }
Owen Taylor3473f882001-02-23 17:55:21 +00006023 }
6024 /*
6025 * [ WFC: Entity Declared ]
6026 * In a document without any DTD, a document with only an
6027 * internal DTD subset which contains no parameter entity
6028 * references, or a document with "standalone='yes'", the
6029 * Name given in the entity reference must match that in an
6030 * entity declaration, except that well-formed documents
6031 * need not declare any of the following entities: amp, lt,
6032 * gt, apos, quot.
6033 * The declaration of a parameter entity must precede any
6034 * reference to it.
6035 * Similarly, the declaration of a general entity must
6036 * precede any reference to it which appears in a default
6037 * value in an attribute-list declaration. Note that if
6038 * entities are declared in the external subset or in
6039 * external parameter entities, a non-validating processor
6040 * is not obligated to read and process their declarations;
6041 * for such documents, the rule that an entity must be
6042 * declared is a well-formedness constraint only if
6043 * standalone='yes'.
6044 */
6045 if (ent == NULL) {
6046 if ((ctxt->standalone == 1) ||
6047 ((ctxt->hasExternalSubset == 0) &&
6048 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006049 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006050 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00006051 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006052 } else {
6053 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00006054 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00006055 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00006056 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00006057 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006058 }
6059 }
6060
6061 /*
6062 * [ WFC: Parsed Entity ]
6063 * An entity reference must not contain the name of an
6064 * unparsed entity
6065 */
6066 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006067 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006068 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006069 }
6070
6071 /*
6072 * [ WFC: No External Entity References ]
6073 * Attribute values cannot contain direct or indirect
6074 * entity references to external entities.
6075 */
6076 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6077 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006078 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6079 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006080 }
6081 /*
6082 * [ WFC: No < in Attribute Values ]
6083 * The replacement text of any entity referred to directly or
6084 * indirectly in an attribute value (other than "&lt;") must
6085 * not contain a <.
6086 */
6087 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6088 (ent != NULL) &&
6089 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6090 (ent->content != NULL) &&
6091 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006092 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006093 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006094 }
6095
6096 /*
6097 * Internal check, no parameter entities here ...
6098 */
6099 else {
6100 switch (ent->etype) {
6101 case XML_INTERNAL_PARAMETER_ENTITY:
6102 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006103 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6104 "Attempt to reference the parameter entity '%s'\n",
6105 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006106 break;
6107 default:
6108 break;
6109 }
6110 }
6111
6112 /*
6113 * [ WFC: No Recursion ]
6114 * A parsed entity must not contain a recursive reference
6115 * to itself, either directly or indirectly.
6116 * Done somewhere else
6117 */
6118
6119 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006120 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006121 }
Owen Taylor3473f882001-02-23 17:55:21 +00006122 }
6123 }
6124 return(ent);
6125}
6126
6127/**
6128 * xmlParseStringEntityRef:
6129 * @ctxt: an XML parser context
6130 * @str: a pointer to an index in the string
6131 *
6132 * parse ENTITY references declarations, but this version parses it from
6133 * a string value.
6134 *
6135 * [68] EntityRef ::= '&' Name ';'
6136 *
6137 * [ WFC: Entity Declared ]
6138 * In a document without any DTD, a document with only an internal DTD
6139 * subset which contains no parameter entity references, or a document
6140 * with "standalone='yes'", the Name given in the entity reference
6141 * must match that in an entity declaration, except that well-formed
6142 * documents need not declare any of the following entities: amp, lt,
6143 * gt, apos, quot. The declaration of a parameter entity must precede
6144 * any reference to it. Similarly, the declaration of a general entity
6145 * must precede any reference to it which appears in a default value in an
6146 * attribute-list declaration. Note that if entities are declared in the
6147 * external subset or in external parameter entities, a non-validating
6148 * processor is not obligated to read and process their declarations;
6149 * for such documents, the rule that an entity must be declared is a
6150 * well-formedness constraint only if standalone='yes'.
6151 *
6152 * [ WFC: Parsed Entity ]
6153 * An entity reference must not contain the name of an unparsed entity
6154 *
6155 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6156 * is updated to the current location in the string.
6157 */
6158xmlEntityPtr
6159xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6160 xmlChar *name;
6161 const xmlChar *ptr;
6162 xmlChar cur;
6163 xmlEntityPtr ent = NULL;
6164
6165 if ((str == NULL) || (*str == NULL))
6166 return(NULL);
6167 ptr = *str;
6168 cur = *ptr;
6169 if (cur == '&') {
6170 ptr++;
6171 cur = *ptr;
6172 name = xmlParseStringName(ctxt, &ptr);
6173 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006174 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6175 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006176 } else {
6177 if (*ptr == ';') {
6178 ptr++;
6179 /*
6180 * Ask first SAX for entity resolution, otherwise try the
6181 * predefined set.
6182 */
6183 if (ctxt->sax != NULL) {
6184 if (ctxt->sax->getEntity != NULL)
6185 ent = ctxt->sax->getEntity(ctxt->userData, name);
6186 if (ent == NULL)
6187 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006188 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006189 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006190 }
Owen Taylor3473f882001-02-23 17:55:21 +00006191 }
6192 /*
6193 * [ WFC: Entity Declared ]
6194 * In a document without any DTD, a document with only an
6195 * internal DTD subset which contains no parameter entity
6196 * references, or a document with "standalone='yes'", the
6197 * Name given in the entity reference must match that in an
6198 * entity declaration, except that well-formed documents
6199 * need not declare any of the following entities: amp, lt,
6200 * gt, apos, quot.
6201 * The declaration of a parameter entity must precede any
6202 * reference to it.
6203 * Similarly, the declaration of a general entity must
6204 * precede any reference to it which appears in a default
6205 * value in an attribute-list declaration. Note that if
6206 * entities are declared in the external subset or in
6207 * external parameter entities, a non-validating processor
6208 * is not obligated to read and process their declarations;
6209 * for such documents, the rule that an entity must be
6210 * declared is a well-formedness constraint only if
6211 * standalone='yes'.
6212 */
6213 if (ent == NULL) {
6214 if ((ctxt->standalone == 1) ||
6215 ((ctxt->hasExternalSubset == 0) &&
6216 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006217 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006218 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006219 } else {
6220 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
6221 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6222 ctxt->sax->warning(ctxt->userData,
6223 "Entity '%s' not defined\n", name);
6224 }
6225 }
6226
6227 /*
6228 * [ WFC: Parsed Entity ]
6229 * An entity reference must not contain the name of an
6230 * unparsed entity
6231 */
6232 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6233 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
6234 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6235 ctxt->sax->error(ctxt->userData,
6236 "Entity reference to unparsed entity %s\n", name);
6237 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006238 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006239 }
6240
6241 /*
6242 * [ WFC: No External Entity References ]
6243 * Attribute values cannot contain direct or indirect
6244 * entity references to external entities.
6245 */
6246 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6247 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6248 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6249 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6250 ctxt->sax->error(ctxt->userData,
6251 "Attribute references external entity '%s'\n", name);
6252 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006253 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006254 }
6255 /*
6256 * [ WFC: No < in Attribute Values ]
6257 * The replacement text of any entity referred to directly or
6258 * indirectly in an attribute value (other than "&lt;") must
6259 * not contain a <.
6260 */
6261 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6262 (ent != NULL) &&
6263 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6264 (ent->content != NULL) &&
6265 (xmlStrchr(ent->content, '<'))) {
6266 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6267 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6268 ctxt->sax->error(ctxt->userData,
6269 "'<' in entity '%s' is not allowed in attributes values\n", name);
6270 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006271 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006272 }
6273
6274 /*
6275 * Internal check, no parameter entities here ...
6276 */
6277 else {
6278 switch (ent->etype) {
6279 case XML_INTERNAL_PARAMETER_ENTITY:
6280 case XML_EXTERNAL_PARAMETER_ENTITY:
6281 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6282 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6283 ctxt->sax->error(ctxt->userData,
6284 "Attempt to reference the parameter entity '%s'\n", name);
6285 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006286 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006287 break;
6288 default:
6289 break;
6290 }
6291 }
6292
6293 /*
6294 * [ WFC: No Recursion ]
6295 * A parsed entity must not contain a recursive reference
6296 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006297 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006298 */
6299
6300 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006301 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006302 }
6303 xmlFree(name);
6304 }
6305 }
6306 *str = ptr;
6307 return(ent);
6308}
6309
6310/**
6311 * xmlParsePEReference:
6312 * @ctxt: an XML parser context
6313 *
6314 * parse PEReference declarations
6315 * The entity content is handled directly by pushing it's content as
6316 * a new input stream.
6317 *
6318 * [69] PEReference ::= '%' Name ';'
6319 *
6320 * [ WFC: No Recursion ]
6321 * A parsed entity must not contain a recursive
6322 * reference to itself, either directly or indirectly.
6323 *
6324 * [ WFC: Entity Declared ]
6325 * In a document without any DTD, a document with only an internal DTD
6326 * subset which contains no parameter entity references, or a document
6327 * with "standalone='yes'", ... ... The declaration of a parameter
6328 * entity must precede any reference to it...
6329 *
6330 * [ VC: Entity Declared ]
6331 * In a document with an external subset or external parameter entities
6332 * with "standalone='no'", ... ... The declaration of a parameter entity
6333 * must precede any reference to it...
6334 *
6335 * [ WFC: In DTD ]
6336 * Parameter-entity references may only appear in the DTD.
6337 * NOTE: misleading but this is handled.
6338 */
6339void
6340xmlParsePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006341 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006342 xmlEntityPtr entity = NULL;
6343 xmlParserInputPtr input;
6344
6345 if (RAW == '%') {
6346 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006347 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006348 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006349 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6350 "xmlParsePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006351 } else {
6352 if (RAW == ';') {
6353 NEXT;
6354 if ((ctxt->sax != NULL) &&
6355 (ctxt->sax->getParameterEntity != NULL))
6356 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6357 name);
6358 if (entity == NULL) {
6359 /*
6360 * [ WFC: Entity Declared ]
6361 * In a document without any DTD, a document with only an
6362 * internal DTD subset which contains no parameter entity
6363 * references, or a document with "standalone='yes'", ...
6364 * ... The declaration of a parameter entity must precede
6365 * any reference to it...
6366 */
6367 if ((ctxt->standalone == 1) ||
6368 ((ctxt->hasExternalSubset == 0) &&
6369 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006370 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006371 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006372 } else {
6373 /*
6374 * [ VC: Entity Declared ]
6375 * In a document with an external subset or external
6376 * parameter entities with "standalone='no'", ...
6377 * ... The declaration of a parameter entity must precede
6378 * any reference to it...
6379 */
6380 if ((!ctxt->disableSAX) &&
6381 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6382 ctxt->sax->warning(ctxt->userData,
6383 "PEReference: %%%s; not found\n", name);
6384 ctxt->valid = 0;
6385 }
6386 } else {
6387 /*
6388 * Internal checking in case the entity quest barfed
6389 */
6390 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6391 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6392 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6393 ctxt->sax->warning(ctxt->userData,
6394 "Internal: %%%s; is not a parameter entity\n", name);
Daniel Veillardf5582f12002-06-11 10:08:16 +00006395 } else if (ctxt->input->free != deallocblankswrapper) {
6396 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6397 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00006398 } else {
6399 /*
6400 * TODO !!!
6401 * handle the extra spaces added before and after
6402 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6403 */
6404 input = xmlNewEntityInputStream(ctxt, entity);
6405 xmlPushInput(ctxt, input);
6406 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6407 (RAW == '<') && (NXT(1) == '?') &&
6408 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6409 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6410 xmlParseTextDecl(ctxt);
6411 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6412 /*
6413 * The XML REC instructs us to stop parsing
6414 * right here
6415 */
6416 ctxt->instate = XML_PARSER_EOF;
Owen Taylor3473f882001-02-23 17:55:21 +00006417 return;
6418 }
6419 }
Owen Taylor3473f882001-02-23 17:55:21 +00006420 }
6421 }
6422 ctxt->hasPErefs = 1;
6423 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006424 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006425 }
Owen Taylor3473f882001-02-23 17:55:21 +00006426 }
6427 }
6428}
6429
6430/**
6431 * xmlParseStringPEReference:
6432 * @ctxt: an XML parser context
6433 * @str: a pointer to an index in the string
6434 *
6435 * parse PEReference declarations
6436 *
6437 * [69] PEReference ::= '%' Name ';'
6438 *
6439 * [ WFC: No Recursion ]
6440 * A parsed entity must not contain a recursive
6441 * reference to itself, either directly or indirectly.
6442 *
6443 * [ WFC: Entity Declared ]
6444 * In a document without any DTD, a document with only an internal DTD
6445 * subset which contains no parameter entity references, or a document
6446 * with "standalone='yes'", ... ... The declaration of a parameter
6447 * entity must precede any reference to it...
6448 *
6449 * [ VC: Entity Declared ]
6450 * In a document with an external subset or external parameter entities
6451 * with "standalone='no'", ... ... The declaration of a parameter entity
6452 * must precede any reference to it...
6453 *
6454 * [ WFC: In DTD ]
6455 * Parameter-entity references may only appear in the DTD.
6456 * NOTE: misleading but this is handled.
6457 *
6458 * Returns the string of the entity content.
6459 * str is updated to the current value of the index
6460 */
6461xmlEntityPtr
6462xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6463 const xmlChar *ptr;
6464 xmlChar cur;
6465 xmlChar *name;
6466 xmlEntityPtr entity = NULL;
6467
6468 if ((str == NULL) || (*str == NULL)) return(NULL);
6469 ptr = *str;
6470 cur = *ptr;
6471 if (cur == '%') {
6472 ptr++;
6473 cur = *ptr;
6474 name = xmlParseStringName(ctxt, &ptr);
6475 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006476 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6477 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006478 } else {
6479 cur = *ptr;
6480 if (cur == ';') {
6481 ptr++;
6482 cur = *ptr;
6483 if ((ctxt->sax != NULL) &&
6484 (ctxt->sax->getParameterEntity != NULL))
6485 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6486 name);
6487 if (entity == NULL) {
6488 /*
6489 * [ WFC: Entity Declared ]
6490 * In a document without any DTD, a document with only an
6491 * internal DTD subset which contains no parameter entity
6492 * references, or a document with "standalone='yes'", ...
6493 * ... The declaration of a parameter entity must precede
6494 * any reference to it...
6495 */
6496 if ((ctxt->standalone == 1) ||
6497 ((ctxt->hasExternalSubset == 0) &&
6498 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006499 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006500 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006501 } else {
6502 /*
6503 * [ VC: Entity Declared ]
6504 * In a document with an external subset or external
6505 * parameter entities with "standalone='no'", ...
6506 * ... The declaration of a parameter entity must
6507 * precede any reference to it...
6508 */
6509 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6510 ctxt->sax->warning(ctxt->userData,
6511 "PEReference: %%%s; not found\n", name);
6512 ctxt->valid = 0;
6513 }
6514 } else {
6515 /*
6516 * Internal checking in case the entity quest barfed
6517 */
6518 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6519 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6520 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6521 ctxt->sax->warning(ctxt->userData,
6522 "Internal: %%%s; is not a parameter entity\n", name);
6523 }
6524 }
6525 ctxt->hasPErefs = 1;
6526 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006527 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006528 }
6529 xmlFree(name);
6530 }
6531 }
6532 *str = ptr;
6533 return(entity);
6534}
6535
6536/**
6537 * xmlParseDocTypeDecl:
6538 * @ctxt: an XML parser context
6539 *
6540 * parse a DOCTYPE declaration
6541 *
6542 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6543 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6544 *
6545 * [ VC: Root Element Type ]
6546 * The Name in the document type declaration must match the element
6547 * type of the root element.
6548 */
6549
6550void
6551xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006552 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006553 xmlChar *ExternalID = NULL;
6554 xmlChar *URI = NULL;
6555
6556 /*
6557 * We know that '<!DOCTYPE' has been detected.
6558 */
6559 SKIP(9);
6560
6561 SKIP_BLANKS;
6562
6563 /*
6564 * Parse the DOCTYPE name.
6565 */
6566 name = xmlParseName(ctxt);
6567 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006568 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6569 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006570 }
6571 ctxt->intSubName = name;
6572
6573 SKIP_BLANKS;
6574
6575 /*
6576 * Check for SystemID and ExternalID
6577 */
6578 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6579
6580 if ((URI != NULL) || (ExternalID != NULL)) {
6581 ctxt->hasExternalSubset = 1;
6582 }
6583 ctxt->extSubURI = URI;
6584 ctxt->extSubSystem = ExternalID;
6585
6586 SKIP_BLANKS;
6587
6588 /*
6589 * Create and update the internal subset.
6590 */
6591 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6592 (!ctxt->disableSAX))
6593 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6594
6595 /*
6596 * Is there any internal subset declarations ?
6597 * they are handled separately in xmlParseInternalSubset()
6598 */
6599 if (RAW == '[')
6600 return;
6601
6602 /*
6603 * We should be at the end of the DOCTYPE declaration.
6604 */
6605 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006606 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006607 }
6608 NEXT;
6609}
6610
6611/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006612 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006613 * @ctxt: an XML parser context
6614 *
6615 * parse the internal subset declaration
6616 *
6617 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6618 */
6619
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006620static void
Owen Taylor3473f882001-02-23 17:55:21 +00006621xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6622 /*
6623 * Is there any DTD definition ?
6624 */
6625 if (RAW == '[') {
6626 ctxt->instate = XML_PARSER_DTD;
6627 NEXT;
6628 /*
6629 * Parse the succession of Markup declarations and
6630 * PEReferences.
6631 * Subsequence (markupdecl | PEReference | S)*
6632 */
6633 while (RAW != ']') {
6634 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006635 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006636
6637 SKIP_BLANKS;
6638 xmlParseMarkupDecl(ctxt);
6639 xmlParsePEReference(ctxt);
6640
6641 /*
6642 * Pop-up of finished entities.
6643 */
6644 while ((RAW == 0) && (ctxt->inputNr > 1))
6645 xmlPopInput(ctxt);
6646
6647 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006648 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006649 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006650 break;
6651 }
6652 }
6653 if (RAW == ']') {
6654 NEXT;
6655 SKIP_BLANKS;
6656 }
6657 }
6658
6659 /*
6660 * We should be at the end of the DOCTYPE declaration.
6661 */
6662 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006663 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006664 }
6665 NEXT;
6666}
6667
6668/**
6669 * xmlParseAttribute:
6670 * @ctxt: an XML parser context
6671 * @value: a xmlChar ** used to store the value of the attribute
6672 *
6673 * parse an attribute
6674 *
6675 * [41] Attribute ::= Name Eq AttValue
6676 *
6677 * [ WFC: No External Entity References ]
6678 * Attribute values cannot contain direct or indirect entity references
6679 * to external entities.
6680 *
6681 * [ WFC: No < in Attribute Values ]
6682 * The replacement text of any entity referred to directly or indirectly in
6683 * an attribute value (other than "&lt;") must not contain a <.
6684 *
6685 * [ VC: Attribute Value Type ]
6686 * The attribute must have been declared; the value must be of the type
6687 * declared for it.
6688 *
6689 * [25] Eq ::= S? '=' S?
6690 *
6691 * With namespace:
6692 *
6693 * [NS 11] Attribute ::= QName Eq AttValue
6694 *
6695 * Also the case QName == xmlns:??? is handled independently as a namespace
6696 * definition.
6697 *
6698 * Returns the attribute name, and the value in *value.
6699 */
6700
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006701const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006702xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006703 const xmlChar *name;
6704 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006705
6706 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006707 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006708 name = xmlParseName(ctxt);
6709 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006710 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6711 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006712 return(NULL);
6713 }
6714
6715 /*
6716 * read the value
6717 */
6718 SKIP_BLANKS;
6719 if (RAW == '=') {
6720 NEXT;
6721 SKIP_BLANKS;
6722 val = xmlParseAttValue(ctxt);
6723 ctxt->instate = XML_PARSER_CONTENT;
6724 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006725 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006726 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006727 return(NULL);
6728 }
6729
6730 /*
6731 * Check that xml:lang conforms to the specification
6732 * No more registered as an error, just generate a warning now
6733 * since this was deprecated in XML second edition
6734 */
6735 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6736 if (!xmlCheckLanguageID(val)) {
6737 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6738 ctxt->sax->warning(ctxt->userData,
6739 "Malformed value for xml:lang : %s\n", val);
6740 }
6741 }
6742
6743 /*
6744 * Check that xml:space conforms to the specification
6745 */
6746 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6747 if (xmlStrEqual(val, BAD_CAST "default"))
6748 *(ctxt->space) = 0;
6749 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6750 *(ctxt->space) = 1;
6751 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006752 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006753"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006754 val);
Owen Taylor3473f882001-02-23 17:55:21 +00006755 }
6756 }
6757
6758 *value = val;
6759 return(name);
6760}
6761
6762/**
6763 * xmlParseStartTag:
6764 * @ctxt: an XML parser context
6765 *
6766 * parse a start of tag either for rule element or
6767 * EmptyElement. In both case we don't parse the tag closing chars.
6768 *
6769 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6770 *
6771 * [ WFC: Unique Att Spec ]
6772 * No attribute name may appear more than once in the same start-tag or
6773 * empty-element tag.
6774 *
6775 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6776 *
6777 * [ WFC: Unique Att Spec ]
6778 * No attribute name may appear more than once in the same start-tag or
6779 * empty-element tag.
6780 *
6781 * With namespace:
6782 *
6783 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6784 *
6785 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6786 *
6787 * Returns the element name parsed
6788 */
6789
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006790const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006791xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006792 const xmlChar *name;
6793 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006794 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006795 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006796 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006797 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006798 int i;
6799
6800 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006801 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006802
6803 name = xmlParseName(ctxt);
6804 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006805 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006806 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006807 return(NULL);
6808 }
6809
6810 /*
6811 * Now parse the attributes, it ends up with the ending
6812 *
6813 * (S Attribute)* S?
6814 */
6815 SKIP_BLANKS;
6816 GROW;
6817
Daniel Veillard21a0f912001-02-25 19:54:14 +00006818 while ((RAW != '>') &&
6819 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard34ba3872003-07-15 13:34:05 +00006820 (IS_CHAR((unsigned int) RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006821 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006822 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006823
6824 attname = xmlParseAttribute(ctxt, &attvalue);
6825 if ((attname != NULL) && (attvalue != NULL)) {
6826 /*
6827 * [ WFC: Unique Att Spec ]
6828 * No attribute name may appear more than once in the same
6829 * start-tag or empty-element tag.
6830 */
6831 for (i = 0; i < nbatts;i += 2) {
6832 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006833 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006834 xmlFree(attvalue);
6835 goto failed;
6836 }
6837 }
Owen Taylor3473f882001-02-23 17:55:21 +00006838 /*
6839 * Add the pair to atts
6840 */
6841 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006842 maxatts = 22; /* allow for 10 attrs by default */
6843 atts = (const xmlChar **)
6844 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006845 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006846 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006847 if (attvalue != NULL)
6848 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006849 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006850 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006851 ctxt->atts = atts;
6852 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006853 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006854 const xmlChar **n;
6855
Owen Taylor3473f882001-02-23 17:55:21 +00006856 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006857 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006858 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006859 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006860 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006861 if (attvalue != NULL)
6862 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006863 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006864 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006865 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006866 ctxt->atts = atts;
6867 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006868 }
6869 atts[nbatts++] = attname;
6870 atts[nbatts++] = attvalue;
6871 atts[nbatts] = NULL;
6872 atts[nbatts + 1] = NULL;
6873 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006874 if (attvalue != NULL)
6875 xmlFree(attvalue);
6876 }
6877
6878failed:
6879
Daniel Veillard3772de32002-12-17 10:31:45 +00006880 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006881 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6882 break;
6883 if (!IS_BLANK(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006884 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6885 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006886 }
6887 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006888 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6889 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006890 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
6891 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006892 break;
6893 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006894 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00006895 GROW;
6896 }
6897
6898 /*
6899 * SAX: Start of Element !
6900 */
6901 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006902 (!ctxt->disableSAX)) {
6903 if (nbatts > 0)
6904 ctxt->sax->startElement(ctxt->userData, name, atts);
6905 else
6906 ctxt->sax->startElement(ctxt->userData, name, NULL);
6907 }
Owen Taylor3473f882001-02-23 17:55:21 +00006908
6909 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006910 /* Free only the content strings */
6911 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006912 if (atts[i] != NULL)
6913 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006914 }
6915 return(name);
6916}
6917
6918/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00006919 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00006920 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00006921 * @line: line of the start tag
6922 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00006923 *
6924 * parse an end of tag
6925 *
6926 * [42] ETag ::= '</' Name S? '>'
6927 *
6928 * With namespace
6929 *
6930 * [NS 9] ETag ::= '</' QName S? '>'
6931 */
6932
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006933static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00006934xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006935 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006936
6937 GROW;
6938 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006939 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6940 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006941 return;
6942 }
6943 SKIP(2);
6944
Daniel Veillard46de64e2002-05-29 08:21:33 +00006945 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006946
6947 /*
6948 * We should definitely be at the ending "S? '>'" part
6949 */
6950 GROW;
6951 SKIP_BLANKS;
Daniel Veillard34ba3872003-07-15 13:34:05 +00006952 if ((!IS_CHAR((unsigned int) RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006953 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006954 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006955 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006956
6957 /*
6958 * [ WFC: Element Type Match ]
6959 * The Name in an element's end-tag must match the element type in the
6960 * start-tag.
6961 *
6962 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006963 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00006964 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6965 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00006966 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006967 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006968 "Opening and ending tag mismatch: %s line %d and %s\n",
6969 ctxt->name, line, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00006970 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006971 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006972 "Ending tag error for: %s line %d\n", ctxt->name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00006973 }
6974
6975 }
6976 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006977 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006978 }
6979
6980 /*
6981 * SAX: End of Tag
6982 */
6983 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6984 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006985 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006986
Daniel Veillarde57ec792003-09-10 10:50:59 +00006987 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006988 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006989 return;
6990}
6991
6992/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006993 * xmlParseEndTag:
6994 * @ctxt: an XML parser context
6995 *
6996 * parse an end of tag
6997 *
6998 * [42] ETag ::= '</' Name S? '>'
6999 *
7000 * With namespace
7001 *
7002 * [NS 9] ETag ::= '</' QName S? '>'
7003 */
7004
7005void
7006xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007007 xmlParseEndTag1(ctxt, 0);
7008}
7009
7010/************************************************************************
7011 * *
7012 * SAX 2 specific operations *
7013 * *
7014 ************************************************************************/
7015
7016static const xmlChar *
7017xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7018 int len = 0, l;
7019 int c;
7020 int count = 0;
7021
7022 /*
7023 * Handler for more complex cases
7024 */
7025 GROW;
7026 c = CUR_CHAR(l);
7027 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007028 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007029 return(NULL);
7030 }
7031
7032 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
7033 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007034 (c == '.') || (c == '-') || (c == '_') ||
Daniel Veillard0fb18932003-09-07 09:14:37 +00007035 (IS_COMBINING(c)) ||
7036 (IS_EXTENDER(c)))) {
7037 if (count++ > 100) {
7038 count = 0;
7039 GROW;
7040 }
7041 len += l;
7042 NEXTL(l);
7043 c = CUR_CHAR(l);
7044 }
7045 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7046}
7047
7048/*
7049 * xmlGetNamespace:
7050 * @ctxt: an XML parser context
7051 * @prefix: the prefix to lookup
7052 *
7053 * Lookup the namespace name for the @prefix (which ca be NULL)
7054 * The prefix must come from the @ctxt->dict dictionnary
7055 *
7056 * Returns the namespace name or NULL if not bound
7057 */
7058static const xmlChar *
7059xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7060 int i;
7061
Daniel Veillarde57ec792003-09-10 10:50:59 +00007062 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007063 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007064 if (ctxt->nsTab[i] == prefix) {
7065 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7066 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007067 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007068 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007069 return(NULL);
7070}
7071
7072/**
7073 * xmlParseNCName:
7074 * @ctxt: an XML parser context
7075 *
7076 * parse an XML name.
7077 *
7078 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7079 * CombiningChar | Extender
7080 *
7081 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7082 *
7083 * Returns the Name parsed or NULL
7084 */
7085
7086static const xmlChar *
7087xmlParseNCName(xmlParserCtxtPtr ctxt) {
7088 const xmlChar *in;
7089 const xmlChar *ret;
7090 int count = 0;
7091
7092 /*
7093 * Accelerator for simple ASCII names
7094 */
7095 in = ctxt->input->cur;
7096 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7097 ((*in >= 0x41) && (*in <= 0x5A)) ||
7098 (*in == '_')) {
7099 in++;
7100 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7101 ((*in >= 0x41) && (*in <= 0x5A)) ||
7102 ((*in >= 0x30) && (*in <= 0x39)) ||
7103 (*in == '_') || (*in == '-') ||
7104 (*in == '.'))
7105 in++;
7106 if ((*in > 0) && (*in < 0x80)) {
7107 count = in - ctxt->input->cur;
7108 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7109 ctxt->input->cur = in;
7110 ctxt->nbChars += count;
7111 ctxt->input->col += count;
7112 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007113 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007114 }
7115 return(ret);
7116 }
7117 }
7118 return(xmlParseNCNameComplex(ctxt));
7119}
7120
7121/**
7122 * xmlParseQName:
7123 * @ctxt: an XML parser context
7124 * @prefix: pointer to store the prefix part
7125 *
7126 * parse an XML Namespace QName
7127 *
7128 * [6] QName ::= (Prefix ':')? LocalPart
7129 * [7] Prefix ::= NCName
7130 * [8] LocalPart ::= NCName
7131 *
7132 * Returns the Name parsed or NULL
7133 */
7134
7135static const xmlChar *
7136xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7137 const xmlChar *l, *p;
7138
7139 GROW;
7140
7141 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007142 if (l == NULL) {
7143 if (CUR == ':') {
7144 l = xmlParseName(ctxt);
7145 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007146 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7147 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007148 *prefix = NULL;
7149 return(l);
7150 }
7151 }
7152 return(NULL);
7153 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007154 if (CUR == ':') {
7155 NEXT;
7156 p = l;
7157 l = xmlParseNCName(ctxt);
7158 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007159 xmlChar *tmp;
7160
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007161 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7162 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007163 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7164 p = xmlDictLookup(ctxt->dict, tmp, -1);
7165 if (tmp != NULL) xmlFree(tmp);
7166 *prefix = NULL;
7167 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007168 }
7169 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007170 xmlChar *tmp;
7171
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007172 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7173 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007174 NEXT;
7175 tmp = (xmlChar *) xmlParseName(ctxt);
7176 if (tmp != NULL) {
7177 tmp = xmlBuildQName(tmp, l, NULL, 0);
7178 l = xmlDictLookup(ctxt->dict, tmp, -1);
7179 if (tmp != NULL) xmlFree(tmp);
7180 *prefix = p;
7181 return(l);
7182 }
7183 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7184 l = xmlDictLookup(ctxt->dict, tmp, -1);
7185 if (tmp != NULL) xmlFree(tmp);
7186 *prefix = p;
7187 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007188 }
7189 *prefix = p;
7190 } else
7191 *prefix = NULL;
7192 return(l);
7193}
7194
7195/**
7196 * xmlParseQNameAndCompare:
7197 * @ctxt: an XML parser context
7198 * @name: the localname
7199 * @prefix: the prefix, if any.
7200 *
7201 * parse an XML name and compares for match
7202 * (specialized for endtag parsing)
7203 *
7204 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7205 * and the name for mismatch
7206 */
7207
7208static const xmlChar *
7209xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7210 xmlChar const *prefix) {
7211 const xmlChar *cmp = name;
7212 const xmlChar *in;
7213 const xmlChar *ret;
7214 const xmlChar *prefix2;
7215
7216 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7217
7218 GROW;
7219 in = ctxt->input->cur;
7220
7221 cmp = prefix;
7222 while (*in != 0 && *in == *cmp) {
7223 ++in;
7224 ++cmp;
7225 }
7226 if ((*cmp == 0) && (*in == ':')) {
7227 in++;
7228 cmp = name;
7229 while (*in != 0 && *in == *cmp) {
7230 ++in;
7231 ++cmp;
7232 }
7233 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
7234 /* success */
7235 ctxt->input->cur = in;
7236 return((const xmlChar*) 1);
7237 }
7238 }
7239 /*
7240 * all strings coms from the dictionary, equality can be done directly
7241 */
7242 ret = xmlParseQName (ctxt, &prefix2);
7243 if ((ret == name) && (prefix == prefix2))
7244 return((const xmlChar*) 1);
7245 return ret;
7246}
7247
7248/**
7249 * xmlParseAttValueInternal:
7250 * @ctxt: an XML parser context
7251 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007252 * @alloc: whether the attribute was reallocated as a new string
7253 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007254 *
7255 * parse a value for an attribute.
7256 * NOTE: if no normalization is needed, the routine will return pointers
7257 * directly from the data buffer.
7258 *
7259 * 3.3.3 Attribute-Value Normalization:
7260 * Before the value of an attribute is passed to the application or
7261 * checked for validity, the XML processor must normalize it as follows:
7262 * - a character reference is processed by appending the referenced
7263 * character to the attribute value
7264 * - an entity reference is processed by recursively processing the
7265 * replacement text of the entity
7266 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7267 * appending #x20 to the normalized value, except that only a single
7268 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7269 * parsed entity or the literal entity value of an internal parsed entity
7270 * - other characters are processed by appending them to the normalized value
7271 * If the declared value is not CDATA, then the XML processor must further
7272 * process the normalized attribute value by discarding any leading and
7273 * trailing space (#x20) characters, and by replacing sequences of space
7274 * (#x20) characters by a single space (#x20) character.
7275 * All attributes for which no declaration has been read should be treated
7276 * by a non-validating parser as if declared CDATA.
7277 *
7278 * Returns the AttValue parsed or NULL. The value has to be freed by the
7279 * caller if it was copied, this can be detected by val[*len] == 0.
7280 */
7281
7282static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007283xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7284 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007285{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007286 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007287 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007288 xmlChar *ret = NULL;
7289
7290 GROW;
7291 in = (xmlChar *) CUR_PTR;
7292 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007293 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007294 return (NULL);
7295 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007296 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007297
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007298 /*
7299 * try to handle in this routine the most common case where no
7300 * allocation of a new string is required and where content is
7301 * pure ASCII.
7302 */
7303 limit = *in++;
7304 end = ctxt->input->end;
7305 start = in;
7306 if (in >= end) {
7307 const xmlChar *oldbase = ctxt->input->base;
7308 GROW;
7309 if (oldbase != ctxt->input->base) {
7310 long delta = ctxt->input->base - oldbase;
7311 start = start + delta;
7312 in = in + delta;
7313 }
7314 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007315 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007316 if (normalize) {
7317 /*
7318 * Skip any leading spaces
7319 */
7320 while ((in < end) && (*in != limit) &&
7321 ((*in == 0x20) || (*in == 0x9) ||
7322 (*in == 0xA) || (*in == 0xD))) {
7323 in++;
7324 start = in;
7325 if (in >= end) {
7326 const xmlChar *oldbase = ctxt->input->base;
7327 GROW;
7328 if (oldbase != ctxt->input->base) {
7329 long delta = ctxt->input->base - oldbase;
7330 start = start + delta;
7331 in = in + delta;
7332 }
7333 end = ctxt->input->end;
7334 }
7335 }
7336 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7337 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7338 if ((*in++ == 0x20) && (*in == 0x20)) break;
7339 if (in >= end) {
7340 const xmlChar *oldbase = ctxt->input->base;
7341 GROW;
7342 if (oldbase != ctxt->input->base) {
7343 long delta = ctxt->input->base - oldbase;
7344 start = start + delta;
7345 in = in + delta;
7346 }
7347 end = ctxt->input->end;
7348 }
7349 }
7350 last = in;
7351 /*
7352 * skip the trailing blanks
7353 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007354 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007355 while ((in < end) && (*in != limit) &&
7356 ((*in == 0x20) || (*in == 0x9) ||
7357 (*in == 0xA) || (*in == 0xD))) {
7358 in++;
7359 if (in >= end) {
7360 const xmlChar *oldbase = ctxt->input->base;
7361 GROW;
7362 if (oldbase != ctxt->input->base) {
7363 long delta = ctxt->input->base - oldbase;
7364 start = start + delta;
7365 in = in + delta;
7366 last = last + delta;
7367 }
7368 end = ctxt->input->end;
7369 }
7370 }
7371 if (*in != limit) goto need_complex;
7372 } else {
7373 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7374 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7375 in++;
7376 if (in >= end) {
7377 const xmlChar *oldbase = ctxt->input->base;
7378 GROW;
7379 if (oldbase != ctxt->input->base) {
7380 long delta = ctxt->input->base - oldbase;
7381 start = start + delta;
7382 in = in + delta;
7383 }
7384 end = ctxt->input->end;
7385 }
7386 }
7387 last = in;
7388 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007389 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007390 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007391 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007392 *len = last - start;
7393 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007394 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007395 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007396 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007397 }
7398 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007399 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007400 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007401need_complex:
7402 if (alloc) *alloc = 1;
7403 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007404}
7405
7406/**
7407 * xmlParseAttribute2:
7408 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007409 * @pref: the element prefix
7410 * @elem: the element name
7411 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007412 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007413 * @len: an int * to save the length of the attribute
7414 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007415 *
7416 * parse an attribute in the new SAX2 framework.
7417 *
7418 * Returns the attribute name, and the value in *value, .
7419 */
7420
7421static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007422xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7423 const xmlChar *pref, const xmlChar *elem,
7424 const xmlChar **prefix, xmlChar **value,
7425 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007426 const xmlChar *name;
7427 xmlChar *val;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007428 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007429
7430 *value = NULL;
7431 GROW;
7432 name = xmlParseQName(ctxt, prefix);
7433 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007434 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7435 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007436 return(NULL);
7437 }
7438
7439 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007440 * get the type if needed
7441 */
7442 if (ctxt->attsSpecial != NULL) {
7443 int type;
7444
7445 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7446 pref, elem, *prefix, name);
7447 if (type != 0) normalize = 1;
7448 }
7449
7450 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007451 * read the value
7452 */
7453 SKIP_BLANKS;
7454 if (RAW == '=') {
7455 NEXT;
7456 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007457 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007458 ctxt->instate = XML_PARSER_CONTENT;
7459 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007460 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007461 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007462 return(NULL);
7463 }
7464
7465 /*
7466 * Check that xml:lang conforms to the specification
7467 * No more registered as an error, just generate a warning now
7468 * since this was deprecated in XML second edition
7469 */
7470 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7471 if (!xmlCheckLanguageID(val)) {
7472 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7473 ctxt->sax->warning(ctxt->userData,
7474 "Malformed value for xml:lang : %s\n", val);
7475 }
7476 }
7477
7478 /*
7479 * Check that xml:space conforms to the specification
7480 */
7481 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7482 if (xmlStrEqual(val, BAD_CAST "default"))
7483 *(ctxt->space) = 0;
7484 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7485 *(ctxt->space) = 1;
7486 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007487 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007488"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7489 val);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007490 }
7491 }
7492
7493 *value = val;
7494 return(name);
7495}
7496
7497/**
7498 * xmlParseStartTag2:
7499 * @ctxt: an XML parser context
7500 *
7501 * parse a start of tag either for rule element or
7502 * EmptyElement. In both case we don't parse the tag closing chars.
7503 * This routine is called when running SAX2 parsing
7504 *
7505 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7506 *
7507 * [ WFC: Unique Att Spec ]
7508 * No attribute name may appear more than once in the same start-tag or
7509 * empty-element tag.
7510 *
7511 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7512 *
7513 * [ WFC: Unique Att Spec ]
7514 * No attribute name may appear more than once in the same start-tag or
7515 * empty-element tag.
7516 *
7517 * With namespace:
7518 *
7519 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7520 *
7521 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7522 *
7523 * Returns the element name parsed
7524 */
7525
7526static const xmlChar *
7527xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
7528 const xmlChar **URI) {
7529 const xmlChar *localname;
7530 const xmlChar *prefix;
7531 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007532 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007533 const xmlChar *nsname;
7534 xmlChar *attvalue;
7535 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007536 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007537 int nratts, nbatts, nbdef;
7538 int i, j, nbNs, attval;
7539 const xmlChar *base;
7540 unsigned long cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007541
7542 if (RAW != '<') return(NULL);
7543 NEXT1;
7544
7545 /*
7546 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7547 * point since the attribute values may be stored as pointers to
7548 * the buffer and calling SHRINK would destroy them !
7549 * The Shrinking is only possible once the full set of attribute
7550 * callbacks have been done.
7551 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007552reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007553 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007554 base = ctxt->input->base;
7555 cur = ctxt->input->cur - ctxt->input->base;
7556 nbatts = 0;
7557 nratts = 0;
7558 nbdef = 0;
7559 nbNs = 0;
7560 attval = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007561
7562 localname = xmlParseQName(ctxt, &prefix);
7563 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007564 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7565 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007566 return(NULL);
7567 }
7568
7569 /*
7570 * Now parse the attributes, it ends up with the ending
7571 *
7572 * (S Attribute)* S?
7573 */
7574 SKIP_BLANKS;
7575 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007576 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007577
7578 while ((RAW != '>') &&
7579 ((RAW != '/') || (NXT(1) != '>')) &&
7580 (IS_CHAR((unsigned int) RAW))) {
7581 const xmlChar *q = CUR_PTR;
7582 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007583 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007584
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007585 attname = xmlParseAttribute2(ctxt, prefix, localname,
7586 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007587 if ((attname != NULL) && (attvalue != NULL)) {
7588 if (len < 0) len = xmlStrlen(attvalue);
7589 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007590 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7591 xmlURIPtr uri;
7592
7593 if (*URL != 0) {
7594 uri = xmlParseURI((const char *) URL);
7595 if (uri == NULL) {
7596 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7597 ctxt->sax->warning(ctxt->userData,
7598 "xmlns: %s not a valid URI\n", URL);
7599 } else {
7600 if (uri->scheme == NULL) {
7601 if ((ctxt->sax != NULL) &&
7602 (ctxt->sax->warning != NULL))
7603 ctxt->sax->warning(ctxt->userData,
7604 "xmlns: URI %s is not absolute\n", URL);
7605 }
7606 xmlFreeURI(uri);
7607 }
7608 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007609 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007610 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007611 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007612 for (j = 1;j <= nbNs;j++)
7613 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7614 break;
7615 if (j <= nbNs)
7616 xmlErrAttributeDup(ctxt, NULL, attname);
7617 else
7618 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007619 if (alloc != 0) xmlFree(attvalue);
7620 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007621 continue;
7622 }
7623 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007624 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7625 xmlURIPtr uri;
7626
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007627 if (attname == ctxt->str_xml) {
7628 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007629 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7630 "xml namespace prefix mapped to wrong URI\n",
7631 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007632 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007633 /*
7634 * Do not keep a namespace definition node
7635 */
7636 if (alloc != 0) xmlFree(attvalue);
7637 SKIP_BLANKS;
7638 continue;
7639 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007640 uri = xmlParseURI((const char *) URL);
7641 if (uri == NULL) {
7642 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7643 ctxt->sax->warning(ctxt->userData,
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007644 "xmlns:%s: '%s' is not a valid URI\n",
7645 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007646 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007647 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007648 if ((ctxt->sax != NULL) &&
7649 (ctxt->sax->warning != NULL))
7650 ctxt->sax->warning(ctxt->userData,
7651 "xmlns:%s: URI %s is not absolute\n",
7652 attname, URL);
7653 }
7654 xmlFreeURI(uri);
7655 }
7656
Daniel Veillard0fb18932003-09-07 09:14:37 +00007657 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007658 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007659 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007660 for (j = 1;j <= nbNs;j++)
7661 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7662 break;
7663 if (j <= nbNs)
7664 xmlErrAttributeDup(ctxt, aprefix, attname);
7665 else
7666 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007667 if (alloc != 0) xmlFree(attvalue);
7668 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007669 continue;
7670 }
7671
7672 /*
7673 * Add the pair to atts
7674 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007675 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7676 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007677 if (attvalue[len] == 0)
7678 xmlFree(attvalue);
7679 goto failed;
7680 }
7681 maxatts = ctxt->maxatts;
7682 atts = ctxt->atts;
7683 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007684 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007685 atts[nbatts++] = attname;
7686 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007687 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007688 atts[nbatts++] = attvalue;
7689 attvalue += len;
7690 atts[nbatts++] = attvalue;
7691 /*
7692 * tag if some deallocation is needed
7693 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007694 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007695 } else {
7696 if ((attvalue != NULL) && (attvalue[len] == 0))
7697 xmlFree(attvalue);
7698 }
7699
7700failed:
7701
7702 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007703 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007704 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7705 break;
7706 if (!IS_BLANK(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007707 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7708 "attributes construct error\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007709 }
7710 SKIP_BLANKS;
7711 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7712 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007713 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007714 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007715 break;
7716 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007717 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007718 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007719 }
7720
Daniel Veillard0fb18932003-09-07 09:14:37 +00007721 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007722 * The attributes checkings
Daniel Veillard0fb18932003-09-07 09:14:37 +00007723 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007724 for (i = 0; i < nbatts;i += 5) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007725 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7726 if ((atts[i + 1] != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007727 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007728 "Namespace prefix %s for %s on %s is not defined\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007729 atts[i + 1], atts[i], localname);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007730 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007731 atts[i + 2] = nsname;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007732 /*
7733 * [ WFC: Unique Att Spec ]
7734 * No attribute name may appear more than once in the same
7735 * start-tag or empty-element tag.
7736 * As extended by the Namespace in XML REC.
7737 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007738 for (j = 0; j < i;j += 5) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007739 if (atts[i] == atts[j]) {
7740 if (atts[i+1] == atts[j+1]) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007741 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007742 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007743 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007744 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007745 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007746 "Namespaced Attribute %s in '%s' redefined\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007747 atts[i], nsname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007748 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007749 }
7750 }
7751 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007752 }
7753
7754 /*
7755 * The attributes defaulting
7756 */
7757 if (ctxt->attsDefault != NULL) {
7758 xmlDefAttrsPtr defaults;
7759
7760 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7761 if (defaults != NULL) {
7762 for (i = 0;i < defaults->nbAttrs;i++) {
7763 attname = defaults->values[4 * i];
7764 aprefix = defaults->values[4 * i + 1];
7765
7766 /*
7767 * special work for namespaces defaulted defs
7768 */
7769 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7770 /*
7771 * check that it's not a defined namespace
7772 */
7773 for (j = 1;j <= nbNs;j++)
7774 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7775 break;
7776 if (j <= nbNs) continue;
7777
7778 nsname = xmlGetNamespace(ctxt, NULL);
7779 if (nsname != defaults->values[4 * i + 2]) {
7780 if (nsPush(ctxt, NULL,
7781 defaults->values[4 * i + 2]) > 0)
7782 nbNs++;
7783 }
7784 } else if (aprefix == ctxt->str_xmlns) {
7785 /*
7786 * check that it's not a defined namespace
7787 */
7788 for (j = 1;j <= nbNs;j++)
7789 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7790 break;
7791 if (j <= nbNs) continue;
7792
7793 nsname = xmlGetNamespace(ctxt, attname);
7794 if (nsname != defaults->values[2]) {
7795 if (nsPush(ctxt, attname,
7796 defaults->values[4 * i + 2]) > 0)
7797 nbNs++;
7798 }
7799 } else {
7800 /*
7801 * check that it's not a defined attribute
7802 */
7803 for (j = 0;j < nbatts;j+=5) {
7804 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7805 break;
7806 }
7807 if (j < nbatts) continue;
7808
7809 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7810 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
7811 goto failed;
7812 }
7813 maxatts = ctxt->maxatts;
7814 atts = ctxt->atts;
7815 }
7816 atts[nbatts++] = attname;
7817 atts[nbatts++] = aprefix;
7818 if (aprefix == NULL)
7819 atts[nbatts++] = NULL;
7820 else
7821 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7822 atts[nbatts++] = defaults->values[4 * i + 2];
7823 atts[nbatts++] = defaults->values[4 * i + 3];
7824 nbdef++;
7825 }
7826 }
7827 }
7828 }
7829
7830 nsname = xmlGetNamespace(ctxt, prefix);
7831 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007832 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7833 "Namespace prefix %s on %s is not defined\n",
7834 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007835 }
7836 *pref = prefix;
7837 *URI = nsname;
7838
7839 /*
7840 * SAX: Start of Element !
7841 */
7842 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7843 (!ctxt->disableSAX)) {
7844 if (nbNs > 0)
7845 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7846 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7847 nbatts / 5, nbdef, atts);
7848 else
7849 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7850 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7851 }
7852
7853 /*
7854 * Free up attribute allocated strings if needed
7855 */
7856 if (attval != 0) {
7857 for (i = 3,j = 0; j < nratts;i += 5,j++)
7858 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7859 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007860 }
7861
7862 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007863
7864base_changed:
7865 /*
7866 * the attribute strings are valid iif the base didn't changed
7867 */
7868 if (attval != 0) {
7869 for (i = 3,j = 0; j < nratts;i += 5,j++)
7870 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7871 xmlFree((xmlChar *) atts[i]);
7872 }
7873 ctxt->input->cur = ctxt->input->base + cur;
7874 if (ctxt->wellFormed == 1) {
7875 goto reparse;
7876 }
7877 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007878}
7879
7880/**
7881 * xmlParseEndTag2:
7882 * @ctxt: an XML parser context
7883 * @line: line of the start tag
7884 * @nsNr: number of namespaces on the start tag
7885 *
7886 * parse an end of tag
7887 *
7888 * [42] ETag ::= '</' Name S? '>'
7889 *
7890 * With namespace
7891 *
7892 * [NS 9] ETag ::= '</' QName S? '>'
7893 */
7894
7895static void
7896xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
7897 const xmlChar *URI, int line, int nsNr) {
7898 const xmlChar *name;
7899
7900 GROW;
7901 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007902 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007903 return;
7904 }
7905 SKIP(2);
7906
7907 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
7908
7909 /*
7910 * We should definitely be at the ending "S? '>'" part
7911 */
7912 GROW;
7913 SKIP_BLANKS;
7914 if ((!IS_CHAR((unsigned int) RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007915 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007916 } else
7917 NEXT1;
7918
7919 /*
7920 * [ WFC: Element Type Match ]
7921 * The Name in an element's end-tag must match the element type in the
7922 * start-tag.
7923 *
7924 */
7925 if (name != (xmlChar*)1) {
7926 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
7927 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
7928 if (name != NULL) {
7929 ctxt->sax->error(ctxt->userData,
7930 "Opening and ending tag mismatch: %s line %d and %s\n",
7931 ctxt->name, line, name);
7932 } else {
7933 ctxt->sax->error(ctxt->userData,
7934 "Ending tag error for: %s line %d\n", ctxt->name, line);
7935 }
7936
7937 }
7938 ctxt->wellFormed = 0;
7939 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7940 }
7941
7942 /*
7943 * SAX: End of Tag
7944 */
7945 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7946 (!ctxt->disableSAX))
7947 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
7948
Daniel Veillard0fb18932003-09-07 09:14:37 +00007949 spacePop(ctxt);
7950 if (nsNr != 0)
7951 nsPop(ctxt, nsNr);
7952 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007953}
7954
7955/**
Owen Taylor3473f882001-02-23 17:55:21 +00007956 * xmlParseCDSect:
7957 * @ctxt: an XML parser context
7958 *
7959 * Parse escaped pure raw content.
7960 *
7961 * [18] CDSect ::= CDStart CData CDEnd
7962 *
7963 * [19] CDStart ::= '<![CDATA['
7964 *
7965 * [20] Data ::= (Char* - (Char* ']]>' Char*))
7966 *
7967 * [21] CDEnd ::= ']]>'
7968 */
7969void
7970xmlParseCDSect(xmlParserCtxtPtr ctxt) {
7971 xmlChar *buf = NULL;
7972 int len = 0;
7973 int size = XML_PARSER_BUFFER_SIZE;
7974 int r, rl;
7975 int s, sl;
7976 int cur, l;
7977 int count = 0;
7978
7979 if ((NXT(0) == '<') && (NXT(1) == '!') &&
7980 (NXT(2) == '[') && (NXT(3) == 'C') &&
7981 (NXT(4) == 'D') && (NXT(5) == 'A') &&
7982 (NXT(6) == 'T') && (NXT(7) == 'A') &&
7983 (NXT(8) == '[')) {
7984 SKIP(9);
7985 } else
7986 return;
7987
7988 ctxt->instate = XML_PARSER_CDATA_SECTION;
7989 r = CUR_CHAR(rl);
7990 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007991 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007992 ctxt->instate = XML_PARSER_CONTENT;
7993 return;
7994 }
7995 NEXTL(rl);
7996 s = CUR_CHAR(sl);
7997 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007998 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007999 ctxt->instate = XML_PARSER_CONTENT;
8000 return;
8001 }
8002 NEXTL(sl);
8003 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008004 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008005 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008006 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008007 return;
8008 }
8009 while (IS_CHAR(cur) &&
8010 ((r != ']') || (s != ']') || (cur != '>'))) {
8011 if (len + 5 >= size) {
8012 size *= 2;
8013 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8014 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008015 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008016 return;
8017 }
8018 }
8019 COPY_BUF(rl,buf,len,r);
8020 r = s;
8021 rl = sl;
8022 s = cur;
8023 sl = l;
8024 count++;
8025 if (count > 50) {
8026 GROW;
8027 count = 0;
8028 }
8029 NEXTL(l);
8030 cur = CUR_CHAR(l);
8031 }
8032 buf[len] = 0;
8033 ctxt->instate = XML_PARSER_CONTENT;
8034 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008035 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008036 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008037 xmlFree(buf);
8038 return;
8039 }
8040 NEXTL(l);
8041
8042 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008043 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008044 */
8045 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8046 if (ctxt->sax->cdataBlock != NULL)
8047 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008048 else if (ctxt->sax->characters != NULL)
8049 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008050 }
8051 xmlFree(buf);
8052}
8053
8054/**
8055 * xmlParseContent:
8056 * @ctxt: an XML parser context
8057 *
8058 * Parse a content:
8059 *
8060 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8061 */
8062
8063void
8064xmlParseContent(xmlParserCtxtPtr ctxt) {
8065 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008066 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008067 ((RAW != '<') || (NXT(1) != '/'))) {
8068 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008069 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008070 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008071
8072 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008073 * First case : a Processing Instruction.
8074 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008075 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008076 xmlParsePI(ctxt);
8077 }
8078
8079 /*
8080 * Second case : a CDSection
8081 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008082 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008083 (NXT(2) == '[') && (NXT(3) == 'C') &&
8084 (NXT(4) == 'D') && (NXT(5) == 'A') &&
8085 (NXT(6) == 'T') && (NXT(7) == 'A') &&
8086 (NXT(8) == '[')) {
8087 xmlParseCDSect(ctxt);
8088 }
8089
8090 /*
8091 * Third case : a comment
8092 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008093 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008094 (NXT(2) == '-') && (NXT(3) == '-')) {
8095 xmlParseComment(ctxt);
8096 ctxt->instate = XML_PARSER_CONTENT;
8097 }
8098
8099 /*
8100 * Fourth case : a sub-element.
8101 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008102 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008103 xmlParseElement(ctxt);
8104 }
8105
8106 /*
8107 * Fifth case : a reference. If if has not been resolved,
8108 * parsing returns it's Name, create the node
8109 */
8110
Daniel Veillard21a0f912001-02-25 19:54:14 +00008111 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008112 xmlParseReference(ctxt);
8113 }
8114
8115 /*
8116 * Last case, text. Note that References are handled directly.
8117 */
8118 else {
8119 xmlParseCharData(ctxt, 0);
8120 }
8121
8122 GROW;
8123 /*
8124 * Pop-up of finished entities.
8125 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008126 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008127 xmlPopInput(ctxt);
8128 SHRINK;
8129
Daniel Veillardfdc91562002-07-01 21:52:03 +00008130 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008131 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8132 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008133 ctxt->instate = XML_PARSER_EOF;
8134 break;
8135 }
8136 }
8137}
8138
8139/**
8140 * xmlParseElement:
8141 * @ctxt: an XML parser context
8142 *
8143 * parse an XML element, this is highly recursive
8144 *
8145 * [39] element ::= EmptyElemTag | STag content ETag
8146 *
8147 * [ WFC: Element Type Match ]
8148 * The Name in an element's end-tag must match the element type in the
8149 * start-tag.
8150 *
Owen Taylor3473f882001-02-23 17:55:21 +00008151 */
8152
8153void
8154xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008155 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008156 const xmlChar *prefix;
8157 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008158 xmlParserNodeInfo node_info;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008159 int line;
Owen Taylor3473f882001-02-23 17:55:21 +00008160 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008161 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008162
8163 /* Capture start position */
8164 if (ctxt->record_info) {
8165 node_info.begin_pos = ctxt->input->consumed +
8166 (CUR_PTR - ctxt->input->base);
8167 node_info.begin_line = ctxt->input->line;
8168 }
8169
8170 if (ctxt->spaceNr == 0)
8171 spacePush(ctxt, -1);
8172 else
8173 spacePush(ctxt, *ctxt->space);
8174
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008175 line = ctxt->input->line;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008176 if (ctxt->sax2)
8177 name = xmlParseStartTag2(ctxt, &prefix, &URI);
8178 else
8179 name = xmlParseStartTag(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008180 if (name == NULL) {
8181 spacePop(ctxt);
8182 return;
8183 }
8184 namePush(ctxt, name);
8185 ret = ctxt->node;
8186
8187 /*
8188 * [ VC: Root Element Type ]
8189 * The Name in the document type declaration must match the element
8190 * type of the root element.
8191 */
8192 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8193 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8194 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8195
8196 /*
8197 * Check for an Empty Element.
8198 */
8199 if ((RAW == '/') && (NXT(1) == '>')) {
8200 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008201 if (ctxt->sax2) {
8202 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8203 (!ctxt->disableSAX))
8204 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
8205 } else {
8206 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8207 (!ctxt->disableSAX))
8208 ctxt->sax->endElement(ctxt->userData, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008209 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008210 namePop(ctxt);
8211 spacePop(ctxt);
8212 if (nsNr != ctxt->nsNr)
8213 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008214 if ( ret != NULL && ctxt->record_info ) {
8215 node_info.end_pos = ctxt->input->consumed +
8216 (CUR_PTR - ctxt->input->base);
8217 node_info.end_line = ctxt->input->line;
8218 node_info.node = ret;
8219 xmlParserAddNodeInfo(ctxt, &node_info);
8220 }
8221 return;
8222 }
8223 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008224 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008225 } else {
8226 ctxt->errNo = XML_ERR_GT_REQUIRED;
8227 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8228 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008229 "Couldn't find end of Start Tag %s line %d\n",
8230 name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00008231 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008232 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008233
8234 /*
8235 * end of parsing of this node.
8236 */
8237 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008238 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008239 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008240 if (nsNr != ctxt->nsNr)
8241 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008242
8243 /*
8244 * Capture end position and add node
8245 */
8246 if ( ret != NULL && ctxt->record_info ) {
8247 node_info.end_pos = ctxt->input->consumed +
8248 (CUR_PTR - ctxt->input->base);
8249 node_info.end_line = ctxt->input->line;
8250 node_info.node = ret;
8251 xmlParserAddNodeInfo(ctxt, &node_info);
8252 }
8253 return;
8254 }
8255
8256 /*
8257 * Parse the content of the element:
8258 */
8259 xmlParseContent(ctxt);
Daniel Veillard34ba3872003-07-15 13:34:05 +00008260 if (!IS_CHAR((unsigned int) RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00008261 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00008262 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8263 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008264 "Premature end of data in tag %s line %d\n", name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00008265 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008266 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008267
8268 /*
8269 * end of parsing of this node.
8270 */
8271 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008272 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008273 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008274 if (nsNr != ctxt->nsNr)
8275 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008276 return;
8277 }
8278
8279 /*
8280 * parse the end of tag: '</' should be here.
8281 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008282 if (ctxt->sax2) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008283 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008284 namePop(ctxt);
8285 } else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008286 xmlParseEndTag1(ctxt, line);
Owen Taylor3473f882001-02-23 17:55:21 +00008287
8288 /*
8289 * Capture end position and add node
8290 */
8291 if ( ret != NULL && ctxt->record_info ) {
8292 node_info.end_pos = ctxt->input->consumed +
8293 (CUR_PTR - ctxt->input->base);
8294 node_info.end_line = ctxt->input->line;
8295 node_info.node = ret;
8296 xmlParserAddNodeInfo(ctxt, &node_info);
8297 }
8298}
8299
8300/**
8301 * xmlParseVersionNum:
8302 * @ctxt: an XML parser context
8303 *
8304 * parse the XML version value.
8305 *
8306 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8307 *
8308 * Returns the string giving the XML version number, or NULL
8309 */
8310xmlChar *
8311xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8312 xmlChar *buf = NULL;
8313 int len = 0;
8314 int size = 10;
8315 xmlChar cur;
8316
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008317 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008318 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008319 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008320 return(NULL);
8321 }
8322 cur = CUR;
8323 while (((cur >= 'a') && (cur <= 'z')) ||
8324 ((cur >= 'A') && (cur <= 'Z')) ||
8325 ((cur >= '0') && (cur <= '9')) ||
8326 (cur == '_') || (cur == '.') ||
8327 (cur == ':') || (cur == '-')) {
8328 if (len + 1 >= size) {
8329 size *= 2;
8330 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8331 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008332 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008333 return(NULL);
8334 }
8335 }
8336 buf[len++] = cur;
8337 NEXT;
8338 cur=CUR;
8339 }
8340 buf[len] = 0;
8341 return(buf);
8342}
8343
8344/**
8345 * xmlParseVersionInfo:
8346 * @ctxt: an XML parser context
8347 *
8348 * parse the XML version.
8349 *
8350 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8351 *
8352 * [25] Eq ::= S? '=' S?
8353 *
8354 * Returns the version string, e.g. "1.0"
8355 */
8356
8357xmlChar *
8358xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8359 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008360
8361 if ((RAW == 'v') && (NXT(1) == 'e') &&
8362 (NXT(2) == 'r') && (NXT(3) == 's') &&
8363 (NXT(4) == 'i') && (NXT(5) == 'o') &&
8364 (NXT(6) == 'n')) {
8365 SKIP(7);
8366 SKIP_BLANKS;
8367 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008368 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008369 return(NULL);
8370 }
8371 NEXT;
8372 SKIP_BLANKS;
8373 if (RAW == '"') {
8374 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008375 version = xmlParseVersionNum(ctxt);
8376 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008377 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008378 } else
8379 NEXT;
8380 } else if (RAW == '\''){
8381 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008382 version = xmlParseVersionNum(ctxt);
8383 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008384 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008385 } else
8386 NEXT;
8387 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008388 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008389 }
8390 }
8391 return(version);
8392}
8393
8394/**
8395 * xmlParseEncName:
8396 * @ctxt: an XML parser context
8397 *
8398 * parse the XML encoding name
8399 *
8400 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8401 *
8402 * Returns the encoding name value or NULL
8403 */
8404xmlChar *
8405xmlParseEncName(xmlParserCtxtPtr ctxt) {
8406 xmlChar *buf = NULL;
8407 int len = 0;
8408 int size = 10;
8409 xmlChar cur;
8410
8411 cur = CUR;
8412 if (((cur >= 'a') && (cur <= 'z')) ||
8413 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008414 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008415 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008416 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008417 return(NULL);
8418 }
8419
8420 buf[len++] = cur;
8421 NEXT;
8422 cur = CUR;
8423 while (((cur >= 'a') && (cur <= 'z')) ||
8424 ((cur >= 'A') && (cur <= 'Z')) ||
8425 ((cur >= '0') && (cur <= '9')) ||
8426 (cur == '.') || (cur == '_') ||
8427 (cur == '-')) {
8428 if (len + 1 >= size) {
8429 size *= 2;
8430 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8431 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008432 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008433 return(NULL);
8434 }
8435 }
8436 buf[len++] = cur;
8437 NEXT;
8438 cur = CUR;
8439 if (cur == 0) {
8440 SHRINK;
8441 GROW;
8442 cur = CUR;
8443 }
8444 }
8445 buf[len] = 0;
8446 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008447 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008448 }
8449 return(buf);
8450}
8451
8452/**
8453 * xmlParseEncodingDecl:
8454 * @ctxt: an XML parser context
8455 *
8456 * parse the XML encoding declaration
8457 *
8458 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8459 *
8460 * this setups the conversion filters.
8461 *
8462 * Returns the encoding value or NULL
8463 */
8464
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008465const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008466xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8467 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008468
8469 SKIP_BLANKS;
8470 if ((RAW == 'e') && (NXT(1) == 'n') &&
8471 (NXT(2) == 'c') && (NXT(3) == 'o') &&
8472 (NXT(4) == 'd') && (NXT(5) == 'i') &&
8473 (NXT(6) == 'n') && (NXT(7) == 'g')) {
8474 SKIP(8);
8475 SKIP_BLANKS;
8476 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008477 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008478 return(NULL);
8479 }
8480 NEXT;
8481 SKIP_BLANKS;
8482 if (RAW == '"') {
8483 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008484 encoding = xmlParseEncName(ctxt);
8485 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008486 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008487 } else
8488 NEXT;
8489 } else if (RAW == '\''){
8490 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008491 encoding = xmlParseEncName(ctxt);
8492 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008493 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008494 } else
8495 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008496 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008497 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008498 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008499 /*
8500 * UTF-16 encoding stwich has already taken place at this stage,
8501 * more over the little-endian/big-endian selection is already done
8502 */
8503 if ((encoding != NULL) &&
8504 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8505 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008506 if (ctxt->encoding != NULL)
8507 xmlFree((xmlChar *) ctxt->encoding);
8508 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008509 }
8510 /*
8511 * UTF-8 encoding is handled natively
8512 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008513 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008514 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8515 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008516 if (ctxt->encoding != NULL)
8517 xmlFree((xmlChar *) ctxt->encoding);
8518 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008519 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008520 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008521 xmlCharEncodingHandlerPtr handler;
8522
8523 if (ctxt->input->encoding != NULL)
8524 xmlFree((xmlChar *) ctxt->input->encoding);
8525 ctxt->input->encoding = encoding;
8526
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008527 handler = xmlFindCharEncodingHandler((const char *) encoding);
8528 if (handler != NULL) {
8529 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008530 } else {
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008531 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
8532 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8533 ctxt->sax->error(ctxt->userData,
8534 "Unsupported encoding %s\n", encoding);
8535 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008536 }
8537 }
8538 }
8539 return(encoding);
8540}
8541
8542/**
8543 * xmlParseSDDecl:
8544 * @ctxt: an XML parser context
8545 *
8546 * parse the XML standalone declaration
8547 *
8548 * [32] SDDecl ::= S 'standalone' Eq
8549 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8550 *
8551 * [ VC: Standalone Document Declaration ]
8552 * TODO The standalone document declaration must have the value "no"
8553 * if any external markup declarations contain declarations of:
8554 * - attributes with default values, if elements to which these
8555 * attributes apply appear in the document without specifications
8556 * of values for these attributes, or
8557 * - entities (other than amp, lt, gt, apos, quot), if references
8558 * to those entities appear in the document, or
8559 * - attributes with values subject to normalization, where the
8560 * attribute appears in the document with a value which will change
8561 * as a result of normalization, or
8562 * - element types with element content, if white space occurs directly
8563 * within any instance of those types.
8564 *
8565 * Returns 1 if standalone, 0 otherwise
8566 */
8567
8568int
8569xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8570 int standalone = -1;
8571
8572 SKIP_BLANKS;
8573 if ((RAW == 's') && (NXT(1) == 't') &&
8574 (NXT(2) == 'a') && (NXT(3) == 'n') &&
8575 (NXT(4) == 'd') && (NXT(5) == 'a') &&
8576 (NXT(6) == 'l') && (NXT(7) == 'o') &&
8577 (NXT(8) == 'n') && (NXT(9) == 'e')) {
8578 SKIP(10);
8579 SKIP_BLANKS;
8580 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008581 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008582 return(standalone);
8583 }
8584 NEXT;
8585 SKIP_BLANKS;
8586 if (RAW == '\''){
8587 NEXT;
8588 if ((RAW == 'n') && (NXT(1) == 'o')) {
8589 standalone = 0;
8590 SKIP(2);
8591 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8592 (NXT(2) == 's')) {
8593 standalone = 1;
8594 SKIP(3);
8595 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008596 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008597 }
8598 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008599 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008600 } else
8601 NEXT;
8602 } else if (RAW == '"'){
8603 NEXT;
8604 if ((RAW == 'n') && (NXT(1) == 'o')) {
8605 standalone = 0;
8606 SKIP(2);
8607 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8608 (NXT(2) == 's')) {
8609 standalone = 1;
8610 SKIP(3);
8611 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008612 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008613 }
8614 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008615 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008616 } else
8617 NEXT;
8618 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008619 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008620 }
8621 }
8622 return(standalone);
8623}
8624
8625/**
8626 * xmlParseXMLDecl:
8627 * @ctxt: an XML parser context
8628 *
8629 * parse an XML declaration header
8630 *
8631 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8632 */
8633
8634void
8635xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8636 xmlChar *version;
8637
8638 /*
8639 * We know that '<?xml' is here.
8640 */
8641 SKIP(5);
8642
8643 if (!IS_BLANK(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008644 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8645 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008646 }
8647 SKIP_BLANKS;
8648
8649 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008650 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008651 */
8652 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008653 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008654 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008655 } else {
8656 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8657 /*
8658 * TODO: Blueberry should be detected here
8659 */
8660 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
8661 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
8662 version);
8663 }
8664 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008665 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008666 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008667 }
Owen Taylor3473f882001-02-23 17:55:21 +00008668
8669 /*
8670 * We may have the encoding declaration
8671 */
8672 if (!IS_BLANK(RAW)) {
8673 if ((RAW == '?') && (NXT(1) == '>')) {
8674 SKIP(2);
8675 return;
8676 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008677 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008678 }
8679 xmlParseEncodingDecl(ctxt);
8680 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8681 /*
8682 * The XML REC instructs us to stop parsing right here
8683 */
8684 return;
8685 }
8686
8687 /*
8688 * We may have the standalone status.
8689 */
8690 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
8691 if ((RAW == '?') && (NXT(1) == '>')) {
8692 SKIP(2);
8693 return;
8694 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008695 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008696 }
8697 SKIP_BLANKS;
8698 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8699
8700 SKIP_BLANKS;
8701 if ((RAW == '?') && (NXT(1) == '>')) {
8702 SKIP(2);
8703 } else if (RAW == '>') {
8704 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008705 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008706 NEXT;
8707 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008708 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008709 MOVETO_ENDTAG(CUR_PTR);
8710 NEXT;
8711 }
8712}
8713
8714/**
8715 * xmlParseMisc:
8716 * @ctxt: an XML parser context
8717 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008718 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008719 *
8720 * [27] Misc ::= Comment | PI | S
8721 */
8722
8723void
8724xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008725 while (((RAW == '<') && (NXT(1) == '?')) ||
8726 ((RAW == '<') && (NXT(1) == '!') &&
8727 (NXT(2) == '-') && (NXT(3) == '-')) ||
8728 IS_BLANK(CUR)) {
8729 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008730 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00008731 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008732 NEXT;
8733 } else
8734 xmlParseComment(ctxt);
8735 }
8736}
8737
8738/**
8739 * xmlParseDocument:
8740 * @ctxt: an XML parser context
8741 *
8742 * parse an XML document (and build a tree if using the standard SAX
8743 * interface).
8744 *
8745 * [1] document ::= prolog element Misc*
8746 *
8747 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8748 *
8749 * Returns 0, -1 in case of error. the parser context is augmented
8750 * as a result of the parsing.
8751 */
8752
8753int
8754xmlParseDocument(xmlParserCtxtPtr ctxt) {
8755 xmlChar start[4];
8756 xmlCharEncoding enc;
8757
8758 xmlInitParser();
8759
8760 GROW;
8761
8762 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008763 * SAX: detecting the level.
8764 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008765 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008766
8767 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008768 * SAX: beginning of the document processing.
8769 */
8770 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8771 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8772
Daniel Veillard50f34372001-08-03 12:06:36 +00008773 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008774 /*
8775 * Get the 4 first bytes and decode the charset
8776 * if enc != XML_CHAR_ENCODING_NONE
8777 * plug some encoding conversion routines.
8778 */
8779 start[0] = RAW;
8780 start[1] = NXT(1);
8781 start[2] = NXT(2);
8782 start[3] = NXT(3);
8783 enc = xmlDetectCharEncoding(start, 4);
8784 if (enc != XML_CHAR_ENCODING_NONE) {
8785 xmlSwitchEncoding(ctxt, enc);
8786 }
Owen Taylor3473f882001-02-23 17:55:21 +00008787 }
8788
8789
8790 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008791 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008792 }
8793
8794 /*
8795 * Check for the XMLDecl in the Prolog.
8796 */
8797 GROW;
8798 if ((RAW == '<') && (NXT(1) == '?') &&
8799 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8800 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8801
8802 /*
8803 * Note that we will switch encoding on the fly.
8804 */
8805 xmlParseXMLDecl(ctxt);
8806 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8807 /*
8808 * The XML REC instructs us to stop parsing right here
8809 */
8810 return(-1);
8811 }
8812 ctxt->standalone = ctxt->input->standalone;
8813 SKIP_BLANKS;
8814 } else {
8815 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8816 }
8817 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8818 ctxt->sax->startDocument(ctxt->userData);
8819
8820 /*
8821 * The Misc part of the Prolog
8822 */
8823 GROW;
8824 xmlParseMisc(ctxt);
8825
8826 /*
8827 * Then possibly doc type declaration(s) and more Misc
8828 * (doctypedecl Misc*)?
8829 */
8830 GROW;
8831 if ((RAW == '<') && (NXT(1) == '!') &&
8832 (NXT(2) == 'D') && (NXT(3) == 'O') &&
8833 (NXT(4) == 'C') && (NXT(5) == 'T') &&
8834 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
8835 (NXT(8) == 'E')) {
8836
8837 ctxt->inSubset = 1;
8838 xmlParseDocTypeDecl(ctxt);
8839 if (RAW == '[') {
8840 ctxt->instate = XML_PARSER_DTD;
8841 xmlParseInternalSubset(ctxt);
8842 }
8843
8844 /*
8845 * Create and update the external subset.
8846 */
8847 ctxt->inSubset = 2;
8848 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8849 (!ctxt->disableSAX))
8850 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8851 ctxt->extSubSystem, ctxt->extSubURI);
8852 ctxt->inSubset = 0;
8853
8854
8855 ctxt->instate = XML_PARSER_PROLOG;
8856 xmlParseMisc(ctxt);
8857 }
8858
8859 /*
8860 * Time to start parsing the tree itself
8861 */
8862 GROW;
8863 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008864 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8865 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008866 } else {
8867 ctxt->instate = XML_PARSER_CONTENT;
8868 xmlParseElement(ctxt);
8869 ctxt->instate = XML_PARSER_EPILOG;
8870
8871
8872 /*
8873 * The Misc part at the end
8874 */
8875 xmlParseMisc(ctxt);
8876
Daniel Veillard561b7f82002-03-20 21:55:57 +00008877 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008878 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008879 }
8880 ctxt->instate = XML_PARSER_EOF;
8881 }
8882
8883 /*
8884 * SAX: end of the document processing.
8885 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008886 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008887 ctxt->sax->endDocument(ctxt->userData);
8888
Daniel Veillard5997aca2002-03-18 18:36:20 +00008889 /*
8890 * Remove locally kept entity definitions if the tree was not built
8891 */
8892 if ((ctxt->myDoc != NULL) &&
8893 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8894 xmlFreeDoc(ctxt->myDoc);
8895 ctxt->myDoc = NULL;
8896 }
8897
Daniel Veillardc7612992002-02-17 22:47:37 +00008898 if (! ctxt->wellFormed) {
8899 ctxt->valid = 0;
8900 return(-1);
8901 }
Owen Taylor3473f882001-02-23 17:55:21 +00008902 return(0);
8903}
8904
8905/**
8906 * xmlParseExtParsedEnt:
8907 * @ctxt: an XML parser context
8908 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008909 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008910 * An external general parsed entity is well-formed if it matches the
8911 * production labeled extParsedEnt.
8912 *
8913 * [78] extParsedEnt ::= TextDecl? content
8914 *
8915 * Returns 0, -1 in case of error. the parser context is augmented
8916 * as a result of the parsing.
8917 */
8918
8919int
8920xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8921 xmlChar start[4];
8922 xmlCharEncoding enc;
8923
8924 xmlDefaultSAXHandlerInit();
8925
Daniel Veillard309f81d2003-09-23 09:02:53 +00008926 xmlDetectSAX2(ctxt);
8927
Owen Taylor3473f882001-02-23 17:55:21 +00008928 GROW;
8929
8930 /*
8931 * SAX: beginning of the document processing.
8932 */
8933 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8934 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8935
8936 /*
8937 * Get the 4 first bytes and decode the charset
8938 * if enc != XML_CHAR_ENCODING_NONE
8939 * plug some encoding conversion routines.
8940 */
8941 start[0] = RAW;
8942 start[1] = NXT(1);
8943 start[2] = NXT(2);
8944 start[3] = NXT(3);
8945 enc = xmlDetectCharEncoding(start, 4);
8946 if (enc != XML_CHAR_ENCODING_NONE) {
8947 xmlSwitchEncoding(ctxt, enc);
8948 }
8949
8950
8951 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008952 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008953 }
8954
8955 /*
8956 * Check for the XMLDecl in the Prolog.
8957 */
8958 GROW;
8959 if ((RAW == '<') && (NXT(1) == '?') &&
8960 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8961 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8962
8963 /*
8964 * Note that we will switch encoding on the fly.
8965 */
8966 xmlParseXMLDecl(ctxt);
8967 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8968 /*
8969 * The XML REC instructs us to stop parsing right here
8970 */
8971 return(-1);
8972 }
8973 SKIP_BLANKS;
8974 } else {
8975 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8976 }
8977 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8978 ctxt->sax->startDocument(ctxt->userData);
8979
8980 /*
8981 * Doing validity checking on chunk doesn't make sense
8982 */
8983 ctxt->instate = XML_PARSER_CONTENT;
8984 ctxt->validate = 0;
8985 ctxt->loadsubset = 0;
8986 ctxt->depth = 0;
8987
8988 xmlParseContent(ctxt);
8989
8990 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008991 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008992 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008993 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008994 }
8995
8996 /*
8997 * SAX: end of the document processing.
8998 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008999 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009000 ctxt->sax->endDocument(ctxt->userData);
9001
9002 if (! ctxt->wellFormed) return(-1);
9003 return(0);
9004}
9005
9006/************************************************************************
9007 * *
9008 * Progressive parsing interfaces *
9009 * *
9010 ************************************************************************/
9011
9012/**
9013 * xmlParseLookupSequence:
9014 * @ctxt: an XML parser context
9015 * @first: the first char to lookup
9016 * @next: the next char to lookup or zero
9017 * @third: the next char to lookup or zero
9018 *
9019 * Try to find if a sequence (first, next, third) or just (first next) or
9020 * (first) is available in the input stream.
9021 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9022 * to avoid rescanning sequences of bytes, it DOES change the state of the
9023 * parser, do not use liberally.
9024 *
9025 * Returns the index to the current parsing point if the full sequence
9026 * is available, -1 otherwise.
9027 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009028static int
Owen Taylor3473f882001-02-23 17:55:21 +00009029xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9030 xmlChar next, xmlChar third) {
9031 int base, len;
9032 xmlParserInputPtr in;
9033 const xmlChar *buf;
9034
9035 in = ctxt->input;
9036 if (in == NULL) return(-1);
9037 base = in->cur - in->base;
9038 if (base < 0) return(-1);
9039 if (ctxt->checkIndex > base)
9040 base = ctxt->checkIndex;
9041 if (in->buf == NULL) {
9042 buf = in->base;
9043 len = in->length;
9044 } else {
9045 buf = in->buf->buffer->content;
9046 len = in->buf->buffer->use;
9047 }
9048 /* take into account the sequence length */
9049 if (third) len -= 2;
9050 else if (next) len --;
9051 for (;base < len;base++) {
9052 if (buf[base] == first) {
9053 if (third != 0) {
9054 if ((buf[base + 1] != next) ||
9055 (buf[base + 2] != third)) continue;
9056 } else if (next != 0) {
9057 if (buf[base + 1] != next) continue;
9058 }
9059 ctxt->checkIndex = 0;
9060#ifdef DEBUG_PUSH
9061 if (next == 0)
9062 xmlGenericError(xmlGenericErrorContext,
9063 "PP: lookup '%c' found at %d\n",
9064 first, base);
9065 else if (third == 0)
9066 xmlGenericError(xmlGenericErrorContext,
9067 "PP: lookup '%c%c' found at %d\n",
9068 first, next, base);
9069 else
9070 xmlGenericError(xmlGenericErrorContext,
9071 "PP: lookup '%c%c%c' found at %d\n",
9072 first, next, third, base);
9073#endif
9074 return(base - (in->cur - in->base));
9075 }
9076 }
9077 ctxt->checkIndex = base;
9078#ifdef DEBUG_PUSH
9079 if (next == 0)
9080 xmlGenericError(xmlGenericErrorContext,
9081 "PP: lookup '%c' failed\n", first);
9082 else if (third == 0)
9083 xmlGenericError(xmlGenericErrorContext,
9084 "PP: lookup '%c%c' failed\n", first, next);
9085 else
9086 xmlGenericError(xmlGenericErrorContext,
9087 "PP: lookup '%c%c%c' failed\n", first, next, third);
9088#endif
9089 return(-1);
9090}
9091
9092/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009093 * xmlParseGetLasts:
9094 * @ctxt: an XML parser context
9095 * @lastlt: pointer to store the last '<' from the input
9096 * @lastgt: pointer to store the last '>' from the input
9097 *
9098 * Lookup the last < and > in the current chunk
9099 */
9100static void
9101xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9102 const xmlChar **lastgt) {
9103 const xmlChar *tmp;
9104
9105 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9106 xmlGenericError(xmlGenericErrorContext,
9107 "Internal error: xmlParseGetLasts\n");
9108 return;
9109 }
9110 if ((ctxt->progressive == 1) && (ctxt->inputNr == 1)) {
9111 tmp = ctxt->input->end;
9112 tmp--;
9113 while ((tmp >= ctxt->input->base) && (*tmp != '<') &&
9114 (*tmp != '>')) tmp--;
9115 if (tmp < ctxt->input->base) {
9116 *lastlt = NULL;
9117 *lastgt = NULL;
9118 } else if (*tmp == '<') {
9119 *lastlt = tmp;
9120 tmp--;
9121 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9122 if (tmp < ctxt->input->base)
9123 *lastgt = NULL;
9124 else
9125 *lastgt = tmp;
9126 } else {
9127 *lastgt = tmp;
9128 tmp--;
9129 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
9130 if (tmp < ctxt->input->base)
9131 *lastlt = NULL;
9132 else
9133 *lastlt = tmp;
9134 }
9135
9136 } else {
9137 *lastlt = NULL;
9138 *lastgt = NULL;
9139 }
9140}
9141/**
Owen Taylor3473f882001-02-23 17:55:21 +00009142 * xmlParseTryOrFinish:
9143 * @ctxt: an XML parser context
9144 * @terminate: last chunk indicator
9145 *
9146 * Try to progress on parsing
9147 *
9148 * Returns zero if no parsing was possible
9149 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009150static int
Owen Taylor3473f882001-02-23 17:55:21 +00009151xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9152 int ret = 0;
9153 int avail;
9154 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009155 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009156
9157#ifdef DEBUG_PUSH
9158 switch (ctxt->instate) {
9159 case XML_PARSER_EOF:
9160 xmlGenericError(xmlGenericErrorContext,
9161 "PP: try EOF\n"); break;
9162 case XML_PARSER_START:
9163 xmlGenericError(xmlGenericErrorContext,
9164 "PP: try START\n"); break;
9165 case XML_PARSER_MISC:
9166 xmlGenericError(xmlGenericErrorContext,
9167 "PP: try MISC\n");break;
9168 case XML_PARSER_COMMENT:
9169 xmlGenericError(xmlGenericErrorContext,
9170 "PP: try COMMENT\n");break;
9171 case XML_PARSER_PROLOG:
9172 xmlGenericError(xmlGenericErrorContext,
9173 "PP: try PROLOG\n");break;
9174 case XML_PARSER_START_TAG:
9175 xmlGenericError(xmlGenericErrorContext,
9176 "PP: try START_TAG\n");break;
9177 case XML_PARSER_CONTENT:
9178 xmlGenericError(xmlGenericErrorContext,
9179 "PP: try CONTENT\n");break;
9180 case XML_PARSER_CDATA_SECTION:
9181 xmlGenericError(xmlGenericErrorContext,
9182 "PP: try CDATA_SECTION\n");break;
9183 case XML_PARSER_END_TAG:
9184 xmlGenericError(xmlGenericErrorContext,
9185 "PP: try END_TAG\n");break;
9186 case XML_PARSER_ENTITY_DECL:
9187 xmlGenericError(xmlGenericErrorContext,
9188 "PP: try ENTITY_DECL\n");break;
9189 case XML_PARSER_ENTITY_VALUE:
9190 xmlGenericError(xmlGenericErrorContext,
9191 "PP: try ENTITY_VALUE\n");break;
9192 case XML_PARSER_ATTRIBUTE_VALUE:
9193 xmlGenericError(xmlGenericErrorContext,
9194 "PP: try ATTRIBUTE_VALUE\n");break;
9195 case XML_PARSER_DTD:
9196 xmlGenericError(xmlGenericErrorContext,
9197 "PP: try DTD\n");break;
9198 case XML_PARSER_EPILOG:
9199 xmlGenericError(xmlGenericErrorContext,
9200 "PP: try EPILOG\n");break;
9201 case XML_PARSER_PI:
9202 xmlGenericError(xmlGenericErrorContext,
9203 "PP: try PI\n");break;
9204 case XML_PARSER_IGNORE:
9205 xmlGenericError(xmlGenericErrorContext,
9206 "PP: try IGNORE\n");break;
9207 }
9208#endif
9209
Daniel Veillarda880b122003-04-21 21:36:41 +00009210 if (ctxt->input->cur - ctxt->input->base > 4096) {
9211 xmlSHRINK(ctxt);
9212 ctxt->checkIndex = 0;
9213 }
9214 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009215
Daniel Veillarda880b122003-04-21 21:36:41 +00009216 while (1) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009217 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9218 return(0);
9219
9220
Owen Taylor3473f882001-02-23 17:55:21 +00009221 /*
9222 * Pop-up of finished entities.
9223 */
9224 while ((RAW == 0) && (ctxt->inputNr > 1))
9225 xmlPopInput(ctxt);
9226
9227 if (ctxt->input ==NULL) break;
9228 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009229 avail = ctxt->input->length -
9230 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009231 else {
9232 /*
9233 * If we are operating on converted input, try to flush
9234 * remainng chars to avoid them stalling in the non-converted
9235 * buffer.
9236 */
9237 if ((ctxt->input->buf->raw != NULL) &&
9238 (ctxt->input->buf->raw->use > 0)) {
9239 int base = ctxt->input->base -
9240 ctxt->input->buf->buffer->content;
9241 int current = ctxt->input->cur - ctxt->input->base;
9242
9243 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9244 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9245 ctxt->input->cur = ctxt->input->base + current;
9246 ctxt->input->end =
9247 &ctxt->input->buf->buffer->content[
9248 ctxt->input->buf->buffer->use];
9249 }
9250 avail = ctxt->input->buf->buffer->use -
9251 (ctxt->input->cur - ctxt->input->base);
9252 }
Owen Taylor3473f882001-02-23 17:55:21 +00009253 if (avail < 1)
9254 goto done;
9255 switch (ctxt->instate) {
9256 case XML_PARSER_EOF:
9257 /*
9258 * Document parsing is done !
9259 */
9260 goto done;
9261 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009262 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9263 xmlChar start[4];
9264 xmlCharEncoding enc;
9265
9266 /*
9267 * Very first chars read from the document flow.
9268 */
9269 if (avail < 4)
9270 goto done;
9271
9272 /*
9273 * Get the 4 first bytes and decode the charset
9274 * if enc != XML_CHAR_ENCODING_NONE
9275 * plug some encoding conversion routines.
9276 */
9277 start[0] = RAW;
9278 start[1] = NXT(1);
9279 start[2] = NXT(2);
9280 start[3] = NXT(3);
9281 enc = xmlDetectCharEncoding(start, 4);
9282 if (enc != XML_CHAR_ENCODING_NONE) {
9283 xmlSwitchEncoding(ctxt, enc);
9284 }
9285 break;
9286 }
Owen Taylor3473f882001-02-23 17:55:21 +00009287
9288 cur = ctxt->input->cur[0];
9289 next = ctxt->input->cur[1];
9290 if (cur == 0) {
9291 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9292 ctxt->sax->setDocumentLocator(ctxt->userData,
9293 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009294 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009295 ctxt->instate = XML_PARSER_EOF;
9296#ifdef DEBUG_PUSH
9297 xmlGenericError(xmlGenericErrorContext,
9298 "PP: entering EOF\n");
9299#endif
9300 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9301 ctxt->sax->endDocument(ctxt->userData);
9302 goto done;
9303 }
9304 if ((cur == '<') && (next == '?')) {
9305 /* PI or XML decl */
9306 if (avail < 5) return(ret);
9307 if ((!terminate) &&
9308 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9309 return(ret);
9310 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9311 ctxt->sax->setDocumentLocator(ctxt->userData,
9312 &xmlDefaultSAXLocator);
9313 if ((ctxt->input->cur[2] == 'x') &&
9314 (ctxt->input->cur[3] == 'm') &&
9315 (ctxt->input->cur[4] == 'l') &&
9316 (IS_BLANK(ctxt->input->cur[5]))) {
9317 ret += 5;
9318#ifdef DEBUG_PUSH
9319 xmlGenericError(xmlGenericErrorContext,
9320 "PP: Parsing XML Decl\n");
9321#endif
9322 xmlParseXMLDecl(ctxt);
9323 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9324 /*
9325 * The XML REC instructs us to stop parsing right
9326 * here
9327 */
9328 ctxt->instate = XML_PARSER_EOF;
9329 return(0);
9330 }
9331 ctxt->standalone = ctxt->input->standalone;
9332 if ((ctxt->encoding == NULL) &&
9333 (ctxt->input->encoding != NULL))
9334 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9335 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9336 (!ctxt->disableSAX))
9337 ctxt->sax->startDocument(ctxt->userData);
9338 ctxt->instate = XML_PARSER_MISC;
9339#ifdef DEBUG_PUSH
9340 xmlGenericError(xmlGenericErrorContext,
9341 "PP: entering MISC\n");
9342#endif
9343 } else {
9344 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9345 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9346 (!ctxt->disableSAX))
9347 ctxt->sax->startDocument(ctxt->userData);
9348 ctxt->instate = XML_PARSER_MISC;
9349#ifdef DEBUG_PUSH
9350 xmlGenericError(xmlGenericErrorContext,
9351 "PP: entering MISC\n");
9352#endif
9353 }
9354 } else {
9355 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9356 ctxt->sax->setDocumentLocator(ctxt->userData,
9357 &xmlDefaultSAXLocator);
9358 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9359 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9360 (!ctxt->disableSAX))
9361 ctxt->sax->startDocument(ctxt->userData);
9362 ctxt->instate = XML_PARSER_MISC;
9363#ifdef DEBUG_PUSH
9364 xmlGenericError(xmlGenericErrorContext,
9365 "PP: entering MISC\n");
9366#endif
9367 }
9368 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009369 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009370 const xmlChar *name;
9371 const xmlChar *prefix;
9372 const xmlChar *URI;
9373 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009374
9375 if ((avail < 2) && (ctxt->inputNr == 1))
9376 goto done;
9377 cur = ctxt->input->cur[0];
9378 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009379 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009380 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009381 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9382 ctxt->sax->endDocument(ctxt->userData);
9383 goto done;
9384 }
9385 if (!terminate) {
9386 if (ctxt->progressive) {
9387 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9388 goto done;
9389 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9390 goto done;
9391 }
9392 }
9393 if (ctxt->spaceNr == 0)
9394 spacePush(ctxt, -1);
9395 else
9396 spacePush(ctxt, *ctxt->space);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009397 if (ctxt->sax2)
9398 name = xmlParseStartTag2(ctxt, &prefix, &URI);
9399 else
9400 name = xmlParseStartTag(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009401 if (name == NULL) {
9402 spacePop(ctxt);
9403 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009404 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9405 ctxt->sax->endDocument(ctxt->userData);
9406 goto done;
9407 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009408 /*
9409 * [ VC: Root Element Type ]
9410 * The Name in the document type declaration must match
9411 * the element type of the root element.
9412 */
9413 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9414 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9415 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9416
9417 /*
9418 * Check for an Empty Element.
9419 */
9420 if ((RAW == '/') && (NXT(1) == '>')) {
9421 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009422
9423 if (ctxt->sax2) {
9424 if ((ctxt->sax != NULL) &&
9425 (ctxt->sax->endElementNs != NULL) &&
9426 (!ctxt->disableSAX))
9427 ctxt->sax->endElementNs(ctxt->userData, name,
9428 prefix, URI);
9429 } else {
9430 if ((ctxt->sax != NULL) &&
9431 (ctxt->sax->endElement != NULL) &&
9432 (!ctxt->disableSAX))
9433 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009434 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009435 spacePop(ctxt);
9436 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009437 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009438 } else {
9439 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009440 }
9441 break;
9442 }
9443 if (RAW == '>') {
9444 NEXT;
9445 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009446 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009447 "Couldn't find end of Start Tag %s\n",
9448 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009449 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009450 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009451 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009452 if (ctxt->sax2)
9453 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
9454 else
9455 namePush(ctxt, name);
9456
Daniel Veillarda880b122003-04-21 21:36:41 +00009457 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009458 break;
9459 }
9460 case XML_PARSER_CONTENT: {
9461 const xmlChar *test;
9462 unsigned int cons;
9463 if ((avail < 2) && (ctxt->inputNr == 1))
9464 goto done;
9465 cur = ctxt->input->cur[0];
9466 next = ctxt->input->cur[1];
9467
9468 test = CUR_PTR;
9469 cons = ctxt->input->consumed;
9470 if ((cur == '<') && (next == '/')) {
9471 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009472 break;
9473 } else if ((cur == '<') && (next == '?')) {
9474 if ((!terminate) &&
9475 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9476 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009477 xmlParsePI(ctxt);
9478 } else if ((cur == '<') && (next != '!')) {
9479 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009480 break;
9481 } else if ((cur == '<') && (next == '!') &&
9482 (ctxt->input->cur[2] == '-') &&
9483 (ctxt->input->cur[3] == '-')) {
9484 if ((!terminate) &&
9485 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9486 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009487 xmlParseComment(ctxt);
9488 ctxt->instate = XML_PARSER_CONTENT;
9489 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9490 (ctxt->input->cur[2] == '[') &&
9491 (ctxt->input->cur[3] == 'C') &&
9492 (ctxt->input->cur[4] == 'D') &&
9493 (ctxt->input->cur[5] == 'A') &&
9494 (ctxt->input->cur[6] == 'T') &&
9495 (ctxt->input->cur[7] == 'A') &&
9496 (ctxt->input->cur[8] == '[')) {
9497 SKIP(9);
9498 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009499 break;
9500 } else if ((cur == '<') && (next == '!') &&
9501 (avail < 9)) {
9502 goto done;
9503 } else if (cur == '&') {
9504 if ((!terminate) &&
9505 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9506 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009507 xmlParseReference(ctxt);
9508 } else {
9509 /* TODO Avoid the extra copy, handle directly !!! */
9510 /*
9511 * Goal of the following test is:
9512 * - minimize calls to the SAX 'character' callback
9513 * when they are mergeable
9514 * - handle an problem for isBlank when we only parse
9515 * a sequence of blank chars and the next one is
9516 * not available to check against '<' presence.
9517 * - tries to homogenize the differences in SAX
9518 * callbacks between the push and pull versions
9519 * of the parser.
9520 */
9521 if ((ctxt->inputNr == 1) &&
9522 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9523 if (!terminate) {
9524 if (ctxt->progressive) {
9525 if ((lastlt == NULL) ||
9526 (ctxt->input->cur > lastlt))
9527 goto done;
9528 } else if (xmlParseLookupSequence(ctxt,
9529 '<', 0, 0) < 0) {
9530 goto done;
9531 }
9532 }
9533 }
9534 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009535 xmlParseCharData(ctxt, 0);
9536 }
9537 /*
9538 * Pop-up of finished entities.
9539 */
9540 while ((RAW == 0) && (ctxt->inputNr > 1))
9541 xmlPopInput(ctxt);
9542 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009543 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9544 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009545 ctxt->instate = XML_PARSER_EOF;
9546 break;
9547 }
9548 break;
9549 }
9550 case XML_PARSER_END_TAG:
9551 if (avail < 2)
9552 goto done;
9553 if (!terminate) {
9554 if (ctxt->progressive) {
9555 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9556 goto done;
9557 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9558 goto done;
9559 }
9560 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009561 if (ctxt->sax2) {
9562 xmlParseEndTag2(ctxt,
9563 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9564 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
9565 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1]);
9566 nameNsPop(ctxt);
9567 } else
9568 xmlParseEndTag1(ctxt, 0);
9569 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009570 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009571 } else {
9572 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009573 }
9574 break;
9575 case XML_PARSER_CDATA_SECTION: {
9576 /*
9577 * The Push mode need to have the SAX callback for
9578 * cdataBlock merge back contiguous callbacks.
9579 */
9580 int base;
9581
9582 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9583 if (base < 0) {
9584 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
9585 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9586 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009587 ctxt->sax->cdataBlock(ctxt->userData,
9588 ctxt->input->cur,
9589 XML_PARSER_BIG_BUFFER_SIZE);
9590 else if (ctxt->sax->characters != NULL)
9591 ctxt->sax->characters(ctxt->userData,
9592 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00009593 XML_PARSER_BIG_BUFFER_SIZE);
9594 }
9595 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
9596 ctxt->checkIndex = 0;
9597 }
9598 goto done;
9599 } else {
9600 if ((ctxt->sax != NULL) && (base > 0) &&
9601 (!ctxt->disableSAX)) {
9602 if (ctxt->sax->cdataBlock != NULL)
9603 ctxt->sax->cdataBlock(ctxt->userData,
9604 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009605 else if (ctxt->sax->characters != NULL)
9606 ctxt->sax->characters(ctxt->userData,
9607 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009608 }
9609 SKIP(base + 3);
9610 ctxt->checkIndex = 0;
9611 ctxt->instate = XML_PARSER_CONTENT;
9612#ifdef DEBUG_PUSH
9613 xmlGenericError(xmlGenericErrorContext,
9614 "PP: entering CONTENT\n");
9615#endif
9616 }
9617 break;
9618 }
Owen Taylor3473f882001-02-23 17:55:21 +00009619 case XML_PARSER_MISC:
9620 SKIP_BLANKS;
9621 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009622 avail = ctxt->input->length -
9623 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009624 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009625 avail = ctxt->input->buf->buffer->use -
9626 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009627 if (avail < 2)
9628 goto done;
9629 cur = ctxt->input->cur[0];
9630 next = ctxt->input->cur[1];
9631 if ((cur == '<') && (next == '?')) {
9632 if ((!terminate) &&
9633 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9634 goto done;
9635#ifdef DEBUG_PUSH
9636 xmlGenericError(xmlGenericErrorContext,
9637 "PP: Parsing PI\n");
9638#endif
9639 xmlParsePI(ctxt);
9640 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009641 (ctxt->input->cur[2] == '-') &&
9642 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009643 if ((!terminate) &&
9644 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9645 goto done;
9646#ifdef DEBUG_PUSH
9647 xmlGenericError(xmlGenericErrorContext,
9648 "PP: Parsing Comment\n");
9649#endif
9650 xmlParseComment(ctxt);
9651 ctxt->instate = XML_PARSER_MISC;
9652 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009653 (ctxt->input->cur[2] == 'D') &&
9654 (ctxt->input->cur[3] == 'O') &&
9655 (ctxt->input->cur[4] == 'C') &&
9656 (ctxt->input->cur[5] == 'T') &&
9657 (ctxt->input->cur[6] == 'Y') &&
9658 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009659 (ctxt->input->cur[8] == 'E')) {
9660 if ((!terminate) &&
9661 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9662 goto done;
9663#ifdef DEBUG_PUSH
9664 xmlGenericError(xmlGenericErrorContext,
9665 "PP: Parsing internal subset\n");
9666#endif
9667 ctxt->inSubset = 1;
9668 xmlParseDocTypeDecl(ctxt);
9669 if (RAW == '[') {
9670 ctxt->instate = XML_PARSER_DTD;
9671#ifdef DEBUG_PUSH
9672 xmlGenericError(xmlGenericErrorContext,
9673 "PP: entering DTD\n");
9674#endif
9675 } else {
9676 /*
9677 * Create and update the external subset.
9678 */
9679 ctxt->inSubset = 2;
9680 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9681 (ctxt->sax->externalSubset != NULL))
9682 ctxt->sax->externalSubset(ctxt->userData,
9683 ctxt->intSubName, ctxt->extSubSystem,
9684 ctxt->extSubURI);
9685 ctxt->inSubset = 0;
9686 ctxt->instate = XML_PARSER_PROLOG;
9687#ifdef DEBUG_PUSH
9688 xmlGenericError(xmlGenericErrorContext,
9689 "PP: entering PROLOG\n");
9690#endif
9691 }
9692 } else if ((cur == '<') && (next == '!') &&
9693 (avail < 9)) {
9694 goto done;
9695 } else {
9696 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009697 ctxt->progressive = 1;
9698 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009699#ifdef DEBUG_PUSH
9700 xmlGenericError(xmlGenericErrorContext,
9701 "PP: entering START_TAG\n");
9702#endif
9703 }
9704 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009705 case XML_PARSER_PROLOG:
9706 SKIP_BLANKS;
9707 if (ctxt->input->buf == NULL)
9708 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9709 else
9710 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9711 if (avail < 2)
9712 goto done;
9713 cur = ctxt->input->cur[0];
9714 next = ctxt->input->cur[1];
9715 if ((cur == '<') && (next == '?')) {
9716 if ((!terminate) &&
9717 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9718 goto done;
9719#ifdef DEBUG_PUSH
9720 xmlGenericError(xmlGenericErrorContext,
9721 "PP: Parsing PI\n");
9722#endif
9723 xmlParsePI(ctxt);
9724 } else if ((cur == '<') && (next == '!') &&
9725 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9726 if ((!terminate) &&
9727 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9728 goto done;
9729#ifdef DEBUG_PUSH
9730 xmlGenericError(xmlGenericErrorContext,
9731 "PP: Parsing Comment\n");
9732#endif
9733 xmlParseComment(ctxt);
9734 ctxt->instate = XML_PARSER_PROLOG;
9735 } else if ((cur == '<') && (next == '!') &&
9736 (avail < 4)) {
9737 goto done;
9738 } else {
9739 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009740 ctxt->progressive = 1;
9741 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009742#ifdef DEBUG_PUSH
9743 xmlGenericError(xmlGenericErrorContext,
9744 "PP: entering START_TAG\n");
9745#endif
9746 }
9747 break;
9748 case XML_PARSER_EPILOG:
9749 SKIP_BLANKS;
9750 if (ctxt->input->buf == NULL)
9751 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9752 else
9753 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9754 if (avail < 2)
9755 goto done;
9756 cur = ctxt->input->cur[0];
9757 next = ctxt->input->cur[1];
9758 if ((cur == '<') && (next == '?')) {
9759 if ((!terminate) &&
9760 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9761 goto done;
9762#ifdef DEBUG_PUSH
9763 xmlGenericError(xmlGenericErrorContext,
9764 "PP: Parsing PI\n");
9765#endif
9766 xmlParsePI(ctxt);
9767 ctxt->instate = XML_PARSER_EPILOG;
9768 } else if ((cur == '<') && (next == '!') &&
9769 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9770 if ((!terminate) &&
9771 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9772 goto done;
9773#ifdef DEBUG_PUSH
9774 xmlGenericError(xmlGenericErrorContext,
9775 "PP: Parsing Comment\n");
9776#endif
9777 xmlParseComment(ctxt);
9778 ctxt->instate = XML_PARSER_EPILOG;
9779 } else if ((cur == '<') && (next == '!') &&
9780 (avail < 4)) {
9781 goto done;
9782 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009783 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009784 ctxt->instate = XML_PARSER_EOF;
9785#ifdef DEBUG_PUSH
9786 xmlGenericError(xmlGenericErrorContext,
9787 "PP: entering EOF\n");
9788#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009789 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009790 ctxt->sax->endDocument(ctxt->userData);
9791 goto done;
9792 }
9793 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009794 case XML_PARSER_DTD: {
9795 /*
9796 * Sorry but progressive parsing of the internal subset
9797 * is not expected to be supported. We first check that
9798 * the full content of the internal subset is available and
9799 * the parsing is launched only at that point.
9800 * Internal subset ends up with "']' S? '>'" in an unescaped
9801 * section and not in a ']]>' sequence which are conditional
9802 * sections (whoever argued to keep that crap in XML deserve
9803 * a place in hell !).
9804 */
9805 int base, i;
9806 xmlChar *buf;
9807 xmlChar quote = 0;
9808
9809 base = ctxt->input->cur - ctxt->input->base;
9810 if (base < 0) return(0);
9811 if (ctxt->checkIndex > base)
9812 base = ctxt->checkIndex;
9813 buf = ctxt->input->buf->buffer->content;
9814 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9815 base++) {
9816 if (quote != 0) {
9817 if (buf[base] == quote)
9818 quote = 0;
9819 continue;
9820 }
9821 if (buf[base] == '"') {
9822 quote = '"';
9823 continue;
9824 }
9825 if (buf[base] == '\'') {
9826 quote = '\'';
9827 continue;
9828 }
9829 if (buf[base] == ']') {
9830 if ((unsigned int) base +1 >=
9831 ctxt->input->buf->buffer->use)
9832 break;
9833 if (buf[base + 1] == ']') {
9834 /* conditional crap, skip both ']' ! */
9835 base++;
9836 continue;
9837 }
9838 for (i = 0;
9839 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9840 i++) {
9841 if (buf[base + i] == '>')
9842 goto found_end_int_subset;
9843 }
9844 break;
9845 }
9846 }
9847 /*
9848 * We didn't found the end of the Internal subset
9849 */
9850 if (quote == 0)
9851 ctxt->checkIndex = base;
9852#ifdef DEBUG_PUSH
9853 if (next == 0)
9854 xmlGenericError(xmlGenericErrorContext,
9855 "PP: lookup of int subset end filed\n");
9856#endif
9857 goto done;
9858
9859found_end_int_subset:
9860 xmlParseInternalSubset(ctxt);
9861 ctxt->inSubset = 2;
9862 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9863 (ctxt->sax->externalSubset != NULL))
9864 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9865 ctxt->extSubSystem, ctxt->extSubURI);
9866 ctxt->inSubset = 0;
9867 ctxt->instate = XML_PARSER_PROLOG;
9868 ctxt->checkIndex = 0;
9869#ifdef DEBUG_PUSH
9870 xmlGenericError(xmlGenericErrorContext,
9871 "PP: entering PROLOG\n");
9872#endif
9873 break;
9874 }
9875 case XML_PARSER_COMMENT:
9876 xmlGenericError(xmlGenericErrorContext,
9877 "PP: internal error, state == COMMENT\n");
9878 ctxt->instate = XML_PARSER_CONTENT;
9879#ifdef DEBUG_PUSH
9880 xmlGenericError(xmlGenericErrorContext,
9881 "PP: entering CONTENT\n");
9882#endif
9883 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009884 case XML_PARSER_IGNORE:
9885 xmlGenericError(xmlGenericErrorContext,
9886 "PP: internal error, state == IGNORE");
9887 ctxt->instate = XML_PARSER_DTD;
9888#ifdef DEBUG_PUSH
9889 xmlGenericError(xmlGenericErrorContext,
9890 "PP: entering DTD\n");
9891#endif
9892 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009893 case XML_PARSER_PI:
9894 xmlGenericError(xmlGenericErrorContext,
9895 "PP: internal error, state == PI\n");
9896 ctxt->instate = XML_PARSER_CONTENT;
9897#ifdef DEBUG_PUSH
9898 xmlGenericError(xmlGenericErrorContext,
9899 "PP: entering CONTENT\n");
9900#endif
9901 break;
9902 case XML_PARSER_ENTITY_DECL:
9903 xmlGenericError(xmlGenericErrorContext,
9904 "PP: internal error, state == ENTITY_DECL\n");
9905 ctxt->instate = XML_PARSER_DTD;
9906#ifdef DEBUG_PUSH
9907 xmlGenericError(xmlGenericErrorContext,
9908 "PP: entering DTD\n");
9909#endif
9910 break;
9911 case XML_PARSER_ENTITY_VALUE:
9912 xmlGenericError(xmlGenericErrorContext,
9913 "PP: internal error, state == ENTITY_VALUE\n");
9914 ctxt->instate = XML_PARSER_CONTENT;
9915#ifdef DEBUG_PUSH
9916 xmlGenericError(xmlGenericErrorContext,
9917 "PP: entering DTD\n");
9918#endif
9919 break;
9920 case XML_PARSER_ATTRIBUTE_VALUE:
9921 xmlGenericError(xmlGenericErrorContext,
9922 "PP: internal error, state == ATTRIBUTE_VALUE\n");
9923 ctxt->instate = XML_PARSER_START_TAG;
9924#ifdef DEBUG_PUSH
9925 xmlGenericError(xmlGenericErrorContext,
9926 "PP: entering START_TAG\n");
9927#endif
9928 break;
9929 case XML_PARSER_SYSTEM_LITERAL:
9930 xmlGenericError(xmlGenericErrorContext,
9931 "PP: internal error, state == SYSTEM_LITERAL\n");
9932 ctxt->instate = XML_PARSER_START_TAG;
9933#ifdef DEBUG_PUSH
9934 xmlGenericError(xmlGenericErrorContext,
9935 "PP: entering START_TAG\n");
9936#endif
9937 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00009938 case XML_PARSER_PUBLIC_LITERAL:
9939 xmlGenericError(xmlGenericErrorContext,
9940 "PP: internal error, state == PUBLIC_LITERAL\n");
9941 ctxt->instate = XML_PARSER_START_TAG;
9942#ifdef DEBUG_PUSH
9943 xmlGenericError(xmlGenericErrorContext,
9944 "PP: entering START_TAG\n");
9945#endif
9946 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009947 }
9948 }
9949done:
9950#ifdef DEBUG_PUSH
9951 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
9952#endif
9953 return(ret);
9954}
9955
9956/**
Owen Taylor3473f882001-02-23 17:55:21 +00009957 * xmlParseChunk:
9958 * @ctxt: an XML parser context
9959 * @chunk: an char array
9960 * @size: the size in byte of the chunk
9961 * @terminate: last chunk indicator
9962 *
9963 * Parse a Chunk of memory
9964 *
9965 * Returns zero if no error, the xmlParserErrors otherwise.
9966 */
9967int
9968xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9969 int terminate) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009970 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9971 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +00009972 if (ctxt->instate == XML_PARSER_START)
9973 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009974 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9975 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9976 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9977 int cur = ctxt->input->cur - ctxt->input->base;
9978
9979 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9980 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9981 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009982 ctxt->input->end =
9983 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009984#ifdef DEBUG_PUSH
9985 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9986#endif
9987
Owen Taylor3473f882001-02-23 17:55:21 +00009988 } else if (ctxt->instate != XML_PARSER_EOF) {
9989 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
9990 xmlParserInputBufferPtr in = ctxt->input->buf;
9991 if ((in->encoder != NULL) && (in->buffer != NULL) &&
9992 (in->raw != NULL)) {
9993 int nbchars;
9994
9995 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
9996 if (nbchars < 0) {
9997 xmlGenericError(xmlGenericErrorContext,
9998 "xmlParseChunk: encoder error\n");
9999 return(XML_ERR_INVALID_ENCODING);
10000 }
10001 }
10002 }
10003 }
10004 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010005 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10006 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010007 if (terminate) {
10008 /*
10009 * Check for termination
10010 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010011 int avail = 0;
10012 if (ctxt->input->buf == NULL)
10013 avail = ctxt->input->length -
10014 (ctxt->input->cur - ctxt->input->base);
10015 else
10016 avail = ctxt->input->buf->buffer->use -
10017 (ctxt->input->cur - ctxt->input->base);
10018
Owen Taylor3473f882001-02-23 17:55:21 +000010019 if ((ctxt->instate != XML_PARSER_EOF) &&
10020 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010021 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010022 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010023 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010024 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010025 }
Owen Taylor3473f882001-02-23 17:55:21 +000010026 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010027 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010028 ctxt->sax->endDocument(ctxt->userData);
10029 }
10030 ctxt->instate = XML_PARSER_EOF;
10031 }
10032 return((xmlParserErrors) ctxt->errNo);
10033}
10034
10035/************************************************************************
10036 * *
10037 * I/O front end functions to the parser *
10038 * *
10039 ************************************************************************/
10040
10041/**
10042 * xmlStopParser:
10043 * @ctxt: an XML parser context
10044 *
10045 * Blocks further parser processing
10046 */
10047void
10048xmlStopParser(xmlParserCtxtPtr ctxt) {
10049 ctxt->instate = XML_PARSER_EOF;
10050 if (ctxt->input != NULL)
10051 ctxt->input->cur = BAD_CAST"";
10052}
10053
10054/**
10055 * xmlCreatePushParserCtxt:
10056 * @sax: a SAX handler
10057 * @user_data: The user data returned on SAX callbacks
10058 * @chunk: a pointer to an array of chars
10059 * @size: number of chars in the array
10060 * @filename: an optional file name or URI
10061 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010062 * Create a parser context for using the XML parser in push mode.
10063 * If @buffer and @size are non-NULL, the data is used to detect
10064 * the encoding. The remaining characters will be parsed so they
10065 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010066 * To allow content encoding detection, @size should be >= 4
10067 * The value of @filename is used for fetching external entities
10068 * and error/warning reports.
10069 *
10070 * Returns the new parser context or NULL
10071 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010072
Owen Taylor3473f882001-02-23 17:55:21 +000010073xmlParserCtxtPtr
10074xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10075 const char *chunk, int size, const char *filename) {
10076 xmlParserCtxtPtr ctxt;
10077 xmlParserInputPtr inputStream;
10078 xmlParserInputBufferPtr buf;
10079 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10080
10081 /*
10082 * plug some encoding conversion routines
10083 */
10084 if ((chunk != NULL) && (size >= 4))
10085 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10086
10087 buf = xmlAllocParserInputBuffer(enc);
10088 if (buf == NULL) return(NULL);
10089
10090 ctxt = xmlNewParserCtxt();
10091 if (ctxt == NULL) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010092 xmlGenericError(xmlGenericErrorContext,
10093 "xml parser: out of memory\n");
10094 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010095 return(NULL);
10096 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010097 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10098 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010099 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010100 xmlFreeParserInputBuffer(buf);
10101 xmlFreeParserCtxt(ctxt);
10102 return(NULL);
10103 }
Owen Taylor3473f882001-02-23 17:55:21 +000010104 if (sax != NULL) {
Daniel Veillard092643b2003-09-25 14:29:29 +000010105 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000010106 xmlFree(ctxt->sax);
10107 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10108 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010109 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010110 xmlFreeParserInputBuffer(buf);
10111 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010112 return(NULL);
10113 }
10114 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10115 if (user_data != NULL)
10116 ctxt->userData = user_data;
10117 }
10118 if (filename == NULL) {
10119 ctxt->directory = NULL;
10120 } else {
10121 ctxt->directory = xmlParserGetDirectory(filename);
10122 }
10123
10124 inputStream = xmlNewInputStream(ctxt);
10125 if (inputStream == NULL) {
10126 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010127 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010128 return(NULL);
10129 }
10130
10131 if (filename == NULL)
10132 inputStream->filename = NULL;
10133 else
Daniel Veillardf4862f02002-09-10 11:13:43 +000010134 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010135 xmlCanonicPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +000010136 inputStream->buf = buf;
10137 inputStream->base = inputStream->buf->buffer->content;
10138 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010139 inputStream->end =
10140 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010141
10142 inputPush(ctxt, inputStream);
10143
10144 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10145 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010146 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10147 int cur = ctxt->input->cur - ctxt->input->base;
10148
Owen Taylor3473f882001-02-23 17:55:21 +000010149 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010150
10151 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10152 ctxt->input->cur = ctxt->input->base + cur;
10153 ctxt->input->end =
10154 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010155#ifdef DEBUG_PUSH
10156 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10157#endif
10158 }
10159
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010160 if (enc != XML_CHAR_ENCODING_NONE) {
10161 xmlSwitchEncoding(ctxt, enc);
10162 }
10163
Owen Taylor3473f882001-02-23 17:55:21 +000010164 return(ctxt);
10165}
10166
10167/**
10168 * xmlCreateIOParserCtxt:
10169 * @sax: a SAX handler
10170 * @user_data: The user data returned on SAX callbacks
10171 * @ioread: an I/O read function
10172 * @ioclose: an I/O close function
10173 * @ioctx: an I/O handler
10174 * @enc: the charset encoding if known
10175 *
10176 * Create a parser context for using the XML parser with an existing
10177 * I/O stream
10178 *
10179 * Returns the new parser context or NULL
10180 */
10181xmlParserCtxtPtr
10182xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10183 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10184 void *ioctx, xmlCharEncoding enc) {
10185 xmlParserCtxtPtr ctxt;
10186 xmlParserInputPtr inputStream;
10187 xmlParserInputBufferPtr buf;
10188
10189 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10190 if (buf == NULL) return(NULL);
10191
10192 ctxt = xmlNewParserCtxt();
10193 if (ctxt == NULL) {
10194 xmlFree(buf);
10195 return(NULL);
10196 }
10197 if (sax != NULL) {
Daniel Veillard092643b2003-09-25 14:29:29 +000010198 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000010199 xmlFree(ctxt->sax);
10200 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10201 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010202 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010203 xmlFree(ctxt);
10204 return(NULL);
10205 }
10206 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10207 if (user_data != NULL)
10208 ctxt->userData = user_data;
10209 }
10210
10211 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10212 if (inputStream == NULL) {
10213 xmlFreeParserCtxt(ctxt);
10214 return(NULL);
10215 }
10216 inputPush(ctxt, inputStream);
10217
10218 return(ctxt);
10219}
10220
10221/************************************************************************
10222 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010223 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010224 * *
10225 ************************************************************************/
10226
10227/**
10228 * xmlIOParseDTD:
10229 * @sax: the SAX handler block or NULL
10230 * @input: an Input Buffer
10231 * @enc: the charset encoding if known
10232 *
10233 * Load and parse a DTD
10234 *
10235 * Returns the resulting xmlDtdPtr or NULL in case of error.
10236 * @input will be freed at parsing end.
10237 */
10238
10239xmlDtdPtr
10240xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10241 xmlCharEncoding enc) {
10242 xmlDtdPtr ret = NULL;
10243 xmlParserCtxtPtr ctxt;
10244 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010245 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010246
10247 if (input == NULL)
10248 return(NULL);
10249
10250 ctxt = xmlNewParserCtxt();
10251 if (ctxt == NULL) {
10252 return(NULL);
10253 }
10254
10255 /*
10256 * Set-up the SAX context
10257 */
10258 if (sax != NULL) {
10259 if (ctxt->sax != NULL)
10260 xmlFree(ctxt->sax);
10261 ctxt->sax = sax;
10262 ctxt->userData = NULL;
10263 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010264 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010265
10266 /*
10267 * generate a parser input from the I/O handler
10268 */
10269
10270 pinput = xmlNewIOInputStream(ctxt, input, enc);
10271 if (pinput == NULL) {
10272 if (sax != NULL) ctxt->sax = NULL;
10273 xmlFreeParserCtxt(ctxt);
10274 return(NULL);
10275 }
10276
10277 /*
10278 * plug some encoding conversion routines here.
10279 */
10280 xmlPushInput(ctxt, pinput);
10281
10282 pinput->filename = NULL;
10283 pinput->line = 1;
10284 pinput->col = 1;
10285 pinput->base = ctxt->input->cur;
10286 pinput->cur = ctxt->input->cur;
10287 pinput->free = NULL;
10288
10289 /*
10290 * let's parse that entity knowing it's an external subset.
10291 */
10292 ctxt->inSubset = 2;
10293 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10294 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10295 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010296
10297 if (enc == XML_CHAR_ENCODING_NONE) {
10298 /*
10299 * Get the 4 first bytes and decode the charset
10300 * if enc != XML_CHAR_ENCODING_NONE
10301 * plug some encoding conversion routines.
10302 */
10303 start[0] = RAW;
10304 start[1] = NXT(1);
10305 start[2] = NXT(2);
10306 start[3] = NXT(3);
10307 enc = xmlDetectCharEncoding(start, 4);
10308 if (enc != XML_CHAR_ENCODING_NONE) {
10309 xmlSwitchEncoding(ctxt, enc);
10310 }
10311 }
10312
Owen Taylor3473f882001-02-23 17:55:21 +000010313 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10314
10315 if (ctxt->myDoc != NULL) {
10316 if (ctxt->wellFormed) {
10317 ret = ctxt->myDoc->extSubset;
10318 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010319 if (ret != NULL) {
10320 xmlNodePtr tmp;
10321
10322 ret->doc = NULL;
10323 tmp = ret->children;
10324 while (tmp != NULL) {
10325 tmp->doc = NULL;
10326 tmp = tmp->next;
10327 }
10328 }
Owen Taylor3473f882001-02-23 17:55:21 +000010329 } else {
10330 ret = NULL;
10331 }
10332 xmlFreeDoc(ctxt->myDoc);
10333 ctxt->myDoc = NULL;
10334 }
10335 if (sax != NULL) ctxt->sax = NULL;
10336 xmlFreeParserCtxt(ctxt);
10337
10338 return(ret);
10339}
10340
10341/**
10342 * xmlSAXParseDTD:
10343 * @sax: the SAX handler block
10344 * @ExternalID: a NAME* containing the External ID of the DTD
10345 * @SystemID: a NAME* containing the URL to the DTD
10346 *
10347 * Load and parse an external subset.
10348 *
10349 * Returns the resulting xmlDtdPtr or NULL in case of error.
10350 */
10351
10352xmlDtdPtr
10353xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10354 const xmlChar *SystemID) {
10355 xmlDtdPtr ret = NULL;
10356 xmlParserCtxtPtr ctxt;
10357 xmlParserInputPtr input = NULL;
10358 xmlCharEncoding enc;
10359
10360 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10361
10362 ctxt = xmlNewParserCtxt();
10363 if (ctxt == NULL) {
10364 return(NULL);
10365 }
10366
10367 /*
10368 * Set-up the SAX context
10369 */
10370 if (sax != NULL) {
10371 if (ctxt->sax != NULL)
10372 xmlFree(ctxt->sax);
10373 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010374 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010375 }
10376
10377 /*
10378 * Ask the Entity resolver to load the damn thing
10379 */
10380
10381 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillardc6abc3d2003-04-26 13:27:30 +000010382 input = ctxt->sax->resolveEntity(ctxt, ExternalID, SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010383 if (input == NULL) {
10384 if (sax != NULL) ctxt->sax = NULL;
10385 xmlFreeParserCtxt(ctxt);
10386 return(NULL);
10387 }
10388
10389 /*
10390 * plug some encoding conversion routines here.
10391 */
10392 xmlPushInput(ctxt, input);
10393 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10394 xmlSwitchEncoding(ctxt, enc);
10395
10396 if (input->filename == NULL)
Daniel Veillard85095e22003-04-23 13:56:44 +000010397 input->filename = (char *) xmlCanonicPath(SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010398 input->line = 1;
10399 input->col = 1;
10400 input->base = ctxt->input->cur;
10401 input->cur = ctxt->input->cur;
10402 input->free = NULL;
10403
10404 /*
10405 * let's parse that entity knowing it's an external subset.
10406 */
10407 ctxt->inSubset = 2;
10408 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10409 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10410 ExternalID, SystemID);
10411 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10412
10413 if (ctxt->myDoc != NULL) {
10414 if (ctxt->wellFormed) {
10415 ret = ctxt->myDoc->extSubset;
10416 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010417 if (ret != NULL) {
10418 xmlNodePtr tmp;
10419
10420 ret->doc = NULL;
10421 tmp = ret->children;
10422 while (tmp != NULL) {
10423 tmp->doc = NULL;
10424 tmp = tmp->next;
10425 }
10426 }
Owen Taylor3473f882001-02-23 17:55:21 +000010427 } else {
10428 ret = NULL;
10429 }
10430 xmlFreeDoc(ctxt->myDoc);
10431 ctxt->myDoc = NULL;
10432 }
10433 if (sax != NULL) ctxt->sax = NULL;
10434 xmlFreeParserCtxt(ctxt);
10435
10436 return(ret);
10437}
10438
10439/**
10440 * xmlParseDTD:
10441 * @ExternalID: a NAME* containing the External ID of the DTD
10442 * @SystemID: a NAME* containing the URL to the DTD
10443 *
10444 * Load and parse an external subset.
10445 *
10446 * Returns the resulting xmlDtdPtr or NULL in case of error.
10447 */
10448
10449xmlDtdPtr
10450xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10451 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10452}
10453
10454/************************************************************************
10455 * *
10456 * Front ends when parsing an Entity *
10457 * *
10458 ************************************************************************/
10459
10460/**
Owen Taylor3473f882001-02-23 17:55:21 +000010461 * xmlParseCtxtExternalEntity:
10462 * @ctx: the existing parsing context
10463 * @URL: the URL for the entity to load
10464 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010465 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010466 *
10467 * Parse an external general entity within an existing parsing context
10468 * An external general parsed entity is well-formed if it matches the
10469 * production labeled extParsedEnt.
10470 *
10471 * [78] extParsedEnt ::= TextDecl? content
10472 *
10473 * Returns 0 if the entity is well formed, -1 in case of args problem and
10474 * the parser error code otherwise
10475 */
10476
10477int
10478xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010479 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010480 xmlParserCtxtPtr ctxt;
10481 xmlDocPtr newDoc;
10482 xmlSAXHandlerPtr oldsax = NULL;
10483 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010484 xmlChar start[4];
10485 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010486
10487 if (ctx->depth > 40) {
10488 return(XML_ERR_ENTITY_LOOP);
10489 }
10490
Daniel Veillardcda96922001-08-21 10:56:31 +000010491 if (lst != NULL)
10492 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010493 if ((URL == NULL) && (ID == NULL))
10494 return(-1);
10495 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10496 return(-1);
10497
10498
10499 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10500 if (ctxt == NULL) return(-1);
10501 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010502 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010503 oldsax = ctxt->sax;
10504 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010505 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010506 newDoc = xmlNewDoc(BAD_CAST "1.0");
10507 if (newDoc == NULL) {
10508 xmlFreeParserCtxt(ctxt);
10509 return(-1);
10510 }
10511 if (ctx->myDoc != NULL) {
10512 newDoc->intSubset = ctx->myDoc->intSubset;
10513 newDoc->extSubset = ctx->myDoc->extSubset;
10514 }
10515 if (ctx->myDoc->URL != NULL) {
10516 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10517 }
10518 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10519 if (newDoc->children == NULL) {
10520 ctxt->sax = oldsax;
10521 xmlFreeParserCtxt(ctxt);
10522 newDoc->intSubset = NULL;
10523 newDoc->extSubset = NULL;
10524 xmlFreeDoc(newDoc);
10525 return(-1);
10526 }
10527 nodePush(ctxt, newDoc->children);
10528 if (ctx->myDoc == NULL) {
10529 ctxt->myDoc = newDoc;
10530 } else {
10531 ctxt->myDoc = ctx->myDoc;
10532 newDoc->children->doc = ctx->myDoc;
10533 }
10534
Daniel Veillard87a764e2001-06-20 17:41:10 +000010535 /*
10536 * Get the 4 first bytes and decode the charset
10537 * if enc != XML_CHAR_ENCODING_NONE
10538 * plug some encoding conversion routines.
10539 */
10540 GROW
10541 start[0] = RAW;
10542 start[1] = NXT(1);
10543 start[2] = NXT(2);
10544 start[3] = NXT(3);
10545 enc = xmlDetectCharEncoding(start, 4);
10546 if (enc != XML_CHAR_ENCODING_NONE) {
10547 xmlSwitchEncoding(ctxt, enc);
10548 }
10549
Owen Taylor3473f882001-02-23 17:55:21 +000010550 /*
10551 * Parse a possible text declaration first
10552 */
Owen Taylor3473f882001-02-23 17:55:21 +000010553 if ((RAW == '<') && (NXT(1) == '?') &&
10554 (NXT(2) == 'x') && (NXT(3) == 'm') &&
10555 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
10556 xmlParseTextDecl(ctxt);
10557 }
10558
10559 /*
10560 * Doing validity checking on chunk doesn't make sense
10561 */
10562 ctxt->instate = XML_PARSER_CONTENT;
10563 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010564 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010565 ctxt->loadsubset = ctx->loadsubset;
10566 ctxt->depth = ctx->depth + 1;
10567 ctxt->replaceEntities = ctx->replaceEntities;
10568 if (ctxt->validate) {
10569 ctxt->vctxt.error = ctx->vctxt.error;
10570 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010571 } else {
10572 ctxt->vctxt.error = NULL;
10573 ctxt->vctxt.warning = NULL;
10574 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010575 ctxt->vctxt.nodeTab = NULL;
10576 ctxt->vctxt.nodeNr = 0;
10577 ctxt->vctxt.nodeMax = 0;
10578 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010579
10580 xmlParseContent(ctxt);
10581
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010582 ctx->validate = ctxt->validate;
10583 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010584 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010585 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010586 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010587 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010588 }
10589 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010590 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010591 }
10592
10593 if (!ctxt->wellFormed) {
10594 if (ctxt->errNo == 0)
10595 ret = 1;
10596 else
10597 ret = ctxt->errNo;
10598 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010599 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010600 xmlNodePtr cur;
10601
10602 /*
10603 * Return the newly created nodeset after unlinking it from
10604 * they pseudo parent.
10605 */
10606 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010607 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010608 while (cur != NULL) {
10609 cur->parent = NULL;
10610 cur = cur->next;
10611 }
10612 newDoc->children->children = NULL;
10613 }
10614 ret = 0;
10615 }
10616 ctxt->sax = oldsax;
10617 xmlFreeParserCtxt(ctxt);
10618 newDoc->intSubset = NULL;
10619 newDoc->extSubset = NULL;
10620 xmlFreeDoc(newDoc);
10621
10622 return(ret);
10623}
10624
10625/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010626 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010627 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010628 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010629 * @sax: the SAX handler bloc (possibly NULL)
10630 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10631 * @depth: Used for loop detection, use 0
10632 * @URL: the URL for the entity to load
10633 * @ID: the System ID for the entity to load
10634 * @list: the return value for the set of parsed nodes
10635 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010636 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010637 *
10638 * Returns 0 if the entity is well formed, -1 in case of args problem and
10639 * the parser error code otherwise
10640 */
10641
Daniel Veillard7d515752003-09-26 19:12:37 +000010642static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010643xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10644 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010645 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010646 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010647 xmlParserCtxtPtr ctxt;
10648 xmlDocPtr newDoc;
10649 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000010650 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010651 xmlChar start[4];
10652 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010653
10654 if (depth > 40) {
10655 return(XML_ERR_ENTITY_LOOP);
10656 }
10657
10658
10659
10660 if (list != NULL)
10661 *list = NULL;
10662 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000010663 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010664 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000010665 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010666
10667
10668 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10669 if (ctxt == NULL) return(-1);
10670 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010671 if (oldctxt != NULL) {
10672 ctxt->_private = oldctxt->_private;
10673 ctxt->loadsubset = oldctxt->loadsubset;
10674 ctxt->validate = oldctxt->validate;
10675 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010676 ctxt->record_info = oldctxt->record_info;
10677 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10678 ctxt->node_seq.length = oldctxt->node_seq.length;
10679 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010680 } else {
10681 /*
10682 * Doing validity checking on chunk without context
10683 * doesn't make sense
10684 */
10685 ctxt->_private = NULL;
10686 ctxt->validate = 0;
10687 ctxt->external = 2;
10688 ctxt->loadsubset = 0;
10689 }
Owen Taylor3473f882001-02-23 17:55:21 +000010690 if (sax != NULL) {
10691 oldsax = ctxt->sax;
10692 ctxt->sax = sax;
10693 if (user_data != NULL)
10694 ctxt->userData = user_data;
10695 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010696 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010697 newDoc = xmlNewDoc(BAD_CAST "1.0");
10698 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010699 ctxt->node_seq.maximum = 0;
10700 ctxt->node_seq.length = 0;
10701 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010702 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000010703 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010704 }
10705 if (doc != NULL) {
10706 newDoc->intSubset = doc->intSubset;
10707 newDoc->extSubset = doc->extSubset;
10708 }
10709 if (doc->URL != NULL) {
10710 newDoc->URL = xmlStrdup(doc->URL);
10711 }
10712 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10713 if (newDoc->children == NULL) {
10714 if (sax != NULL)
10715 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010716 ctxt->node_seq.maximum = 0;
10717 ctxt->node_seq.length = 0;
10718 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010719 xmlFreeParserCtxt(ctxt);
10720 newDoc->intSubset = NULL;
10721 newDoc->extSubset = NULL;
10722 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000010723 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010724 }
10725 nodePush(ctxt, newDoc->children);
10726 if (doc == NULL) {
10727 ctxt->myDoc = newDoc;
10728 } else {
10729 ctxt->myDoc = doc;
10730 newDoc->children->doc = doc;
10731 }
10732
Daniel Veillard87a764e2001-06-20 17:41:10 +000010733 /*
10734 * Get the 4 first bytes and decode the charset
10735 * if enc != XML_CHAR_ENCODING_NONE
10736 * plug some encoding conversion routines.
10737 */
10738 GROW;
10739 start[0] = RAW;
10740 start[1] = NXT(1);
10741 start[2] = NXT(2);
10742 start[3] = NXT(3);
10743 enc = xmlDetectCharEncoding(start, 4);
10744 if (enc != XML_CHAR_ENCODING_NONE) {
10745 xmlSwitchEncoding(ctxt, enc);
10746 }
10747
Owen Taylor3473f882001-02-23 17:55:21 +000010748 /*
10749 * Parse a possible text declaration first
10750 */
Owen Taylor3473f882001-02-23 17:55:21 +000010751 if ((RAW == '<') && (NXT(1) == '?') &&
10752 (NXT(2) == 'x') && (NXT(3) == 'm') &&
10753 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
10754 xmlParseTextDecl(ctxt);
10755 }
10756
Owen Taylor3473f882001-02-23 17:55:21 +000010757 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000010758 ctxt->depth = depth;
10759
10760 xmlParseContent(ctxt);
10761
Daniel Veillard561b7f82002-03-20 21:55:57 +000010762 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010763 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000010764 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010765 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010766 }
10767 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010768 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010769 }
10770
10771 if (!ctxt->wellFormed) {
10772 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010773 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000010774 else
10775 ret = ctxt->errNo;
10776 } else {
10777 if (list != NULL) {
10778 xmlNodePtr cur;
10779
10780 /*
10781 * Return the newly created nodeset after unlinking it from
10782 * they pseudo parent.
10783 */
10784 cur = newDoc->children->children;
10785 *list = cur;
10786 while (cur != NULL) {
10787 cur->parent = NULL;
10788 cur = cur->next;
10789 }
10790 newDoc->children->children = NULL;
10791 }
Daniel Veillard7d515752003-09-26 19:12:37 +000010792 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000010793 }
10794 if (sax != NULL)
10795 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010796 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10797 oldctxt->node_seq.length = ctxt->node_seq.length;
10798 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010799 ctxt->node_seq.maximum = 0;
10800 ctxt->node_seq.length = 0;
10801 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010802 xmlFreeParserCtxt(ctxt);
10803 newDoc->intSubset = NULL;
10804 newDoc->extSubset = NULL;
10805 xmlFreeDoc(newDoc);
10806
10807 return(ret);
10808}
10809
10810/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010811 * xmlParseExternalEntity:
10812 * @doc: the document the chunk pertains to
10813 * @sax: the SAX handler bloc (possibly NULL)
10814 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10815 * @depth: Used for loop detection, use 0
10816 * @URL: the URL for the entity to load
10817 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010818 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010819 *
10820 * Parse an external general entity
10821 * An external general parsed entity is well-formed if it matches the
10822 * production labeled extParsedEnt.
10823 *
10824 * [78] extParsedEnt ::= TextDecl? content
10825 *
10826 * Returns 0 if the entity is well formed, -1 in case of args problem and
10827 * the parser error code otherwise
10828 */
10829
10830int
10831xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010832 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010833 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010834 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010835}
10836
10837/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010838 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010839 * @doc: the document the chunk pertains to
10840 * @sax: the SAX handler bloc (possibly NULL)
10841 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10842 * @depth: Used for loop detection, use 0
10843 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010844 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010845 *
10846 * Parse a well-balanced chunk of an XML document
10847 * called by the parser
10848 * The allowed sequence for the Well Balanced Chunk is the one defined by
10849 * the content production in the XML grammar:
10850 *
10851 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10852 *
10853 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10854 * the parser error code otherwise
10855 */
10856
10857int
10858xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010859 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010860 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10861 depth, string, lst, 0 );
10862}
10863
10864/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000010865 * xmlParseBalancedChunkMemoryInternal:
10866 * @oldctxt: the existing parsing context
10867 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10868 * @user_data: the user data field for the parser context
10869 * @lst: the return value for the set of parsed nodes
10870 *
10871 *
10872 * Parse a well-balanced chunk of an XML document
10873 * called by the parser
10874 * The allowed sequence for the Well Balanced Chunk is the one defined by
10875 * the content production in the XML grammar:
10876 *
10877 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10878 *
Daniel Veillard7d515752003-09-26 19:12:37 +000010879 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
10880 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000010881 *
10882 * In case recover is set to 1, the nodelist will not be empty even if
10883 * the parsed chunk is not well balanced.
10884 */
Daniel Veillard7d515752003-09-26 19:12:37 +000010885static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000010886xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
10887 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
10888 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010889 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010890 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010891 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010892 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000010893 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010894
10895 if (oldctxt->depth > 40) {
10896 return(XML_ERR_ENTITY_LOOP);
10897 }
10898
10899
10900 if (lst != NULL)
10901 *lst = NULL;
10902 if (string == NULL)
10903 return(-1);
10904
10905 size = xmlStrlen(string);
10906
10907 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10908 if (ctxt == NULL) return(-1);
10909 if (user_data != NULL)
10910 ctxt->userData = user_data;
10911 else
10912 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010913 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10914 ctxt->dict = oldctxt->dict;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010915
10916 oldsax = ctxt->sax;
10917 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010918 xmlDetectSAX2(ctxt);
10919
Daniel Veillarde1ca5032002-12-09 14:13:43 +000010920 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010921 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010922 newDoc = xmlNewDoc(BAD_CAST "1.0");
10923 if (newDoc == NULL) {
10924 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010925 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010926 xmlFreeParserCtxt(ctxt);
10927 return(-1);
10928 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010929 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010930 } else {
10931 ctxt->myDoc = oldctxt->myDoc;
10932 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010933 }
Daniel Veillard9bc53102002-11-25 13:20:04 +000010934 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +000010935 BAD_CAST "pseudoroot", NULL);
10936 if (ctxt->myDoc->children == NULL) {
10937 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010938 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010939 xmlFreeParserCtxt(ctxt);
10940 if (newDoc != NULL)
10941 xmlFreeDoc(newDoc);
10942 return(-1);
10943 }
10944 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010945 ctxt->instate = XML_PARSER_CONTENT;
10946 ctxt->depth = oldctxt->depth + 1;
10947
Daniel Veillard328f48c2002-11-15 15:24:34 +000010948 ctxt->validate = 0;
10949 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000010950 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
10951 /*
10952 * ID/IDREF registration will be done in xmlValidateElement below
10953 */
10954 ctxt->loadsubset |= XML_SKIP_IDS;
10955 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010956 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010957
Daniel Veillard68e9e742002-11-16 15:35:11 +000010958 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010959 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010960 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010961 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010962 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010963 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010964 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010965 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010966 }
10967
10968 if (!ctxt->wellFormed) {
10969 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010970 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010971 else
10972 ret = ctxt->errNo;
10973 } else {
10974 ret = 0;
10975 }
10976
10977 if ((lst != NULL) && (ret == 0)) {
10978 xmlNodePtr cur;
10979
10980 /*
10981 * Return the newly created nodeset after unlinking it from
10982 * they pseudo parent.
10983 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000010984 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010985 *lst = cur;
10986 while (cur != NULL) {
Daniel Veillard8d589042003-02-04 15:07:21 +000010987 if (oldctxt->validate && oldctxt->wellFormed &&
10988 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
10989 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
10990 oldctxt->myDoc, cur);
10991 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010992 cur->parent = NULL;
10993 cur = cur->next;
10994 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010995 ctxt->myDoc->children->children = NULL;
10996 }
10997 if (ctxt->myDoc != NULL) {
10998 xmlFreeNode(ctxt->myDoc->children);
10999 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011000 }
11001
11002 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011003 ctxt->dict = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011004 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011005 if (newDoc != NULL)
11006 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011007
11008 return(ret);
11009}
11010
11011/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011012 * xmlParseBalancedChunkMemoryRecover:
11013 * @doc: the document the chunk pertains to
11014 * @sax: the SAX handler bloc (possibly NULL)
11015 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11016 * @depth: Used for loop detection, use 0
11017 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11018 * @lst: the return value for the set of parsed nodes
11019 * @recover: return nodes even if the data is broken (use 0)
11020 *
11021 *
11022 * Parse a well-balanced chunk of an XML document
11023 * called by the parser
11024 * The allowed sequence for the Well Balanced Chunk is the one defined by
11025 * the content production in the XML grammar:
11026 *
11027 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11028 *
11029 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11030 * the parser error code otherwise
11031 *
11032 * In case recover is set to 1, the nodelist will not be empty even if
11033 * the parsed chunk is not well balanced.
11034 */
11035int
11036xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11037 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11038 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011039 xmlParserCtxtPtr ctxt;
11040 xmlDocPtr newDoc;
11041 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000011042 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000011043 int size;
11044 int ret = 0;
11045
11046 if (depth > 40) {
11047 return(XML_ERR_ENTITY_LOOP);
11048 }
11049
11050
Daniel Veillardcda96922001-08-21 10:56:31 +000011051 if (lst != NULL)
11052 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011053 if (string == NULL)
11054 return(-1);
11055
11056 size = xmlStrlen(string);
11057
11058 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11059 if (ctxt == NULL) return(-1);
11060 ctxt->userData = ctxt;
11061 if (sax != NULL) {
11062 oldsax = ctxt->sax;
11063 ctxt->sax = sax;
11064 if (user_data != NULL)
11065 ctxt->userData = user_data;
11066 }
11067 newDoc = xmlNewDoc(BAD_CAST "1.0");
11068 if (newDoc == NULL) {
11069 xmlFreeParserCtxt(ctxt);
11070 return(-1);
11071 }
11072 if (doc != NULL) {
11073 newDoc->intSubset = doc->intSubset;
11074 newDoc->extSubset = doc->extSubset;
11075 }
11076 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11077 if (newDoc->children == NULL) {
11078 if (sax != NULL)
11079 ctxt->sax = oldsax;
11080 xmlFreeParserCtxt(ctxt);
11081 newDoc->intSubset = NULL;
11082 newDoc->extSubset = NULL;
11083 xmlFreeDoc(newDoc);
11084 return(-1);
11085 }
11086 nodePush(ctxt, newDoc->children);
11087 if (doc == NULL) {
11088 ctxt->myDoc = newDoc;
11089 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011090 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011091 newDoc->children->doc = doc;
11092 }
11093 ctxt->instate = XML_PARSER_CONTENT;
11094 ctxt->depth = depth;
11095
11096 /*
11097 * Doing validity checking on chunk doesn't make sense
11098 */
11099 ctxt->validate = 0;
11100 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011101 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011102
Daniel Veillardb39bc392002-10-26 19:29:51 +000011103 if ( doc != NULL ){
11104 content = doc->children;
11105 doc->children = NULL;
11106 xmlParseContent(ctxt);
11107 doc->children = content;
11108 }
11109 else {
11110 xmlParseContent(ctxt);
11111 }
Owen Taylor3473f882001-02-23 17:55:21 +000011112 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011113 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011114 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011115 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011116 }
11117 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011118 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011119 }
11120
11121 if (!ctxt->wellFormed) {
11122 if (ctxt->errNo == 0)
11123 ret = 1;
11124 else
11125 ret = ctxt->errNo;
11126 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011127 ret = 0;
11128 }
11129
11130 if (lst != NULL && (ret == 0 || recover == 1)) {
11131 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011132
11133 /*
11134 * Return the newly created nodeset after unlinking it from
11135 * they pseudo parent.
11136 */
11137 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011138 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011139 while (cur != NULL) {
11140 cur->parent = NULL;
11141 cur = cur->next;
11142 }
11143 newDoc->children->children = NULL;
11144 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011145
Owen Taylor3473f882001-02-23 17:55:21 +000011146 if (sax != NULL)
11147 ctxt->sax = oldsax;
11148 xmlFreeParserCtxt(ctxt);
11149 newDoc->intSubset = NULL;
11150 newDoc->extSubset = NULL;
11151 xmlFreeDoc(newDoc);
11152
11153 return(ret);
11154}
11155
11156/**
11157 * xmlSAXParseEntity:
11158 * @sax: the SAX handler block
11159 * @filename: the filename
11160 *
11161 * parse an XML external entity out of context and build a tree.
11162 * It use the given SAX function block to handle the parsing callback.
11163 * If sax is NULL, fallback to the default DOM tree building routines.
11164 *
11165 * [78] extParsedEnt ::= TextDecl? content
11166 *
11167 * This correspond to a "Well Balanced" chunk
11168 *
11169 * Returns the resulting document tree
11170 */
11171
11172xmlDocPtr
11173xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11174 xmlDocPtr ret;
11175 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011176
11177 ctxt = xmlCreateFileParserCtxt(filename);
11178 if (ctxt == NULL) {
11179 return(NULL);
11180 }
11181 if (sax != NULL) {
11182 if (ctxt->sax != NULL)
11183 xmlFree(ctxt->sax);
11184 ctxt->sax = sax;
11185 ctxt->userData = NULL;
11186 }
11187
Owen Taylor3473f882001-02-23 17:55:21 +000011188 xmlParseExtParsedEnt(ctxt);
11189
11190 if (ctxt->wellFormed)
11191 ret = ctxt->myDoc;
11192 else {
11193 ret = NULL;
11194 xmlFreeDoc(ctxt->myDoc);
11195 ctxt->myDoc = NULL;
11196 }
11197 if (sax != NULL)
11198 ctxt->sax = NULL;
11199 xmlFreeParserCtxt(ctxt);
11200
11201 return(ret);
11202}
11203
11204/**
11205 * xmlParseEntity:
11206 * @filename: the filename
11207 *
11208 * parse an XML external entity out of context and build a tree.
11209 *
11210 * [78] extParsedEnt ::= TextDecl? content
11211 *
11212 * This correspond to a "Well Balanced" chunk
11213 *
11214 * Returns the resulting document tree
11215 */
11216
11217xmlDocPtr
11218xmlParseEntity(const char *filename) {
11219 return(xmlSAXParseEntity(NULL, filename));
11220}
11221
11222/**
11223 * xmlCreateEntityParserCtxt:
11224 * @URL: the entity URL
11225 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011226 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000011227 *
11228 * Create a parser context for an external entity
11229 * Automatic support for ZLIB/Compress compressed document is provided
11230 * by default if found at compile-time.
11231 *
11232 * Returns the new parser context or NULL
11233 */
11234xmlParserCtxtPtr
11235xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
11236 const xmlChar *base) {
11237 xmlParserCtxtPtr ctxt;
11238 xmlParserInputPtr inputStream;
11239 char *directory = NULL;
11240 xmlChar *uri;
11241
11242 ctxt = xmlNewParserCtxt();
11243 if (ctxt == NULL) {
11244 return(NULL);
11245 }
11246
11247 uri = xmlBuildURI(URL, base);
11248
11249 if (uri == NULL) {
11250 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11251 if (inputStream == NULL) {
11252 xmlFreeParserCtxt(ctxt);
11253 return(NULL);
11254 }
11255
11256 inputPush(ctxt, inputStream);
11257
11258 if ((ctxt->directory == NULL) && (directory == NULL))
11259 directory = xmlParserGetDirectory((char *)URL);
11260 if ((ctxt->directory == NULL) && (directory != NULL))
11261 ctxt->directory = directory;
11262 } else {
11263 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
11264 if (inputStream == NULL) {
11265 xmlFree(uri);
11266 xmlFreeParserCtxt(ctxt);
11267 return(NULL);
11268 }
11269
11270 inputPush(ctxt, inputStream);
11271
11272 if ((ctxt->directory == NULL) && (directory == NULL))
11273 directory = xmlParserGetDirectory((char *)uri);
11274 if ((ctxt->directory == NULL) && (directory != NULL))
11275 ctxt->directory = directory;
11276 xmlFree(uri);
11277 }
Owen Taylor3473f882001-02-23 17:55:21 +000011278 return(ctxt);
11279}
11280
11281/************************************************************************
11282 * *
11283 * Front ends when parsing from a file *
11284 * *
11285 ************************************************************************/
11286
11287/**
11288 * xmlCreateFileParserCtxt:
11289 * @filename: the filename
11290 *
11291 * Create a parser context for a file content.
11292 * Automatic support for ZLIB/Compress compressed document is provided
11293 * by default if found at compile-time.
11294 *
11295 * Returns the new parser context or NULL
11296 */
11297xmlParserCtxtPtr
11298xmlCreateFileParserCtxt(const char *filename)
11299{
11300 xmlParserCtxtPtr ctxt;
11301 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000011302 char *directory = NULL;
11303
Owen Taylor3473f882001-02-23 17:55:21 +000011304 ctxt = xmlNewParserCtxt();
11305 if (ctxt == NULL) {
11306 if (xmlDefaultSAXHandler.error != NULL) {
11307 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
11308 }
11309 return(NULL);
11310 }
11311
Igor Zlatkovicce076162003-02-23 13:39:39 +000011312
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000011313 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011314 if (inputStream == NULL) {
11315 xmlFreeParserCtxt(ctxt);
11316 return(NULL);
11317 }
11318
Owen Taylor3473f882001-02-23 17:55:21 +000011319 inputPush(ctxt, inputStream);
11320 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011321 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011322 if ((ctxt->directory == NULL) && (directory != NULL))
11323 ctxt->directory = directory;
11324
11325 return(ctxt);
11326}
11327
11328/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011329 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000011330 * @sax: the SAX handler block
11331 * @filename: the filename
11332 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11333 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000011334 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000011335 *
11336 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11337 * compressed document is provided by default if found at compile-time.
11338 * It use the given SAX function block to handle the parsing callback.
11339 * If sax is NULL, fallback to the default DOM tree building routines.
11340 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011341 * User data (void *) is stored within the parser context in the
11342 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011343 *
Owen Taylor3473f882001-02-23 17:55:21 +000011344 * Returns the resulting document tree
11345 */
11346
11347xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011348xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11349 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011350 xmlDocPtr ret;
11351 xmlParserCtxtPtr ctxt;
11352 char *directory = NULL;
11353
Daniel Veillard635ef722001-10-29 11:48:19 +000011354 xmlInitParser();
11355
Owen Taylor3473f882001-02-23 17:55:21 +000011356 ctxt = xmlCreateFileParserCtxt(filename);
11357 if (ctxt == NULL) {
11358 return(NULL);
11359 }
11360 if (sax != NULL) {
11361 if (ctxt->sax != NULL)
11362 xmlFree(ctxt->sax);
11363 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011364 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011365 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011366 if (data!=NULL) {
11367 ctxt->_private=data;
11368 }
Owen Taylor3473f882001-02-23 17:55:21 +000011369
11370 if ((ctxt->directory == NULL) && (directory == NULL))
11371 directory = xmlParserGetDirectory(filename);
11372 if ((ctxt->directory == NULL) && (directory != NULL))
11373 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11374
Daniel Veillarddad3f682002-11-17 16:47:27 +000011375 ctxt->recovery = recovery;
11376
Owen Taylor3473f882001-02-23 17:55:21 +000011377 xmlParseDocument(ctxt);
11378
William M. Brackc07329e2003-09-08 01:57:30 +000011379 if ((ctxt->wellFormed) || recovery) {
11380 ret = ctxt->myDoc;
11381 if (ctxt->input->buf->compressed > 0)
11382 ret->compression = 9;
11383 else
11384 ret->compression = ctxt->input->buf->compressed;
11385 }
Owen Taylor3473f882001-02-23 17:55:21 +000011386 else {
11387 ret = NULL;
11388 xmlFreeDoc(ctxt->myDoc);
11389 ctxt->myDoc = NULL;
11390 }
11391 if (sax != NULL)
11392 ctxt->sax = NULL;
11393 xmlFreeParserCtxt(ctxt);
11394
11395 return(ret);
11396}
11397
11398/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011399 * xmlSAXParseFile:
11400 * @sax: the SAX handler block
11401 * @filename: the filename
11402 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11403 * documents
11404 *
11405 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11406 * compressed document is provided by default if found at compile-time.
11407 * It use the given SAX function block to handle the parsing callback.
11408 * If sax is NULL, fallback to the default DOM tree building routines.
11409 *
11410 * Returns the resulting document tree
11411 */
11412
11413xmlDocPtr
11414xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11415 int recovery) {
11416 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11417}
11418
11419/**
Owen Taylor3473f882001-02-23 17:55:21 +000011420 * xmlRecoverDoc:
11421 * @cur: a pointer to an array of xmlChar
11422 *
11423 * parse an XML in-memory document and build a tree.
11424 * In the case the document is not Well Formed, a tree is built anyway
11425 *
11426 * Returns the resulting document tree
11427 */
11428
11429xmlDocPtr
11430xmlRecoverDoc(xmlChar *cur) {
11431 return(xmlSAXParseDoc(NULL, cur, 1));
11432}
11433
11434/**
11435 * xmlParseFile:
11436 * @filename: the filename
11437 *
11438 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11439 * compressed document is provided by default if found at compile-time.
11440 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000011441 * Returns the resulting document tree if the file was wellformed,
11442 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000011443 */
11444
11445xmlDocPtr
11446xmlParseFile(const char *filename) {
11447 return(xmlSAXParseFile(NULL, filename, 0));
11448}
11449
11450/**
11451 * xmlRecoverFile:
11452 * @filename: the filename
11453 *
11454 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11455 * compressed document is provided by default if found at compile-time.
11456 * In the case the document is not Well Formed, a tree is built anyway
11457 *
11458 * Returns the resulting document tree
11459 */
11460
11461xmlDocPtr
11462xmlRecoverFile(const char *filename) {
11463 return(xmlSAXParseFile(NULL, filename, 1));
11464}
11465
11466
11467/**
11468 * xmlSetupParserForBuffer:
11469 * @ctxt: an XML parser context
11470 * @buffer: a xmlChar * buffer
11471 * @filename: a file name
11472 *
11473 * Setup the parser context to parse a new buffer; Clears any prior
11474 * contents from the parser context. The buffer parameter must not be
11475 * NULL, but the filename parameter can be
11476 */
11477void
11478xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
11479 const char* filename)
11480{
11481 xmlParserInputPtr input;
11482
11483 input = xmlNewInputStream(ctxt);
11484 if (input == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +000011485 xmlGenericError(xmlGenericErrorContext,
11486 "malloc");
Owen Taylor3473f882001-02-23 17:55:21 +000011487 xmlFree(ctxt);
11488 return;
11489 }
11490
11491 xmlClearParserCtxt(ctxt);
11492 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000011493 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011494 input->base = buffer;
11495 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011496 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000011497 inputPush(ctxt, input);
11498}
11499
11500/**
11501 * xmlSAXUserParseFile:
11502 * @sax: a SAX handler
11503 * @user_data: The user data returned on SAX callbacks
11504 * @filename: a file name
11505 *
11506 * parse an XML file and call the given SAX handler routines.
11507 * Automatic support for ZLIB/Compress compressed document is provided
11508 *
11509 * Returns 0 in case of success or a error number otherwise
11510 */
11511int
11512xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11513 const char *filename) {
11514 int ret = 0;
11515 xmlParserCtxtPtr ctxt;
11516
11517 ctxt = xmlCreateFileParserCtxt(filename);
11518 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000011519 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000011520 xmlFree(ctxt->sax);
11521 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011522 xmlDetectSAX2(ctxt);
11523
Owen Taylor3473f882001-02-23 17:55:21 +000011524 if (user_data != NULL)
11525 ctxt->userData = user_data;
11526
11527 xmlParseDocument(ctxt);
11528
11529 if (ctxt->wellFormed)
11530 ret = 0;
11531 else {
11532 if (ctxt->errNo != 0)
11533 ret = ctxt->errNo;
11534 else
11535 ret = -1;
11536 }
11537 if (sax != NULL)
11538 ctxt->sax = NULL;
11539 xmlFreeParserCtxt(ctxt);
11540
11541 return ret;
11542}
11543
11544/************************************************************************
11545 * *
11546 * Front ends when parsing from memory *
11547 * *
11548 ************************************************************************/
11549
11550/**
11551 * xmlCreateMemoryParserCtxt:
11552 * @buffer: a pointer to a char array
11553 * @size: the size of the array
11554 *
11555 * Create a parser context for an XML in-memory document.
11556 *
11557 * Returns the new parser context or NULL
11558 */
11559xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011560xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011561 xmlParserCtxtPtr ctxt;
11562 xmlParserInputPtr input;
11563 xmlParserInputBufferPtr buf;
11564
11565 if (buffer == NULL)
11566 return(NULL);
11567 if (size <= 0)
11568 return(NULL);
11569
11570 ctxt = xmlNewParserCtxt();
11571 if (ctxt == NULL)
11572 return(NULL);
11573
Daniel Veillard53350552003-09-18 13:35:51 +000011574 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000011575 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011576 if (buf == NULL) {
11577 xmlFreeParserCtxt(ctxt);
11578 return(NULL);
11579 }
Owen Taylor3473f882001-02-23 17:55:21 +000011580
11581 input = xmlNewInputStream(ctxt);
11582 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011583 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011584 xmlFreeParserCtxt(ctxt);
11585 return(NULL);
11586 }
11587
11588 input->filename = NULL;
11589 input->buf = buf;
11590 input->base = input->buf->buffer->content;
11591 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011592 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011593
11594 inputPush(ctxt, input);
11595 return(ctxt);
11596}
11597
11598/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011599 * xmlSAXParseMemoryWithData:
11600 * @sax: the SAX handler block
11601 * @buffer: an pointer to a char array
11602 * @size: the size of the array
11603 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11604 * documents
11605 * @data: the userdata
11606 *
11607 * parse an XML in-memory block and use the given SAX function block
11608 * to handle the parsing callback. If sax is NULL, fallback to the default
11609 * DOM tree building routines.
11610 *
11611 * User data (void *) is stored within the parser context in the
11612 * context's _private member, so it is available nearly everywhere in libxml
11613 *
11614 * Returns the resulting document tree
11615 */
11616
11617xmlDocPtr
11618xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
11619 int size, int recovery, void *data) {
11620 xmlDocPtr ret;
11621 xmlParserCtxtPtr ctxt;
11622
11623 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11624 if (ctxt == NULL) return(NULL);
11625 if (sax != NULL) {
11626 if (ctxt->sax != NULL)
11627 xmlFree(ctxt->sax);
11628 ctxt->sax = sax;
11629 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011630 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011631 if (data!=NULL) {
11632 ctxt->_private=data;
11633 }
11634
Daniel Veillardadba5f12003-04-04 16:09:01 +000011635 ctxt->recovery = recovery;
11636
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011637 xmlParseDocument(ctxt);
11638
11639 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11640 else {
11641 ret = NULL;
11642 xmlFreeDoc(ctxt->myDoc);
11643 ctxt->myDoc = NULL;
11644 }
11645 if (sax != NULL)
11646 ctxt->sax = NULL;
11647 xmlFreeParserCtxt(ctxt);
11648
11649 return(ret);
11650}
11651
11652/**
Owen Taylor3473f882001-02-23 17:55:21 +000011653 * xmlSAXParseMemory:
11654 * @sax: the SAX handler block
11655 * @buffer: an pointer to a char array
11656 * @size: the size of the array
11657 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
11658 * documents
11659 *
11660 * parse an XML in-memory block and use the given SAX function block
11661 * to handle the parsing callback. If sax is NULL, fallback to the default
11662 * DOM tree building routines.
11663 *
11664 * Returns the resulting document tree
11665 */
11666xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000011667xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
11668 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011669 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011670}
11671
11672/**
11673 * xmlParseMemory:
11674 * @buffer: an pointer to a char array
11675 * @size: the size of the array
11676 *
11677 * parse an XML in-memory block and build a tree.
11678 *
11679 * Returns the resulting document tree
11680 */
11681
Daniel Veillard50822cb2001-07-26 20:05:51 +000011682xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011683 return(xmlSAXParseMemory(NULL, buffer, size, 0));
11684}
11685
11686/**
11687 * xmlRecoverMemory:
11688 * @buffer: an pointer to a char array
11689 * @size: the size of the array
11690 *
11691 * parse an XML in-memory block and build a tree.
11692 * In the case the document is not Well Formed, a tree is built anyway
11693 *
11694 * Returns the resulting document tree
11695 */
11696
Daniel Veillard50822cb2001-07-26 20:05:51 +000011697xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011698 return(xmlSAXParseMemory(NULL, buffer, size, 1));
11699}
11700
11701/**
11702 * xmlSAXUserParseMemory:
11703 * @sax: a SAX handler
11704 * @user_data: The user data returned on SAX callbacks
11705 * @buffer: an in-memory XML document input
11706 * @size: the length of the XML document in bytes
11707 *
11708 * A better SAX parsing routine.
11709 * parse an XML in-memory buffer and call the given SAX handler routines.
11710 *
11711 * Returns 0 in case of success or a error number otherwise
11712 */
11713int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011714 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011715 int ret = 0;
11716 xmlParserCtxtPtr ctxt;
11717 xmlSAXHandlerPtr oldsax = NULL;
11718
Daniel Veillard9e923512002-08-14 08:48:52 +000011719 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000011720 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11721 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000011722 oldsax = ctxt->sax;
11723 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011724 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000011725 if (user_data != NULL)
11726 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000011727
11728 xmlParseDocument(ctxt);
11729
11730 if (ctxt->wellFormed)
11731 ret = 0;
11732 else {
11733 if (ctxt->errNo != 0)
11734 ret = ctxt->errNo;
11735 else
11736 ret = -1;
11737 }
Daniel Veillard9e923512002-08-14 08:48:52 +000011738 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000011739 xmlFreeParserCtxt(ctxt);
11740
11741 return ret;
11742}
11743
11744/**
11745 * xmlCreateDocParserCtxt:
11746 * @cur: a pointer to an array of xmlChar
11747 *
11748 * Creates a parser context for an XML in-memory document.
11749 *
11750 * Returns the new parser context or NULL
11751 */
11752xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011753xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000011754 int len;
11755
11756 if (cur == NULL)
11757 return(NULL);
11758 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011759 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000011760}
11761
11762/**
11763 * xmlSAXParseDoc:
11764 * @sax: the SAX handler block
11765 * @cur: a pointer to an array of xmlChar
11766 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11767 * documents
11768 *
11769 * parse an XML in-memory document and build a tree.
11770 * It use the given SAX function block to handle the parsing callback.
11771 * If sax is NULL, fallback to the default DOM tree building routines.
11772 *
11773 * Returns the resulting document tree
11774 */
11775
11776xmlDocPtr
11777xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
11778 xmlDocPtr ret;
11779 xmlParserCtxtPtr ctxt;
11780
11781 if (cur == NULL) return(NULL);
11782
11783
11784 ctxt = xmlCreateDocParserCtxt(cur);
11785 if (ctxt == NULL) return(NULL);
11786 if (sax != NULL) {
11787 ctxt->sax = sax;
11788 ctxt->userData = NULL;
11789 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011790 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011791
11792 xmlParseDocument(ctxt);
11793 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11794 else {
11795 ret = NULL;
11796 xmlFreeDoc(ctxt->myDoc);
11797 ctxt->myDoc = NULL;
11798 }
11799 if (sax != NULL)
11800 ctxt->sax = NULL;
11801 xmlFreeParserCtxt(ctxt);
11802
11803 return(ret);
11804}
11805
11806/**
11807 * xmlParseDoc:
11808 * @cur: a pointer to an array of xmlChar
11809 *
11810 * parse an XML in-memory document and build a tree.
11811 *
11812 * Returns the resulting document tree
11813 */
11814
11815xmlDocPtr
11816xmlParseDoc(xmlChar *cur) {
11817 return(xmlSAXParseDoc(NULL, cur, 0));
11818}
11819
Daniel Veillard8107a222002-01-13 14:10:10 +000011820/************************************************************************
11821 * *
11822 * Specific function to keep track of entities references *
11823 * and used by the XSLT debugger *
11824 * *
11825 ************************************************************************/
11826
11827static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
11828
11829/**
11830 * xmlAddEntityReference:
11831 * @ent : A valid entity
11832 * @firstNode : A valid first node for children of entity
11833 * @lastNode : A valid last node of children entity
11834 *
11835 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
11836 */
11837static void
11838xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
11839 xmlNodePtr lastNode)
11840{
11841 if (xmlEntityRefFunc != NULL) {
11842 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
11843 }
11844}
11845
11846
11847/**
11848 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000011849 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000011850 *
11851 * Set the function to call call back when a xml reference has been made
11852 */
11853void
11854xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
11855{
11856 xmlEntityRefFunc = func;
11857}
Owen Taylor3473f882001-02-23 17:55:21 +000011858
11859/************************************************************************
11860 * *
11861 * Miscellaneous *
11862 * *
11863 ************************************************************************/
11864
11865#ifdef LIBXML_XPATH_ENABLED
11866#include <libxml/xpath.h>
11867#endif
11868
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011869extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000011870static int xmlParserInitialized = 0;
11871
11872/**
11873 * xmlInitParser:
11874 *
11875 * Initialization function for the XML parser.
11876 * This is not reentrant. Call once before processing in case of
11877 * use in multithreaded programs.
11878 */
11879
11880void
11881xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000011882 if (xmlParserInitialized != 0)
11883 return;
Owen Taylor3473f882001-02-23 17:55:21 +000011884
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011885 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
11886 (xmlGenericError == NULL))
11887 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011888 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000011889 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000011890 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000011891 xmlInitCharEncodingHandlers();
11892 xmlInitializePredefinedEntities();
11893 xmlDefaultSAXHandlerInit();
11894 xmlRegisterDefaultInputCallbacks();
11895 xmlRegisterDefaultOutputCallbacks();
11896#ifdef LIBXML_HTML_ENABLED
11897 htmlInitAutoClose();
11898 htmlDefaultSAXHandlerInit();
11899#endif
11900#ifdef LIBXML_XPATH_ENABLED
11901 xmlXPathInit();
11902#endif
11903 xmlParserInitialized = 1;
11904}
11905
11906/**
11907 * xmlCleanupParser:
11908 *
11909 * Cleanup function for the XML parser. It tries to reclaim all
11910 * parsing related global memory allocated for the parser processing.
11911 * It doesn't deallocate any document related memory. Calling this
11912 * function should not prevent reusing the parser.
Daniel Veillard7424eb62003-01-24 14:14:52 +000011913 * One should call xmlCleanupParser() only when the process has
11914 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000011915 */
11916
11917void
11918xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000011919 if (!xmlParserInitialized)
11920 return;
11921
Owen Taylor3473f882001-02-23 17:55:21 +000011922 xmlCleanupCharEncodingHandlers();
11923 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000011924#ifdef LIBXML_CATALOG_ENABLED
11925 xmlCatalogCleanup();
11926#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000011927 xmlCleanupThreads();
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011928 xmlCleanupGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000011929 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011930}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011931
11932/************************************************************************
11933 * *
11934 * New set (2.6.0) of simpler and more flexible APIs *
11935 * *
11936 ************************************************************************/
11937
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is expanded; when it is NULL the string is always freed. NULL strings
 * are ignored.
 *
 * The expansion is a single statement which needs its terminating ';',
 * so the macro can safely be used as the body of an unbraced if/else.
 */
#define DICT_FREE(str)                                                  \
    do {                                                                \
        if ((str) && ((!dict) ||                                        \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));                                     \
    } while (0)
11949
11950/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011951 * xmlCtxtReset:
11952 * @ctxt: an XML parser context
11953 *
11954 * Reset a parser context
11955 */
11956void
11957xmlCtxtReset(xmlParserCtxtPtr ctxt)
11958{
11959 xmlParserInputPtr input;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011960 xmlDictPtr dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011961
11962 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
11963 xmlFreeInputStream(input);
11964 }
11965 ctxt->inputNr = 0;
11966 ctxt->input = NULL;
11967
11968 ctxt->spaceNr = 0;
11969 ctxt->spaceTab[0] = -1;
11970 ctxt->space = &ctxt->spaceTab[0];
11971
11972
11973 ctxt->nodeNr = 0;
11974 ctxt->node = NULL;
11975
11976 ctxt->nameNr = 0;
11977 ctxt->name = NULL;
11978
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011979 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011980 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011981 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011982 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011983 DICT_FREE(ctxt->directory);
11984 ctxt->directory = NULL;
11985 DICT_FREE(ctxt->extSubURI);
11986 ctxt->extSubURI = NULL;
11987 DICT_FREE(ctxt->extSubSystem);
11988 ctxt->extSubSystem = NULL;
11989 if (ctxt->myDoc != NULL)
11990 xmlFreeDoc(ctxt->myDoc);
11991 ctxt->myDoc = NULL;
11992
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011993 ctxt->standalone = -1;
11994 ctxt->hasExternalSubset = 0;
11995 ctxt->hasPErefs = 0;
11996 ctxt->html = 0;
11997 ctxt->external = 0;
11998 ctxt->instate = XML_PARSER_START;
11999 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012000
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012001 ctxt->wellFormed = 1;
12002 ctxt->nsWellFormed = 1;
12003 ctxt->valid = 1;
12004 ctxt->vctxt.userData = ctxt;
12005 ctxt->vctxt.error = xmlParserValidityError;
12006 ctxt->vctxt.warning = xmlParserValidityWarning;
12007 ctxt->record_info = 0;
12008 ctxt->nbChars = 0;
12009 ctxt->checkIndex = 0;
12010 ctxt->inSubset = 0;
12011 ctxt->errNo = XML_ERR_OK;
12012 ctxt->depth = 0;
12013 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12014 ctxt->catalogs = NULL;
12015 xmlInitNodeInfoSeq(&ctxt->node_seq);
12016
12017 if (ctxt->attsDefault != NULL) {
12018 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12019 ctxt->attsDefault = NULL;
12020 }
12021 if (ctxt->attsSpecial != NULL) {
12022 xmlHashFree(ctxt->attsSpecial, NULL);
12023 ctxt->attsSpecial = NULL;
12024 }
12025
12026 if (ctxt->catalogs != NULL)
12027 xmlCatalogFreeLocal(ctxt->catalogs);
12028}
12029
12030/**
12031 * xmlCtxtUseOptions:
12032 * @ctxt: an XML parser context
12033 * @options: a combination of xmlParserOption(s)
12034 *
12035 * Applies the options to the parser context
12036 *
12037 * Returns 0 in case of success, the set of unknown or unimplemented options
12038 * in case of error.
12039 */
12040int
12041xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
12042{
12043 if (options & XML_PARSE_RECOVER) {
12044 ctxt->recovery = 1;
12045 options -= XML_PARSE_RECOVER;
12046 } else
12047 ctxt->recovery = 0;
12048 if (options & XML_PARSE_DTDLOAD) {
12049 ctxt->loadsubset = XML_DETECT_IDS;
12050 options -= XML_PARSE_DTDLOAD;
12051 } else
12052 ctxt->loadsubset = 0;
12053 if (options & XML_PARSE_DTDATTR) {
12054 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
12055 options -= XML_PARSE_DTDATTR;
12056 }
12057 if (options & XML_PARSE_NOENT) {
12058 ctxt->replaceEntities = 1;
12059 /* ctxt->loadsubset |= XML_DETECT_IDS; */
12060 options -= XML_PARSE_NOENT;
12061 } else
12062 ctxt->replaceEntities = 0;
12063 if (options & XML_PARSE_NOWARNING) {
12064 ctxt->sax->warning = NULL;
12065 options -= XML_PARSE_NOWARNING;
12066 }
12067 if (options & XML_PARSE_NOERROR) {
12068 ctxt->sax->error = NULL;
12069 ctxt->sax->fatalError = NULL;
12070 options -= XML_PARSE_NOERROR;
12071 }
12072 if (options & XML_PARSE_PEDANTIC) {
12073 ctxt->pedantic = 1;
12074 options -= XML_PARSE_PEDANTIC;
12075 } else
12076 ctxt->pedantic = 0;
12077 if (options & XML_PARSE_NOBLANKS) {
12078 ctxt->keepBlanks = 0;
12079 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
12080 options -= XML_PARSE_NOBLANKS;
12081 } else
12082 ctxt->keepBlanks = 1;
12083 if (options & XML_PARSE_DTDVALID) {
12084 ctxt->validate = 1;
12085 if (options & XML_PARSE_NOWARNING)
12086 ctxt->vctxt.warning = NULL;
12087 if (options & XML_PARSE_NOERROR)
12088 ctxt->vctxt.error = NULL;
12089 options -= XML_PARSE_DTDVALID;
12090 } else
12091 ctxt->validate = 0;
12092 if (options & XML_PARSE_SAX1) {
12093 ctxt->sax->startElement = xmlSAX2StartElement;
12094 ctxt->sax->endElement = xmlSAX2EndElement;
12095 ctxt->sax->startElementNs = NULL;
12096 ctxt->sax->endElementNs = NULL;
12097 ctxt->sax->initialized = 1;
12098 options -= XML_PARSE_SAX1;
12099 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012100 if (options & XML_PARSE_NODICT) {
12101 ctxt->dictNames = 0;
12102 options -= XML_PARSE_NODICT;
12103 } else {
12104 ctxt->dictNames = 1;
12105 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000012106 if (options & XML_PARSE_NOCDATA) {
12107 ctxt->sax->cdataBlock = NULL;
12108 options -= XML_PARSE_NOCDATA;
12109 }
12110 if (options & XML_PARSE_NSCLEAN) {
12111 ctxt->options |= XML_PARSE_NSCLEAN;
12112 options -= XML_PARSE_NSCLEAN;
12113 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012114 return (options);
12115}
12116
12117/**
12118 * xmlDoRead:
12119 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000012120 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012121 * @encoding: the document encoding, or NULL
12122 * @options: a combination of xmlParserOption(s)
12123 * @reuse: keep the context for reuse
12124 *
12125 * Common front-end for the xmlRead functions
12126 *
12127 * Returns the resulting document tree or NULL
12128 */
12129static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012130xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
12131 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012132{
12133 xmlDocPtr ret;
12134
12135 xmlCtxtUseOptions(ctxt, options);
12136 if (encoding != NULL) {
12137 xmlCharEncodingHandlerPtr hdlr;
12138
12139 hdlr = xmlFindCharEncodingHandler(encoding);
12140 if (hdlr != NULL)
12141 xmlSwitchToEncoding(ctxt, hdlr);
12142 }
Daniel Veillard60942de2003-09-25 21:05:58 +000012143 if ((URL != NULL) && (ctxt->input != NULL) &&
12144 (ctxt->input->filename == NULL))
12145 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012146 xmlParseDocument(ctxt);
12147 if ((ctxt->wellFormed) || ctxt->recovery)
12148 ret = ctxt->myDoc;
12149 else {
12150 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012151 if (ctxt->myDoc != NULL) {
Daniel Veillard9d8c1df2003-09-26 23:27:25 +000012152 if ((ctxt->dictNames) &&
12153 (ctxt->myDoc->dict == ctxt->dict))
12154 xmlDictReference(ctxt->dict);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012155 xmlFreeDoc(ctxt->myDoc);
12156 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012157 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012158 ctxt->myDoc = NULL;
12159 if (!reuse) {
12160 if ((ctxt->dictNames) &&
12161 (ret != NULL) &&
12162 (ret->dict == ctxt->dict))
12163 ctxt->dict = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012164 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012165 } else {
12166 /* Must duplicate the reference to the dictionary */
12167 if ((ctxt->dictNames) &&
12168 (ret != NULL) &&
12169 (ret->dict == ctxt->dict))
12170 xmlDictReference(ctxt->dict);
12171 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012172
12173 return (ret);
12174}
12175
12176/**
12177 * xmlReadDoc:
12178 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012179 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012180 * @encoding: the document encoding, or NULL
12181 * @options: a combination of xmlParserOption(s)
12182 *
12183 * parse an XML in-memory document and build a tree.
12184 *
12185 * Returns the resulting document tree
12186 */
12187xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012188xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012189{
12190 xmlParserCtxtPtr ctxt;
12191
12192 if (cur == NULL)
12193 return (NULL);
12194
12195 ctxt = xmlCreateDocParserCtxt(cur);
12196 if (ctxt == NULL)
12197 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012198 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012199}
12200
12201/**
12202 * xmlReadFile:
12203 * @filename: a file or URL
12204 * @encoding: the document encoding, or NULL
12205 * @options: a combination of xmlParserOption(s)
12206 *
12207 * parse an XML file from the filesystem or the network.
12208 *
12209 * Returns the resulting document tree
12210 */
12211xmlDocPtr
12212xmlReadFile(const char *filename, const char *encoding, int options)
12213{
12214 xmlParserCtxtPtr ctxt;
12215
12216 ctxt = xmlCreateFileParserCtxt(filename);
12217 if (ctxt == NULL)
12218 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012219 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012220}
12221
12222/**
12223 * xmlReadMemory:
12224 * @buffer: a pointer to a char array
12225 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012226 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012227 * @encoding: the document encoding, or NULL
12228 * @options: a combination of xmlParserOption(s)
12229 *
12230 * parse an XML in-memory document and build a tree.
12231 *
12232 * Returns the resulting document tree
12233 */
12234xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012235xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012236{
12237 xmlParserCtxtPtr ctxt;
12238
12239 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12240 if (ctxt == NULL)
12241 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012242 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012243}
12244
12245/**
12246 * xmlReadFd:
12247 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012248 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012249 * @encoding: the document encoding, or NULL
12250 * @options: a combination of xmlParserOption(s)
12251 *
12252 * parse an XML from a file descriptor and build a tree.
12253 *
12254 * Returns the resulting document tree
12255 */
12256xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012257xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012258{
12259 xmlParserCtxtPtr ctxt;
12260 xmlParserInputBufferPtr input;
12261 xmlParserInputPtr stream;
12262
12263 if (fd < 0)
12264 return (NULL);
12265
12266 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12267 if (input == NULL)
12268 return (NULL);
12269 ctxt = xmlNewParserCtxt();
12270 if (ctxt == NULL) {
12271 xmlFreeParserInputBuffer(input);
12272 return (NULL);
12273 }
12274 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12275 if (stream == NULL) {
12276 xmlFreeParserInputBuffer(input);
12277 xmlFreeParserCtxt(ctxt);
12278 return (NULL);
12279 }
12280 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012281 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012282}
12283
12284/**
12285 * xmlReadIO:
12286 * @ioread: an I/O read function
12287 * @ioclose: an I/O close function
12288 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012289 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012290 * @encoding: the document encoding, or NULL
12291 * @options: a combination of xmlParserOption(s)
12292 *
12293 * parse an XML document from I/O functions and source and build a tree.
12294 *
12295 * Returns the resulting document tree
12296 */
12297xmlDocPtr
12298xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000012299 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012300{
12301 xmlParserCtxtPtr ctxt;
12302 xmlParserInputBufferPtr input;
12303 xmlParserInputPtr stream;
12304
12305 if (ioread == NULL)
12306 return (NULL);
12307
12308 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12309 XML_CHAR_ENCODING_NONE);
12310 if (input == NULL)
12311 return (NULL);
12312 ctxt = xmlNewParserCtxt();
12313 if (ctxt == NULL) {
12314 xmlFreeParserInputBuffer(input);
12315 return (NULL);
12316 }
12317 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12318 if (stream == NULL) {
12319 xmlFreeParserInputBuffer(input);
12320 xmlFreeParserCtxt(ctxt);
12321 return (NULL);
12322 }
12323 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012324 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012325}
12326
12327/**
12328 * xmlCtxtReadDoc:
12329 * @ctxt: an XML parser context
12330 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012331 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012332 * @encoding: the document encoding, or NULL
12333 * @options: a combination of xmlParserOption(s)
12334 *
12335 * parse an XML in-memory document and build a tree.
12336 * This reuses the existing @ctxt parser context
12337 *
12338 * Returns the resulting document tree
12339 */
12340xmlDocPtr
12341xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000012342 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012343{
12344 xmlParserInputPtr stream;
12345
12346 if (cur == NULL)
12347 return (NULL);
12348 if (ctxt == NULL)
12349 return (NULL);
12350
12351 xmlCtxtReset(ctxt);
12352
12353 stream = xmlNewStringInputStream(ctxt, cur);
12354 if (stream == NULL) {
12355 return (NULL);
12356 }
12357 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012358 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012359}
12360
12361/**
12362 * xmlCtxtReadFile:
12363 * @ctxt: an XML parser context
12364 * @filename: a file or URL
12365 * @encoding: the document encoding, or NULL
12366 * @options: a combination of xmlParserOption(s)
12367 *
12368 * parse an XML file from the filesystem or the network.
12369 * This reuses the existing @ctxt parser context
12370 *
12371 * Returns the resulting document tree
12372 */
12373xmlDocPtr
12374xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
12375 const char *encoding, int options)
12376{
12377 xmlParserInputPtr stream;
12378
12379 if (filename == NULL)
12380 return (NULL);
12381 if (ctxt == NULL)
12382 return (NULL);
12383
12384 xmlCtxtReset(ctxt);
12385
12386 stream = xmlNewInputFromFile(ctxt, filename);
12387 if (stream == NULL) {
12388 return (NULL);
12389 }
12390 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012391 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012392}
12393
12394/**
12395 * xmlCtxtReadMemory:
12396 * @ctxt: an XML parser context
12397 * @buffer: a pointer to a char array
12398 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012399 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012400 * @encoding: the document encoding, or NULL
12401 * @options: a combination of xmlParserOption(s)
12402 *
12403 * parse an XML in-memory document and build a tree.
12404 * This reuses the existing @ctxt parser context
12405 *
12406 * Returns the resulting document tree
12407 */
12408xmlDocPtr
12409xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000012410 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012411{
12412 xmlParserInputBufferPtr input;
12413 xmlParserInputPtr stream;
12414
12415 if (ctxt == NULL)
12416 return (NULL);
12417 if (buffer == NULL)
12418 return (NULL);
12419
12420 xmlCtxtReset(ctxt);
12421
12422 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12423 if (input == NULL) {
12424 return(NULL);
12425 }
12426
12427 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12428 if (stream == NULL) {
12429 xmlFreeParserInputBuffer(input);
12430 return(NULL);
12431 }
12432
12433 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012434 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012435}
12436
12437/**
12438 * xmlCtxtReadFd:
12439 * @ctxt: an XML parser context
12440 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012441 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012442 * @encoding: the document encoding, or NULL
12443 * @options: a combination of xmlParserOption(s)
12444 *
12445 * parse an XML from a file descriptor and build a tree.
12446 * This reuses the existing @ctxt parser context
12447 *
12448 * Returns the resulting document tree
12449 */
12450xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012451xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
12452 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012453{
12454 xmlParserInputBufferPtr input;
12455 xmlParserInputPtr stream;
12456
12457 if (fd < 0)
12458 return (NULL);
12459 if (ctxt == NULL)
12460 return (NULL);
12461
12462 xmlCtxtReset(ctxt);
12463
12464
12465 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12466 if (input == NULL)
12467 return (NULL);
12468 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12469 if (stream == NULL) {
12470 xmlFreeParserInputBuffer(input);
12471 return (NULL);
12472 }
12473 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012474 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012475}
12476
12477/**
12478 * xmlCtxtReadIO:
12479 * @ctxt: an XML parser context
12480 * @ioread: an I/O read function
12481 * @ioclose: an I/O close function
12482 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012483 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012484 * @encoding: the document encoding, or NULL
12485 * @options: a combination of xmlParserOption(s)
12486 *
12487 * parse an XML document from I/O functions and source and build a tree.
12488 * This reuses the existing @ctxt parser context
12489 *
12490 * Returns the resulting document tree
12491 */
12492xmlDocPtr
12493xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
12494 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000012495 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012496 const char *encoding, int options)
12497{
12498 xmlParserInputBufferPtr input;
12499 xmlParserInputPtr stream;
12500
12501 if (ioread == NULL)
12502 return (NULL);
12503 if (ctxt == NULL)
12504 return (NULL);
12505
12506 xmlCtxtReset(ctxt);
12507
12508 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12509 XML_CHAR_ENCODING_NONE);
12510 if (input == NULL)
12511 return (NULL);
12512 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12513 if (stream == NULL) {
12514 xmlFreeParserInputBuffer(input);
12515 return (NULL);
12516 }
12517 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012518 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012519}