blob: 438dd3c62d7ab82e2eac649e97c30bf0e61fd90f [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
Daniel Veillard3b2e4e12003-02-03 08:52:58 +000079/**
80 * MAX_DEPTH:
81 *
82 * arbitrary depth limit for the XML documents that we allow to
83 * process. This is not a limitation of the parser but a safety
84 * boundary feature.
85 */
86#define MAX_DEPTH 1024
Owen Taylor3473f882001-02-23 17:55:21 +000087
Daniel Veillard0fb18932003-09-07 09:14:37 +000088#define SAX2 1
89
Daniel Veillard21a0f912001-02-25 19:54:14 +000090#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000091#define XML_PARSER_BUFFER_SIZE 100
92
Daniel Veillard5997aca2002-03-18 18:36:20 +000093#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
94
/*
 * Names of the processing instructions carrying an "xml" prefix that
 * the W3C specifications allow. The list is terminated by NULL.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
103
104/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000105xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
106 const xmlChar **str);
107
Daniel Veillard7d515752003-09-26 19:12:37 +0000108static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000109xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
110 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000111 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000112 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000113
Daniel Veillard8107a222002-01-13 14:10:10 +0000114static void
115xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
116 xmlNodePtr lastNode);
117
Daniel Veillard7d515752003-09-26 19:12:37 +0000118static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000119xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
120 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000121
122/************************************************************************
123 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000124 * Some factorized error routines *
125 * *
126 ************************************************************************/
127
128/**
129 * xmlErrMemory:
130 * @ctxt: an XML parser context
131 * @extra: extra informations
132 *
133 * Handle a redefinition of attribute error
134 */
135static void
136xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
137{
138 if (ctxt != NULL) {
139 ctxt->errNo = XML_ERR_NO_MEMORY;
140 ctxt->instate = XML_PARSER_EOF;
141 ctxt->disableSAX = 1;
142 }
143 if ((ctxt != NULL) && (ctxt->sax != NULL)
144 && (ctxt->sax->error != NULL)) {
145 if (extra)
146 ctxt->sax->error(ctxt->userData,
147 "Memory allocation failed : %s\n", extra);
148 else
149 ctxt->sax->error(ctxt->userData,
150 "Memory allocation failed !\n");
151 } else {
152 if (extra)
153 xmlGenericError(xmlGenericErrorContext,
154 "Memory allocation failed : %s\n", extra);
155 else
156 xmlGenericError(xmlGenericErrorContext,
157 "Memory allocation failed !\n");
158 }
159}
160
161/**
162 * xmlErrAttributeDup:
163 * @ctxt: an XML parser context
164 * @prefix: the attribute prefix
165 * @localname: the attribute localname
166 *
167 * Handle a redefinition of attribute error
168 */
169static void
170xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
171 const xmlChar * localname)
172{
173 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
174 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
175 if (prefix == NULL)
176 ctxt->sax->error(ctxt->userData,
177 "Attribute %s redefined\n", localname);
178 else
179 ctxt->sax->error(ctxt->userData,
180 "Attribute %s:%s redefined\n", prefix,
181 localname);
182 }
183 ctxt->wellFormed = 0;
184 if (ctxt->recovery == 0)
185 ctxt->disableSAX = 1;
186}
187
188/**
189 * xmlFatalErr:
190 * @ctxt: an XML parser context
191 * @error: the error number
192 * @extra: extra information string
193 *
194 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
195 */
196static void
197xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char * info)
198{
199 const char *errmsg;
200
201 if (ctxt == NULL) {
202 xmlGenericError(xmlGenericErrorContext,
203 "xmlFatalErr: no context !\n");
204 return;
205 }
206 if ((ctxt->sax == NULL) || (ctxt->sax->error == NULL))
207 return;
208 switch (error) {
209 case XML_ERR_INVALID_HEX_CHARREF:
210 errmsg = "CharRef: invalid hexadecimal value\n";
211 break;
212 case XML_ERR_INVALID_DEC_CHARREF:
213 errmsg = "CharRef: invalid decimal value\n";
214 break;
215 case XML_ERR_INVALID_CHARREF:
216 errmsg = "CharRef: invalid value\n";
217 break;
218 case XML_ERR_INTERNAL_ERROR:
219 errmsg = "internal error";
220 break;
221 case XML_ERR_PEREF_AT_EOF:
222 errmsg = "PEReference at end of document\n";
223 break;
224 case XML_ERR_PEREF_IN_PROLOG:
225 errmsg = "PEReference in prolog\n";
226 break;
227 case XML_ERR_PEREF_IN_EPILOG:
228 errmsg = "PEReference in epilog\n";
229 break;
230 case XML_ERR_PEREF_NO_NAME:
231 errmsg = "PEReference: no name\n";
232 break;
233 case XML_ERR_PEREF_SEMICOL_MISSING:
234 errmsg = "PEReference: expecting ';'\n";
235 break;
236 case XML_ERR_ENTITY_LOOP:
237 errmsg = "Detected an entity reference loop\n";
238 break;
239 case XML_ERR_ENTITY_NOT_STARTED:
240 errmsg = "EntityValue: \" or ' expected\n";
241 break;
242 case XML_ERR_ENTITY_PE_INTERNAL:
243 errmsg = "PEReferences forbidden in internal subset\n";
244 break;
245 case XML_ERR_ENTITY_NOT_FINISHED:
246 errmsg = "EntityValue: \" or ' expected\n";
247 break;
248 case XML_ERR_ATTRIBUTE_NOT_STARTED:
249 errmsg = "AttValue: \" or ' expected\n";
250 break;
251 case XML_ERR_LT_IN_ATTRIBUTE:
252 errmsg = "Unescaped '<' not allowed in attributes values\n";
253 break;
254 case XML_ERR_LITERAL_NOT_STARTED:
255 errmsg = "SystemLiteral \" or ' expected\n";
256 break;
257 case XML_ERR_LITERAL_NOT_FINISHED:
258 errmsg = "Unfinished System or Public ID \" or ' expected\n";
259 break;
260 case XML_ERR_MISPLACED_CDATA_END:
261 errmsg = "Sequence ']]>' not allowed in content\n";
262 break;
263 case XML_ERR_URI_REQUIRED:
264 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
265 break;
266 case XML_ERR_PUBID_REQUIRED:
267 errmsg = "PUBLIC, the Public Identifier is missing\n";
268 break;
269 case XML_ERR_HYPHEN_IN_COMMENT:
270 errmsg = "Comment must not contain '--' (double-hyphen)\n";
271 break;
272 case XML_ERR_PI_NOT_STARTED:
273 errmsg = "xmlParsePI : no target name\n";
274 break;
275 case XML_ERR_RESERVED_XML_NAME:
276 errmsg = "Invalid PI name\n";
277 break;
278 case XML_ERR_NOTATION_NOT_STARTED:
279 errmsg = "NOTATION: Name expected here\n";
280 break;
281 case XML_ERR_NOTATION_NOT_FINISHED:
282 errmsg = "'>' required to close NOTATION declaration\n";
283 break;
284 case XML_ERR_VALUE_REQUIRED:
285 errmsg = "Entity value required\n";
286 break;
287 case XML_ERR_URI_FRAGMENT:
288 errmsg = "Fragment not allowed";
289 break;
290 case XML_ERR_ATTLIST_NOT_STARTED:
291 errmsg = "'(' required to start ATTLIST enumeration\n";
292 break;
293 case XML_ERR_NMTOKEN_REQUIRED:
294 errmsg = "NmToken expected in ATTLIST enumeration\n";
295 break;
296 case XML_ERR_ATTLIST_NOT_FINISHED:
297 errmsg = "')' required to finish ATTLIST enumeration\n";
298 break;
299 case XML_ERR_MIXED_NOT_STARTED:
300 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
301 break;
302 case XML_ERR_PCDATA_REQUIRED:
303 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
304 break;
305 case XML_ERR_ELEMCONTENT_NOT_STARTED:
306 errmsg = "ContentDecl : Name or '(' expected\n";
307 break;
308 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
309 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
310 break;
311 case XML_ERR_PEREF_IN_INT_SUBSET:
312 errmsg = "PEReference: forbidden within markup decl in internal subset\n";
313 break;
314 case XML_ERR_GT_REQUIRED:
315 errmsg = "expected '>'\n";
316 break;
317 case XML_ERR_CONDSEC_INVALID:
318 errmsg = "XML conditional section '[' expected\n";
319 break;
320 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
321 errmsg = "Content error in the external subset\n";
322 break;
323 case XML_ERR_CONDSEC_INVALID_KEYWORD:
324 errmsg = "conditional section INCLUDE or IGNORE keyword expected\n";
325 break;
326 case XML_ERR_CONDSEC_NOT_FINISHED:
327 errmsg = "XML conditional section not closed\n";
328 break;
329 case XML_ERR_XMLDECL_NOT_STARTED:
330 errmsg = "Text declaration '<?xml' required\n";
331 break;
332 case XML_ERR_XMLDECL_NOT_FINISHED:
333 errmsg = "parsing XML declaration: '?>' expected\n";
334 break;
335 case XML_ERR_EXT_ENTITY_STANDALONE:
336 errmsg = "external parsed entities cannot be standalone\n";
337 break;
338 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
339 errmsg = "EntityRef: expecting ';'\n";
340 break;
341 case XML_ERR_DOCTYPE_NOT_FINISHED:
342 errmsg = "DOCTYPE improperly terminated\n";
343 break;
344 case XML_ERR_LTSLASH_REQUIRED:
345 errmsg = "EndTag: '</' not found\n";
346 break;
347 case XML_ERR_EQUAL_REQUIRED:
348 errmsg = "expected '='\n";
349 break;
350 case XML_ERR_STRING_NOT_CLOSED:
351 errmsg = "String not closed expecting \" or '\n";
352 break;
353 case XML_ERR_STRING_NOT_STARTED:
354 errmsg = "String not started expecting ' or \"\n";
355 break;
356 case XML_ERR_ENCODING_NAME:
357 errmsg = "Invalid XML encoding name\n";
358 break;
359 case XML_ERR_STANDALONE_VALUE:
360 errmsg = "standalone accepts only 'yes' or 'no'\n";
361 break;
362 case XML_ERR_DOCUMENT_EMPTY:
363 errmsg = "Document is empty\n";
364 break;
365 case XML_ERR_DOCUMENT_END:
366 errmsg = "Extra content at the end of the document\n";
367 break;
368 case XML_ERR_NOT_WELL_BALANCED:
369 errmsg = "chunk is not well balanced\n";
370 break;
371 case XML_ERR_EXTRA_CONTENT:
372 errmsg = "extra content at the end of well balanced chunk\n";
373 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000374 case XML_ERR_VERSION_MISSING:
375 errmsg = "Malformed declaration expecting version\n";
376 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000377#if 0
378 case :
379 errmsg = "\n";
380 break;
381#endif
382 default:
383 errmsg = "Unregistered error message\n";
384 }
385 ctxt->errNo = error;
386 if (info == NULL) {
387 ctxt->sax->error(ctxt->userData, errmsg);
388 } else {
389 ctxt->sax->error(ctxt->userData, "%s: %s", errmsg, info);
390 }
391 ctxt->wellFormed = 0;
392 if (ctxt->recovery == 0)
393 ctxt->disableSAX = 1;
394}
395
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000396/**
397 * xmlFatalErrMsg:
398 * @ctxt: an XML parser context
399 * @error: the error number
400 * @msg: the error message
401 *
402 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
403 */
404static void
405xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *msg)
406{
407 if (ctxt == NULL) {
408 xmlGenericError(xmlGenericErrorContext,
409 "xmlFatalErr: no context !\n");
410 return;
411 }
412 ctxt->errNo = error;
413 if ((ctxt->sax == NULL) || (ctxt->sax->error == NULL))
414 return;
415 ctxt->sax->error(ctxt->userData, msg);
416 ctxt->wellFormed = 0;
417 if (ctxt->recovery == 0)
418 ctxt->disableSAX = 1;
419}
420
421/**
422 * xmlFatalErrMsgInt:
423 * @ctxt: an XML parser context
424 * @error: the error number
425 * @msg: the error message
426 * @val: an integer value
427 *
428 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
429 */
430static void
431xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
432 const char *msg, int val)
433{
434 if (ctxt == NULL) {
435 xmlGenericError(xmlGenericErrorContext,
436 "xmlFatalErr: no context !\n");
437 return;
438 }
439 ctxt->errNo = error;
440 if ((ctxt->sax == NULL) || (ctxt->sax->error == NULL))
441 return;
442 ctxt->sax->error(ctxt->userData, msg, val);
443 ctxt->wellFormed = 0;
444 if (ctxt->recovery == 0)
445 ctxt->disableSAX = 1;
446}
447
448/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000449 * xmlFatalErrMsgStr:
450 * @ctxt: an XML parser context
451 * @error: the error number
452 * @msg: the error message
453 * @val: a string value
454 *
455 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
456 */
457static void
458xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
459 const char *msg, const xmlChar *val)
460{
461 if (ctxt == NULL) {
462 xmlGenericError(xmlGenericErrorContext,
463 "xmlFatalErr: no context !\n");
464 return;
465 }
466 ctxt->errNo = error;
467 if ((ctxt->sax == NULL) || (ctxt->sax->error == NULL))
468 return;
469 ctxt->sax->error(ctxt->userData, msg, val);
470 ctxt->wellFormed = 0;
471 if (ctxt->recovery == 0)
472 ctxt->disableSAX = 1;
473}
474
475/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000476 * xmlNsErr:
477 * @ctxt: an XML parser context
478 * @error: the error number
479 * @msg: the message
480 * @info1: extra information string
481 * @info2: extra information string
482 *
483 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
484 */
485static void
486xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
487 const char *msg,
488 const xmlChar *info1, const xmlChar *info2, const xmlChar *info3)
489{
490 if (ctxt == NULL)
491 return;
492 if ((ctxt->sax == NULL) || (ctxt->sax->error == NULL))
493 return;
494
495 ctxt->errNo = error;
496 if (info1 == NULL) {
497 ctxt->sax->error(ctxt->userData, msg);
498 } else if (info2 == NULL) {
499 ctxt->sax->error(ctxt->userData, msg, info1);
500 } else if (info3 == NULL) {
501 ctxt->sax->error(ctxt->userData, msg, info1, info2);
502 } else {
503 ctxt->sax->error(ctxt->userData, msg, info1, info2, info3);
504 }
505 ctxt->nsWellFormed = 0;
506}
507
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000508/************************************************************************
509 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000510 * SAX2 defaulted attributes handling *
511 * *
512 ************************************************************************/
513
514/**
515 * xmlDetectSAX2:
516 * @ctxt: an XML parser context
517 *
518 * Do the SAX2 detection and specific intialization
519 */
520static void
521xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
522 if (ctxt == NULL) return;
523 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
524 ((ctxt->sax->startElementNs != NULL) ||
525 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
526
527 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
528 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
529 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
530}
531
#ifdef SAX2
/*
 * Defaulted attributes recorded for one element. The record is
 * allocated with room past the declared end of @values; each attribute
 * occupies 4 consecutive slots: local name, prefix, value and the
 * address just past the end of the value.
 */
struct _xmlDefAttrs {
    int nbAttrs;               /* attributes currently stored */
    int maxAttrs;              /* attributes the allocation can hold */
    const xmlChar *values[4];  /* name/prefix/value/value-end groups */
};
typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
#endif
541
542/**
543 * xmlAddDefAttrs:
544 * @ctxt: an XML parser context
545 * @fullname: the element fullname
546 * @fullattr: the attribute fullname
547 * @value: the attribute value
548 *
549 * Add a defaulted attribute for an element
550 */
551static void
552xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
553 const xmlChar *fullname,
554 const xmlChar *fullattr,
555 const xmlChar *value) {
556 xmlDefAttrsPtr defaults;
557 int len;
558 const xmlChar *name;
559 const xmlChar *prefix;
560
561 if (ctxt->attsDefault == NULL) {
562 ctxt->attsDefault = xmlHashCreate(10);
563 if (ctxt->attsDefault == NULL)
564 goto mem_error;
565 }
566
567 /*
568 * plit the element name into prefix:localname , the string found
569 * are within the DTD and hen not associated to namespace names.
570 */
571 name = xmlSplitQName3(fullname, &len);
572 if (name == NULL) {
573 name = xmlDictLookup(ctxt->dict, fullname, -1);
574 prefix = NULL;
575 } else {
576 name = xmlDictLookup(ctxt->dict, name, -1);
577 prefix = xmlDictLookup(ctxt->dict, fullname, len);
578 }
579
580 /*
581 * make sure there is some storage
582 */
583 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
584 if (defaults == NULL) {
585 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
586 12 * sizeof(const xmlChar *));
587 if (defaults == NULL)
588 goto mem_error;
589 defaults->maxAttrs = 4;
590 defaults->nbAttrs = 0;
591 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
592 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
593 defaults = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
594 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
595 if (defaults == NULL)
596 goto mem_error;
597 defaults->maxAttrs *= 2;
598 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
599 }
600
601 /*
602 * plit the element name into prefix:localname , the string found
603 * are within the DTD and hen not associated to namespace names.
604 */
605 name = xmlSplitQName3(fullattr, &len);
606 if (name == NULL) {
607 name = xmlDictLookup(ctxt->dict, fullattr, -1);
608 prefix = NULL;
609 } else {
610 name = xmlDictLookup(ctxt->dict, name, -1);
611 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
612 }
613
614 defaults->values[4 * defaults->nbAttrs] = name;
615 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
616 /* intern the string and precompute the end */
617 len = xmlStrlen(value);
618 value = xmlDictLookup(ctxt->dict, value, len);
619 defaults->values[4 * defaults->nbAttrs + 2] = value;
620 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
621 defaults->nbAttrs++;
622
623 return;
624
625mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000626 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000627 return;
628}
629
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000630/**
631 * xmlAddSpecialAttr:
632 * @ctxt: an XML parser context
633 * @fullname: the element fullname
634 * @fullattr: the attribute fullname
635 * @type: the attribute type
636 *
637 * Register that this attribute is not CDATA
638 */
639static void
640xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
641 const xmlChar *fullname,
642 const xmlChar *fullattr,
643 int type)
644{
645 if (ctxt->attsSpecial == NULL) {
646 ctxt->attsSpecial = xmlHashCreate(10);
647 if (ctxt->attsSpecial == NULL)
648 goto mem_error;
649 }
650
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000651 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
652 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000653 return;
654
655mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000656 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000657 return;
658}
659
Daniel Veillard4432df22003-09-28 18:58:27 +0000660/**
661 * xmlCheckLanguageID:
662 * @lang: pointer to the string value
663 *
664 * Checks that the value conforms to the LanguageID production:
665 *
666 * NOTE: this is somewhat deprecated, those productions were removed from
667 * the XML Second edition.
668 *
669 * [33] LanguageID ::= Langcode ('-' Subcode)*
670 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
671 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
672 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
673 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
674 * [38] Subcode ::= ([a-z] | [A-Z])+
675 *
676 * Returns 1 if correct 0 otherwise
677 **/
678int
679xmlCheckLanguageID(const xmlChar * lang)
680{
681 const xmlChar *cur = lang;
682
683 if (cur == NULL)
684 return (0);
685 if (((cur[0] == 'i') && (cur[1] == '-')) ||
686 ((cur[0] == 'I') && (cur[1] == '-'))) {
687 /*
688 * IANA code
689 */
690 cur += 2;
691 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
692 ((cur[0] >= 'a') && (cur[0] <= 'z')))
693 cur++;
694 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
695 ((cur[0] == 'X') && (cur[1] == '-'))) {
696 /*
697 * User code
698 */
699 cur += 2;
700 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
701 ((cur[0] >= 'a') && (cur[0] <= 'z')))
702 cur++;
703 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
704 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
705 /*
706 * ISO639
707 */
708 cur++;
709 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
710 ((cur[0] >= 'a') && (cur[0] <= 'z')))
711 cur++;
712 else
713 return (0);
714 } else
715 return (0);
716 while (cur[0] != 0) { /* non input consuming */
717 if (cur[0] != '-')
718 return (0);
719 cur++;
720 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
721 ((cur[0] >= 'a') && (cur[0] <= 'z')))
722 cur++;
723 else
724 return (0);
725 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
726 ((cur[0] >= 'a') && (cur[0] <= 'z')))
727 cur++;
728 }
729 return (1);
730}
731
Owen Taylor3473f882001-02-23 17:55:21 +0000732/************************************************************************
733 * *
734 * Parser stacks related functions and macros *
735 * *
736 ************************************************************************/
737
738xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
739 const xmlChar ** str);
740
Daniel Veillard0fb18932003-09-07 09:14:37 +0000741#ifdef SAX2
742/**
743 * nsPush:
744 * @ctxt: an XML parser context
745 * @prefix: the namespace prefix or NULL
746 * @URL: the namespace name
747 *
748 * Pushes a new parser namespace on top of the ns stack
749 *
William M. Brack7b9154b2003-09-27 19:23:50 +0000750 * Returns -1 in case of error, -2 if the namespace should be discarded
751 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +0000752 */
753static int
754nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
755{
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000756 if (ctxt->options & XML_PARSE_NSCLEAN) {
757 int i;
758 for (i = 0;i < ctxt->nsNr;i += 2) {
759 if (ctxt->nsTab[i] == prefix) {
760 /* in scope */
761 if (ctxt->nsTab[i + 1] == URL)
762 return(-2);
763 /* out of scope keep it */
764 break;
765 }
766 }
767 }
Daniel Veillard0fb18932003-09-07 09:14:37 +0000768 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
769 ctxt->nsMax = 10;
770 ctxt->nsNr = 0;
771 ctxt->nsTab = (const xmlChar **)
772 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
773 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000774 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000775 ctxt->nsMax = 0;
776 return (-1);
777 }
778 } else if (ctxt->nsNr >= ctxt->nsMax) {
779 ctxt->nsMax *= 2;
780 ctxt->nsTab = (const xmlChar **)
781 xmlRealloc(ctxt->nsTab,
782 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
783 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000784 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000785 ctxt->nsMax /= 2;
786 return (-1);
787 }
788 }
789 ctxt->nsTab[ctxt->nsNr++] = prefix;
790 ctxt->nsTab[ctxt->nsNr++] = URL;
791 return (ctxt->nsNr);
792}
793/**
794 * nsPop:
795 * @ctxt: an XML parser context
796 * @nr: the number to pop
797 *
798 * Pops the top @nr parser prefix/namespace from the ns stack
799 *
800 * Returns the number of namespaces removed
801 */
802static int
803nsPop(xmlParserCtxtPtr ctxt, int nr)
804{
805 int i;
806
807 if (ctxt->nsTab == NULL) return(0);
808 if (ctxt->nsNr < nr) {
809 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
810 nr = ctxt->nsNr;
811 }
812 if (ctxt->nsNr <= 0)
813 return (0);
814
815 for (i = 0;i < nr;i++) {
816 ctxt->nsNr--;
817 ctxt->nsTab[ctxt->nsNr] = NULL;
818 }
819 return(nr);
820}
821#endif
822
823static int
824xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
825 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000826 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000827 int maxatts;
828
829 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000830 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000831 atts = (const xmlChar **)
832 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000833 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000834 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000835 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
836 if (attallocs == NULL) goto mem_error;
837 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000838 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000839 } else if (nr + 5 > ctxt->maxatts) {
840 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000841 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
842 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000843 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000844 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000845 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
846 (maxatts / 5) * sizeof(int));
847 if (attallocs == NULL) goto mem_error;
848 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000849 ctxt->maxatts = maxatts;
850 }
851 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000852mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000853 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000854 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000855}
856
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000857/**
858 * inputPush:
859 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000860 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000861 *
862 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000863 *
864 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000865 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000866extern int
867inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
868{
869 if (ctxt->inputNr >= ctxt->inputMax) {
870 ctxt->inputMax *= 2;
871 ctxt->inputTab =
872 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
873 ctxt->inputMax *
874 sizeof(ctxt->inputTab[0]));
875 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000876 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000877 return (0);
878 }
879 }
880 ctxt->inputTab[ctxt->inputNr] = value;
881 ctxt->input = value;
882 return (ctxt->inputNr++);
883}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000884/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000885 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000886 * @ctxt: an XML parser context
887 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000888 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000889 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000890 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000891 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000892extern xmlParserInputPtr
893inputPop(xmlParserCtxtPtr ctxt)
894{
895 xmlParserInputPtr ret;
896
897 if (ctxt->inputNr <= 0)
898 return (0);
899 ctxt->inputNr--;
900 if (ctxt->inputNr > 0)
901 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
902 else
903 ctxt->input = NULL;
904 ret = ctxt->inputTab[ctxt->inputNr];
905 ctxt->inputTab[ctxt->inputNr] = 0;
906 return (ret);
907}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000908/**
909 * nodePush:
910 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000911 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000912 *
913 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000914 *
915 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000916 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000917extern int
918nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
919{
920 if (ctxt->nodeNr >= ctxt->nodeMax) {
921 ctxt->nodeMax *= 2;
922 ctxt->nodeTab =
923 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
924 ctxt->nodeMax *
925 sizeof(ctxt->nodeTab[0]));
926 if (ctxt->nodeTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000927 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000928 return (0);
929 }
930 }
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000931#ifdef MAX_DEPTH
932 if (ctxt->nodeNr > MAX_DEPTH) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000933 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000934 "Excessive depth in document: change MAX_DEPTH = %d\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000935 MAX_DEPTH);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000936 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000937 return(0);
938 }
939#endif
Daniel Veillard1c732d22002-11-30 11:22:59 +0000940 ctxt->nodeTab[ctxt->nodeNr] = value;
941 ctxt->node = value;
942 return (ctxt->nodeNr++);
943}
944/**
945 * nodePop:
946 * @ctxt: an XML parser context
947 *
948 * Pops the top element node from the node stack
949 *
950 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +0000951 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000952extern xmlNodePtr
953nodePop(xmlParserCtxtPtr ctxt)
954{
955 xmlNodePtr ret;
956
957 if (ctxt->nodeNr <= 0)
958 return (0);
959 ctxt->nodeNr--;
960 if (ctxt->nodeNr > 0)
961 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
962 else
963 ctxt->node = NULL;
964 ret = ctxt->nodeTab[ctxt->nodeNr];
965 ctxt->nodeTab[ctxt->nodeNr] = 0;
966 return (ret);
967}
968/**
Daniel Veillarde57ec792003-09-10 10:50:59 +0000969 * nameNsPush:
970 * @ctxt: an XML parser context
971 * @value: the element name
972 * @prefix: the element prefix
973 * @URI: the element namespace name
974 *
975 * Pushes a new element name/prefix/URL on top of the name stack
976 *
977 * Returns -1 in case of error, the index in the stack otherwise
978 */
979static int
980nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
981 const xmlChar *prefix, const xmlChar *URI, int nsNr)
982{
983 if (ctxt->nameNr >= ctxt->nameMax) {
984 const xmlChar * *tmp;
985 void **tmp2;
986 ctxt->nameMax *= 2;
987 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
988 ctxt->nameMax *
989 sizeof(ctxt->nameTab[0]));
990 if (tmp == NULL) {
991 ctxt->nameMax /= 2;
992 goto mem_error;
993 }
994 ctxt->nameTab = tmp;
995 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
996 ctxt->nameMax * 3 *
997 sizeof(ctxt->pushTab[0]));
998 if (tmp2 == NULL) {
999 ctxt->nameMax /= 2;
1000 goto mem_error;
1001 }
1002 ctxt->pushTab = tmp2;
1003 }
1004 ctxt->nameTab[ctxt->nameNr] = value;
1005 ctxt->name = value;
1006 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1007 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001008 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001009 return (ctxt->nameNr++);
1010mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001011 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001012 return (-1);
1013}
1014/**
1015 * nameNsPop:
1016 * @ctxt: an XML parser context
1017 *
1018 * Pops the top element/prefix/URI name from the name stack
1019 *
1020 * Returns the name just removed
1021 */
1022static const xmlChar *
1023nameNsPop(xmlParserCtxtPtr ctxt)
1024{
1025 const xmlChar *ret;
1026
1027 if (ctxt->nameNr <= 0)
1028 return (0);
1029 ctxt->nameNr--;
1030 if (ctxt->nameNr > 0)
1031 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1032 else
1033 ctxt->name = NULL;
1034 ret = ctxt->nameTab[ctxt->nameNr];
1035 ctxt->nameTab[ctxt->nameNr] = NULL;
1036 return (ret);
1037}
1038
1039/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001040 * namePush:
1041 * @ctxt: an XML parser context
1042 * @value: the element name
1043 *
1044 * Pushes a new element name on top of the name stack
1045 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001046 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001047 */
1048extern int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001049namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001050{
1051 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001052 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001053 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001054 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001055 ctxt->nameMax *
1056 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001057 if (tmp == NULL) {
1058 ctxt->nameMax /= 2;
1059 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001060 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001061 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001062 }
1063 ctxt->nameTab[ctxt->nameNr] = value;
1064 ctxt->name = value;
1065 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001066mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001067 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001068 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001069}
1070/**
1071 * namePop:
1072 * @ctxt: an XML parser context
1073 *
1074 * Pops the top element name from the name stack
1075 *
1076 * Returns the name just removed
1077 */
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001078extern const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001079namePop(xmlParserCtxtPtr ctxt)
1080{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001081 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001082
1083 if (ctxt->nameNr <= 0)
1084 return (0);
1085 ctxt->nameNr--;
1086 if (ctxt->nameNr > 0)
1087 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1088 else
1089 ctxt->name = NULL;
1090 ret = ctxt->nameTab[ctxt->nameNr];
1091 ctxt->nameTab[ctxt->nameNr] = 0;
1092 return (ret);
1093}
Owen Taylor3473f882001-02-23 17:55:21 +00001094
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001095static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001096 if (ctxt->spaceNr >= ctxt->spaceMax) {
1097 ctxt->spaceMax *= 2;
1098 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1099 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1100 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001101 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001102 return(0);
1103 }
1104 }
1105 ctxt->spaceTab[ctxt->spaceNr] = val;
1106 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1107 return(ctxt->spaceNr++);
1108}
1109
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001110static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001111 int ret;
1112 if (ctxt->spaceNr <= 0) return(0);
1113 ctxt->spaceNr--;
1114 if (ctxt->spaceNr > 0)
1115 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1116 else
1117 ctxt->space = NULL;
1118 ret = ctxt->spaceTab[ctxt->spaceNr];
1119 ctxt->spaceTab[ctxt->spaceNr] = -1;
1120 return(ret);
1121}
1122
/*
 * Macros for accessing the content. They should be used only by the
 * parser and are not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the
 * context in order to use them:
 *
 *   CUR_PTR returns the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location!
 *   CUR     returns the current xmlChar value, i.e. an 8-bit value.
 *           This should be used internally by the parser
 *           only to compare against ASCII values, otherwise it would
 *           break when running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypassing any token
 *           extraction that may have been done.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare against ASCII based substrings.
 *   SKIP(n) skips n xmlChars, and must also be used only to skip ASCII
 *           defined strings without newlines within the parser.
 *   NEXT1   skips 1 xmlChar, and must also be used only to skip 1
 *           non-newline ASCII defined char within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expecting UTF-8
 * encoding:
 *
 *   NEXT    skips to the next character; this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) skips the current unicode character, which is l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), and sets l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same, but operates on a string instead of the context.
 *   COPY_BUF  copies the current unicode char to the target buffer and
 *           increments the index.
 *   GROW, SHRINK  handling of input buffers.
 */

1157
/* Direct accessors on the current input cursor of the local `ctxt`. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * SKIP(val): advance the cursor, the column and the character counter
 * by val (val is evaluated three times). If the cursor then sits on
 * '%', parameter-entity handling is triggered; if it sits on a 0 byte
 * and the input cannot be grown, the current input is popped.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
1170
Daniel Veillarda880b122003-04-21 21:36:41 +00001171#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001172 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1173 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001174 xmlSHRINK (ctxt);
1175
1176static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1177 xmlParserInputShrink(ctxt->input);
1178 if ((*ctxt->input->cur == 0) &&
1179 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1180 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001181 }
Owen Taylor3473f882001-02-23 17:55:21 +00001182
Daniel Veillarda880b122003-04-21 21:36:41 +00001183#define GROW if ((ctxt->progressive == 0) && \
1184 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001185 xmlGROW (ctxt);
1186
1187static void xmlGROW (xmlParserCtxtPtr ctxt) {
1188 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1189 if ((*ctxt->input->cur == 0) &&
1190 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1191 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001192}
Owen Taylor3473f882001-02-23 17:55:21 +00001193
/* Thin wrappers around the character-level helpers, bound to `ctxt`. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, updating column and character
 * count, and try to read more input when the cursor lands on a 0 byte.
 * Expands to a brace block, not a do/while.
 */
#define NEXT1 {								\
        ctxt->input->cur++;						\
        ctxt->input->col++;						\
        ctxt->nbChars++;						\
        if (*ctxt->input->cur == 0)					\
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l xmlChars long,
 * keeping line/column up to date, then trigger parameter-entity
 * handling if the cursor lands on '%'.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) != '\n') {					\
        ctxt->input->col++;						\
    } else {								\
        ctxt->input->line++;						\
        ctxt->input->col = 1;						\
    }									\
    ctxt->input->cur += l;						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
  } while (0)

/* Decode the current character; l receives its length in xmlChars. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i; single-xmlChar characters are stored directly, longer
 * ones go through xmlCopyCharMultiByte(). Expands to a bare if/else.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +00001220
1221/**
1222 * xmlSkipBlankChars:
1223 * @ctxt: the XML parser context
1224 *
1225 * skip all blanks character found at that point in the input streams.
1226 * It pops up finished entities in the process if allowable at that point.
1227 *
1228 * Returns the number of space chars skipped
1229 */
1230
1231int
1232xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001233 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001234
1235 /*
1236 * It's Okay to use CUR/NEXT here since all the blanks are on
1237 * the ASCII range.
1238 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001239 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1240 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001241 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001242 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001243 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001244 cur = ctxt->input->cur;
1245 while (IS_BLANK(*cur)) {
1246 if (*cur == '\n') {
1247 ctxt->input->line++; ctxt->input->col = 1;
1248 }
1249 cur++;
1250 res++;
1251 if (*cur == 0) {
1252 ctxt->input->cur = cur;
1253 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1254 cur = ctxt->input->cur;
1255 }
1256 }
1257 ctxt->input->cur = cur;
1258 } else {
1259 int cur;
1260 do {
1261 cur = CUR;
1262 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
1263 NEXT;
1264 cur = CUR;
1265 res++;
1266 }
1267 while ((cur == 0) && (ctxt->inputNr > 1) &&
1268 (ctxt->instate != XML_PARSER_COMMENT)) {
1269 xmlPopInput(ctxt);
1270 cur = CUR;
1271 }
1272 /*
1273 * Need to handle support of entities branching here
1274 */
1275 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1276 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1277 }
Owen Taylor3473f882001-02-23 17:55:21 +00001278 return(res);
1279}
1280
1281/************************************************************************
1282 * *
1283 * Commodity functions to handle entities *
1284 * *
1285 ************************************************************************/
1286
1287/**
1288 * xmlPopInput:
1289 * @ctxt: an XML parser context
1290 *
1291 * xmlPopInput: the current input pointed by ctxt->input came to an end
1292 * pop it and return the next char.
1293 *
1294 * Returns the current xmlChar in the parser context
1295 */
1296xmlChar
1297xmlPopInput(xmlParserCtxtPtr ctxt) {
1298 if (ctxt->inputNr == 1) return(0); /* End of main Input */
1299 if (xmlParserDebugEntities)
1300 xmlGenericError(xmlGenericErrorContext,
1301 "Popping input %d\n", ctxt->inputNr);
1302 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001303 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001304 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1305 return(xmlPopInput(ctxt));
1306 return(CUR);
1307}
1308
1309/**
1310 * xmlPushInput:
1311 * @ctxt: an XML parser context
1312 * @input: an XML parser input fragment (entity, XML fragment ...).
1313 *
1314 * xmlPushInput: switch to a new input stream which is stacked on top
1315 * of the previous one(s).
1316 */
1317void
1318xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1319 if (input == NULL) return;
1320
1321 if (xmlParserDebugEntities) {
1322 if ((ctxt->input != NULL) && (ctxt->input->filename))
1323 xmlGenericError(xmlGenericErrorContext,
1324 "%s(%d): ", ctxt->input->filename,
1325 ctxt->input->line);
1326 xmlGenericError(xmlGenericErrorContext,
1327 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1328 }
1329 inputPush(ctxt, input);
1330 GROW;
1331}
1332
1333/**
1334 * xmlParseCharRef:
1335 * @ctxt: an XML parser context
1336 *
1337 * parse Reference declarations
1338 *
1339 * [66] CharRef ::= '&#' [0-9]+ ';' |
1340 * '&#x' [0-9a-fA-F]+ ';'
1341 *
1342 * [ WFC: Legal Character ]
1343 * Characters referred to using character references must match the
1344 * production for Char.
1345 *
1346 * Returns the value parsed (as an int), 0 in case of error
1347 */
1348int
1349xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001350 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001351 int count = 0;
1352
Owen Taylor3473f882001-02-23 17:55:21 +00001353 /*
1354 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1355 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001356 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001357 (NXT(2) == 'x')) {
1358 SKIP(3);
1359 GROW;
1360 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001361 if (count++ > 20) {
1362 count = 0;
1363 GROW;
1364 }
1365 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001366 val = val * 16 + (CUR - '0');
1367 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1368 val = val * 16 + (CUR - 'a') + 10;
1369 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1370 val = val * 16 + (CUR - 'A') + 10;
1371 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001372 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001373 val = 0;
1374 break;
1375 }
1376 NEXT;
1377 count++;
1378 }
1379 if (RAW == ';') {
1380 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001381 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001382 ctxt->nbChars ++;
1383 ctxt->input->cur++;
1384 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001385 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001386 SKIP(2);
1387 GROW;
1388 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001389 if (count++ > 20) {
1390 count = 0;
1391 GROW;
1392 }
1393 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001394 val = val * 10 + (CUR - '0');
1395 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001396 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001397 val = 0;
1398 break;
1399 }
1400 NEXT;
1401 count++;
1402 }
1403 if (RAW == ';') {
1404 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001405 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001406 ctxt->nbChars ++;
1407 ctxt->input->cur++;
1408 }
1409 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001410 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001411 }
1412
1413 /*
1414 * [ WFC: Legal Character ]
1415 * Characters referred to using character references must match the
1416 * production for Char.
1417 */
1418 if (IS_CHAR(val)) {
1419 return(val);
1420 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001421 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1422 "xmlParseCharRef: invalid xmlChar value %d\n",
1423 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001424 }
1425 return(0);
1426}
1427
1428/**
1429 * xmlParseStringCharRef:
1430 * @ctxt: an XML parser context
1431 * @str: a pointer to an index in the string
1432 *
1433 * parse Reference declarations, variant parsing from a string rather
1434 * than an an input flow.
1435 *
1436 * [66] CharRef ::= '&#' [0-9]+ ';' |
1437 * '&#x' [0-9a-fA-F]+ ';'
1438 *
1439 * [ WFC: Legal Character ]
1440 * Characters referred to using character references must match the
1441 * production for Char.
1442 *
1443 * Returns the value parsed (as an int), 0 in case of error, str will be
1444 * updated to the current value of the index
1445 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001446static int
Owen Taylor3473f882001-02-23 17:55:21 +00001447xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1448 const xmlChar *ptr;
1449 xmlChar cur;
1450 int val = 0;
1451
1452 if ((str == NULL) || (*str == NULL)) return(0);
1453 ptr = *str;
1454 cur = *ptr;
1455 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1456 ptr += 3;
1457 cur = *ptr;
1458 while (cur != ';') { /* Non input consuming loop */
1459 if ((cur >= '0') && (cur <= '9'))
1460 val = val * 16 + (cur - '0');
1461 else if ((cur >= 'a') && (cur <= 'f'))
1462 val = val * 16 + (cur - 'a') + 10;
1463 else if ((cur >= 'A') && (cur <= 'F'))
1464 val = val * 16 + (cur - 'A') + 10;
1465 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001466 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001467 val = 0;
1468 break;
1469 }
1470 ptr++;
1471 cur = *ptr;
1472 }
1473 if (cur == ';')
1474 ptr++;
1475 } else if ((cur == '&') && (ptr[1] == '#')){
1476 ptr += 2;
1477 cur = *ptr;
1478 while (cur != ';') { /* Non input consuming loops */
1479 if ((cur >= '0') && (cur <= '9'))
1480 val = val * 10 + (cur - '0');
1481 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001482 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001483 val = 0;
1484 break;
1485 }
1486 ptr++;
1487 cur = *ptr;
1488 }
1489 if (cur == ';')
1490 ptr++;
1491 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001492 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001493 return(0);
1494 }
1495 *str = ptr;
1496
1497 /*
1498 * [ WFC: Legal Character ]
1499 * Characters referred to using character references must match the
1500 * production for Char.
1501 */
1502 if (IS_CHAR(val)) {
1503 return(val);
1504 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001505 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1506 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1507 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001508 }
1509 return(0);
1510}
1511
1512/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001513 * xmlNewBlanksWrapperInputStream:
1514 * @ctxt: an XML parser context
1515 * @entity: an Entity pointer
1516 *
1517 * Create a new input stream for wrapping
1518 * blanks around a PEReference
1519 *
1520 * Returns the new input stream or NULL
1521 */
1522
1523static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1524
Daniel Veillardf4862f02002-09-10 11:13:43 +00001525static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001526xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1527 xmlParserInputPtr input;
1528 xmlChar *buffer;
1529 size_t length;
1530 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001531 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1532 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001533 return(NULL);
1534 }
1535 if (xmlParserDebugEntities)
1536 xmlGenericError(xmlGenericErrorContext,
1537 "new blanks wrapper for entity: %s\n", entity->name);
1538 input = xmlNewInputStream(ctxt);
1539 if (input == NULL) {
1540 return(NULL);
1541 }
1542 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001543 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001544 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001545 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001546 return(NULL);
1547 }
1548 buffer [0] = ' ';
1549 buffer [1] = '%';
1550 buffer [length-3] = ';';
1551 buffer [length-2] = ' ';
1552 buffer [length-1] = 0;
1553 memcpy(buffer + 2, entity->name, length - 5);
1554 input->free = deallocblankswrapper;
1555 input->base = buffer;
1556 input->cur = buffer;
1557 input->length = length;
1558 input->end = &buffer[length];
1559 return(input);
1560}
1561
1562/**
Owen Taylor3473f882001-02-23 17:55:21 +00001563 * xmlParserHandlePEReference:
1564 * @ctxt: the parser context
1565 *
1566 * [69] PEReference ::= '%' Name ';'
1567 *
1568 * [ WFC: No Recursion ]
1569 * A parsed entity must not contain a recursive
1570 * reference to itself, either directly or indirectly.
1571 *
1572 * [ WFC: Entity Declared ]
1573 * In a document without any DTD, a document with only an internal DTD
1574 * subset which contains no parameter entity references, or a document
1575 * with "standalone='yes'", ... ... The declaration of a parameter
1576 * entity must precede any reference to it...
1577 *
1578 * [ VC: Entity Declared ]
1579 * In a document with an external subset or external parameter entities
1580 * with "standalone='no'", ... ... The declaration of a parameter entity
1581 * must precede any reference to it...
1582 *
1583 * [ WFC: In DTD ]
1584 * Parameter-entity references may only appear in the DTD.
1585 * NOTE: misleading but this is handled.
1586 *
1587 * A PEReference may have been detected in the current input stream
1588 * the handling is done accordingly to
1589 * http://www.w3.org/TR/REC-xml#entproc
1590 * i.e.
1591 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001592 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001593 */
1594void
1595xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001596 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001597 xmlEntityPtr entity = NULL;
1598 xmlParserInputPtr input;
1599
Owen Taylor3473f882001-02-23 17:55:21 +00001600 if (RAW != '%') return;
1601 switch(ctxt->instate) {
1602 case XML_PARSER_CDATA_SECTION:
1603 return;
1604 case XML_PARSER_COMMENT:
1605 return;
1606 case XML_PARSER_START_TAG:
1607 return;
1608 case XML_PARSER_END_TAG:
1609 return;
1610 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001611 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001612 return;
1613 case XML_PARSER_PROLOG:
1614 case XML_PARSER_START:
1615 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001616 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001617 return;
1618 case XML_PARSER_ENTITY_DECL:
1619 case XML_PARSER_CONTENT:
1620 case XML_PARSER_ATTRIBUTE_VALUE:
1621 case XML_PARSER_PI:
1622 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001623 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001624 /* we just ignore it there */
1625 return;
1626 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001627 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001628 return;
1629 case XML_PARSER_ENTITY_VALUE:
1630 /*
1631 * NOTE: in the case of entity values, we don't do the
1632 * substitution here since we need the literal
1633 * entity value to be able to save the internal
1634 * subset of the document.
1635 * This will be handled by xmlStringDecodeEntities
1636 */
1637 return;
1638 case XML_PARSER_DTD:
1639 /*
1640 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1641 * In the internal DTD subset, parameter-entity references
1642 * can occur only where markup declarations can occur, not
1643 * within markup declarations.
1644 * In that case this is handled in xmlParseMarkupDecl
1645 */
1646 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1647 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +00001648 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
1649 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001650 break;
1651 case XML_PARSER_IGNORE:
1652 return;
1653 }
1654
1655 NEXT;
1656 name = xmlParseName(ctxt);
1657 if (xmlParserDebugEntities)
1658 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001659 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001660 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001661 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001662 } else {
1663 if (RAW == ';') {
1664 NEXT;
1665 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1666 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1667 if (entity == NULL) {
1668
1669 /*
1670 * [ WFC: Entity Declared ]
1671 * In a document without any DTD, a document with only an
1672 * internal DTD subset which contains no parameter entity
1673 * references, or a document with "standalone='yes'", ...
1674 * ... The declaration of a parameter entity must precede
1675 * any reference to it...
1676 */
1677 if ((ctxt->standalone == 1) ||
1678 ((ctxt->hasExternalSubset == 0) &&
1679 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001680 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00001681 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001682 } else {
1683 /*
1684 * [ VC: Entity Declared ]
1685 * In a document with an external subset or external
1686 * parameter entities with "standalone='no'", ...
1687 * ... The declaration of a parameter entity must precede
1688 * any reference to it...
1689 */
1690 if ((!ctxt->disableSAX) &&
1691 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1692 ctxt->vctxt.error(ctxt->vctxt.userData,
1693 "PEReference: %%%s; not found\n", name);
1694 } else if ((!ctxt->disableSAX) &&
1695 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1696 ctxt->sax->warning(ctxt->userData,
1697 "PEReference: %%%s; not found\n", name);
1698 ctxt->valid = 0;
1699 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00001700 } else if (ctxt->input->free != deallocblankswrapper) {
1701 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
1702 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001703 } else {
1704 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1705 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001706 xmlChar start[4];
1707 xmlCharEncoding enc;
1708
Owen Taylor3473f882001-02-23 17:55:21 +00001709 /*
1710 * handle the extra spaces added before and after
1711 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001712 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00001713 */
1714 input = xmlNewEntityInputStream(ctxt, entity);
1715 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00001716
1717 /*
1718 * Get the 4 first bytes and decode the charset
1719 * if enc != XML_CHAR_ENCODING_NONE
1720 * plug some encoding conversion routines.
1721 */
1722 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +00001723 if (entity->length >= 4) {
1724 start[0] = RAW;
1725 start[1] = NXT(1);
1726 start[2] = NXT(2);
1727 start[3] = NXT(3);
1728 enc = xmlDetectCharEncoding(start, 4);
1729 if (enc != XML_CHAR_ENCODING_NONE) {
1730 xmlSwitchEncoding(ctxt, enc);
1731 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001732 }
1733
Owen Taylor3473f882001-02-23 17:55:21 +00001734 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
1735 (RAW == '<') && (NXT(1) == '?') &&
1736 (NXT(2) == 'x') && (NXT(3) == 'm') &&
1737 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1738 xmlParseTextDecl(ctxt);
1739 }
Owen Taylor3473f882001-02-23 17:55:21 +00001740 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001741 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
1742 "PEReference: %s is not a parameter entity\n",
1743 name);
Owen Taylor3473f882001-02-23 17:55:21 +00001744 }
1745 }
1746 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001747 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001748 }
Owen Taylor3473f882001-02-23 17:55:21 +00001749 }
1750}
1751
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates buffer accordingly. The result
 * goes through a temporary so that, when the reallocation fails, buffer
 * still points to the original (valid) block at the mem_error label
 * instead of being overwritten with NULL and leaked.
 */
#define growBuffer(buffer) {						\
    xmlChar *growBufferTmp;						\
    buffer##_size *= 2;							\
    growBufferTmp = (xmlChar *)						\
                xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (growBufferTmp == NULL) goto mem_error;				\
    buffer = growBufferTmp;						\
}
1761
1762/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00001763 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00001764 * @ctxt: the parser context
1765 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001766 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001767 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1768 * @end: an end marker xmlChar, 0 if none
1769 * @end2: an end marker xmlChar, 0 if none
1770 * @end3: an end marker xmlChar, 0 if none
1771 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001772 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001773 *
1774 * [67] Reference ::= EntityRef | CharRef
1775 *
1776 * [69] PEReference ::= '%' Name ';'
1777 *
1778 * Returns A newly allocated string with the substitution done. The caller
1779 * must deallocate it !
1780 */
1781xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001782xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1783 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001784 xmlChar *buffer = NULL;
1785 int buffer_size = 0;
1786
1787 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001788 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001789 xmlEntityPtr ent;
1790 int c,l;
1791 int nbchars = 0;
1792
Daniel Veillarde57ec792003-09-10 10:50:59 +00001793 if ((str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001794 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001795 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001796
1797 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001798 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001799 return(NULL);
1800 }
1801
1802 /*
1803 * allocate a translation buffer.
1804 */
1805 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001806 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001807 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001808
1809 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001810 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001811 * we are operating on already parsed values.
1812 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001813 if (str < last)
1814 c = CUR_SCHAR(str, l);
1815 else
1816 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001817 while ((c != 0) && (c != end) && /* non input consuming loop */
1818 (c != end2) && (c != end3)) {
1819
1820 if (c == 0) break;
1821 if ((c == '&') && (str[1] == '#')) {
1822 int val = xmlParseStringCharRef(ctxt, &str);
1823 if (val != 0) {
1824 COPY_BUF(0,buffer,nbchars,val);
1825 }
1826 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1827 if (xmlParserDebugEntities)
1828 xmlGenericError(xmlGenericErrorContext,
1829 "String decoding Entity Reference: %.30s\n",
1830 str);
1831 ent = xmlParseStringEntityRef(ctxt, &str);
1832 if ((ent != NULL) &&
1833 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1834 if (ent->content != NULL) {
1835 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1836 } else {
1837 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1838 ctxt->sax->error(ctxt->userData,
1839 "internal error entity has no content\n");
1840 }
1841 } else if ((ent != NULL) && (ent->content != NULL)) {
1842 xmlChar *rep;
1843
1844 ctxt->depth++;
1845 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1846 0, 0, 0);
1847 ctxt->depth--;
1848 if (rep != NULL) {
1849 current = rep;
1850 while (*current != 0) { /* non input consuming loop */
1851 buffer[nbchars++] = *current++;
1852 if (nbchars >
1853 buffer_size - XML_PARSER_BUFFER_SIZE) {
1854 growBuffer(buffer);
1855 }
1856 }
1857 xmlFree(rep);
1858 }
1859 } else if (ent != NULL) {
1860 int i = xmlStrlen(ent->name);
1861 const xmlChar *cur = ent->name;
1862
1863 buffer[nbchars++] = '&';
1864 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1865 growBuffer(buffer);
1866 }
1867 for (;i > 0;i--)
1868 buffer[nbchars++] = *cur++;
1869 buffer[nbchars++] = ';';
1870 }
1871 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1872 if (xmlParserDebugEntities)
1873 xmlGenericError(xmlGenericErrorContext,
1874 "String decoding PE Reference: %.30s\n", str);
1875 ent = xmlParseStringPEReference(ctxt, &str);
1876 if (ent != NULL) {
1877 xmlChar *rep;
1878
1879 ctxt->depth++;
1880 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1881 0, 0, 0);
1882 ctxt->depth--;
1883 if (rep != NULL) {
1884 current = rep;
1885 while (*current != 0) { /* non input consuming loop */
1886 buffer[nbchars++] = *current++;
1887 if (nbchars >
1888 buffer_size - XML_PARSER_BUFFER_SIZE) {
1889 growBuffer(buffer);
1890 }
1891 }
1892 xmlFree(rep);
1893 }
1894 }
1895 } else {
1896 COPY_BUF(l,buffer,nbchars,c);
1897 str += l;
1898 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1899 growBuffer(buffer);
1900 }
1901 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001902 if (str < last)
1903 c = CUR_SCHAR(str, l);
1904 else
1905 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001906 }
1907 buffer[nbchars++] = 0;
1908 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001909
1910mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001911 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001912 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001913}
1914
Daniel Veillarde57ec792003-09-10 10:50:59 +00001915/**
1916 * xmlStringDecodeEntities:
1917 * @ctxt: the parser context
1918 * @str: the input string
1919 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1920 * @end: an end marker xmlChar, 0 if none
1921 * @end2: an end marker xmlChar, 0 if none
1922 * @end3: an end marker xmlChar, 0 if none
1923 *
1924 * Takes a entity string content and process to do the adequate substitutions.
1925 *
1926 * [67] Reference ::= EntityRef | CharRef
1927 *
1928 * [69] PEReference ::= '%' Name ';'
1929 *
1930 * Returns A newly allocated string with the substitution done. The caller
1931 * must deallocate it !
1932 */
1933xmlChar *
1934xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1935 xmlChar end, xmlChar end2, xmlChar end3) {
1936 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
1937 end, end2, end3));
1938}
Owen Taylor3473f882001-02-23 17:55:21 +00001939
1940/************************************************************************
1941 * *
1942 * Commodity functions to handle xmlChars *
1943 * *
1944 ************************************************************************/
1945
1946/**
1947 * xmlStrndup:
1948 * @cur: the input xmlChar *
1949 * @len: the len of @cur
1950 *
1951 * a strndup for array of xmlChar's
1952 *
1953 * Returns a new xmlChar * or NULL
1954 */
1955xmlChar *
1956xmlStrndup(const xmlChar *cur, int len) {
1957 xmlChar *ret;
1958
1959 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001960 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001961 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001962 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001963 return(NULL);
1964 }
1965 memcpy(ret, cur, len * sizeof(xmlChar));
1966 ret[len] = 0;
1967 return(ret);
1968}
1969
1970/**
1971 * xmlStrdup:
1972 * @cur: the input xmlChar *
1973 *
1974 * a strdup for array of xmlChar's. Since they are supposed to be
1975 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1976 * a termination mark of '0'.
1977 *
1978 * Returns a new xmlChar * or NULL
1979 */
1980xmlChar *
1981xmlStrdup(const xmlChar *cur) {
1982 const xmlChar *p = cur;
1983
1984 if (cur == NULL) return(NULL);
1985 while (*p != 0) p++; /* non input consuming */
1986 return(xmlStrndup(cur, p - cur));
1987}
1988
1989/**
1990 * xmlCharStrndup:
1991 * @cur: the input char *
1992 * @len: the len of @cur
1993 *
1994 * a strndup for char's to xmlChar's
1995 *
1996 * Returns a new xmlChar * or NULL
1997 */
1998
1999xmlChar *
2000xmlCharStrndup(const char *cur, int len) {
2001 int i;
2002 xmlChar *ret;
2003
2004 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002005 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002006 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002007 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002008 return(NULL);
2009 }
2010 for (i = 0;i < len;i++)
2011 ret[i] = (xmlChar) cur[i];
2012 ret[len] = 0;
2013 return(ret);
2014}
2015
2016/**
2017 * xmlCharStrdup:
2018 * @cur: the input char *
Owen Taylor3473f882001-02-23 17:55:21 +00002019 *
2020 * a strdup for char's to xmlChar's
2021 *
2022 * Returns a new xmlChar * or NULL
2023 */
2024
2025xmlChar *
2026xmlCharStrdup(const char *cur) {
2027 const char *p = cur;
2028
2029 if (cur == NULL) return(NULL);
2030 while (*p != '\0') p++; /* non input consuming */
2031 return(xmlCharStrndup(cur, p - cur));
2032}
2033
2034/**
2035 * xmlStrcmp:
2036 * @str1: the first xmlChar *
2037 * @str2: the second xmlChar *
2038 *
2039 * a strcmp for xmlChar's
2040 *
2041 * Returns the integer result of the comparison
2042 */
2043
2044int
2045xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
2046 register int tmp;
2047
2048 if (str1 == str2) return(0);
2049 if (str1 == NULL) return(-1);
2050 if (str2 == NULL) return(1);
2051 do {
2052 tmp = *str1++ - *str2;
2053 if (tmp != 0) return(tmp);
2054 } while (*str2++ != 0);
2055 return 0;
2056}
2057
2058/**
2059 * xmlStrEqual:
2060 * @str1: the first xmlChar *
2061 * @str2: the second xmlChar *
2062 *
2063 * Check if both string are equal of have same content
2064 * Should be a bit more readable and faster than xmlStrEqual()
2065 *
2066 * Returns 1 if they are equal, 0 if they are different
2067 */
2068
2069int
2070xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
2071 if (str1 == str2) return(1);
2072 if (str1 == NULL) return(0);
2073 if (str2 == NULL) return(0);
2074 do {
2075 if (*str1++ != *str2) return(0);
2076 } while (*str2++);
2077 return(1);
2078}
2079
2080/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00002081 * xmlStrQEqual:
2082 * @pref: the prefix of the QName
2083 * @name: the localname of the QName
2084 * @str: the second xmlChar *
2085 *
2086 * Check if a QName is Equal to a given string
2087 *
2088 * Returns 1 if they are equal, 0 if they are different
2089 */
2090
2091int
2092xmlStrQEqual(const xmlChar *pref, const xmlChar *name, const xmlChar *str) {
2093 if (pref == NULL) return(xmlStrEqual(name, str));
2094 if (name == NULL) return(0);
2095 if (str == NULL) return(0);
2096
2097 do {
2098 if (*pref++ != *str) return(0);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002099 } while ((*str++) && (*pref));
Daniel Veillarde57ec792003-09-10 10:50:59 +00002100 if (*str++ != ':') return(0);
2101 do {
2102 if (*name++ != *str) return(0);
2103 } while (*str++);
2104 return(1);
2105}
2106
2107/**
Owen Taylor3473f882001-02-23 17:55:21 +00002108 * xmlStrncmp:
2109 * @str1: the first xmlChar *
2110 * @str2: the second xmlChar *
2111 * @len: the max comparison length
2112 *
2113 * a strncmp for xmlChar's
2114 *
2115 * Returns the integer result of the comparison
2116 */
2117
2118int
2119xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
2120 register int tmp;
2121
2122 if (len <= 0) return(0);
2123 if (str1 == str2) return(0);
2124 if (str1 == NULL) return(-1);
2125 if (str2 == NULL) return(1);
2126 do {
2127 tmp = *str1++ - *str2;
2128 if (tmp != 0 || --len == 0) return(tmp);
2129 } while (*str2++ != 0);
2130 return 0;
2131}
2132
Daniel Veillardb44025c2001-10-11 22:55:55 +00002133static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00002134 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
2135 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
2136 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
2137 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
2138 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
2139 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
2140 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
2141 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
2142 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
2143 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
2144 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
2145 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
2146 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
2147 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
2148 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
2149 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
2150 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
2151 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
2152 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
2153 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
2154 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
2155 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
2156 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
2157 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
2158 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
2159 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
2160 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
2161 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
2162 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
2163 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
2164 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
2165 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
2166};
2167
2168/**
2169 * xmlStrcasecmp:
2170 * @str1: the first xmlChar *
2171 * @str2: the second xmlChar *
2172 *
2173 * a strcasecmp for xmlChar's
2174 *
2175 * Returns the integer result of the comparison
2176 */
2177
2178int
2179xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
2180 register int tmp;
2181
2182 if (str1 == str2) return(0);
2183 if (str1 == NULL) return(-1);
2184 if (str2 == NULL) return(1);
2185 do {
2186 tmp = casemap[*str1++] - casemap[*str2];
2187 if (tmp != 0) return(tmp);
2188 } while (*str2++ != 0);
2189 return 0;
2190}
2191
2192/**
2193 * xmlStrncasecmp:
2194 * @str1: the first xmlChar *
2195 * @str2: the second xmlChar *
2196 * @len: the max comparison length
2197 *
2198 * a strncasecmp for xmlChar's
2199 *
2200 * Returns the integer result of the comparison
2201 */
2202
2203int
2204xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
2205 register int tmp;
2206
2207 if (len <= 0) return(0);
2208 if (str1 == str2) return(0);
2209 if (str1 == NULL) return(-1);
2210 if (str2 == NULL) return(1);
2211 do {
2212 tmp = casemap[*str1++] - casemap[*str2];
2213 if (tmp != 0 || --len == 0) return(tmp);
2214 } while (*str2++ != 0);
2215 return 0;
2216}
2217
2218/**
2219 * xmlStrchr:
2220 * @str: the xmlChar * array
2221 * @val: the xmlChar to search
2222 *
2223 * a strchr for xmlChar's
2224 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002225 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002226 */
2227
2228const xmlChar *
2229xmlStrchr(const xmlChar *str, xmlChar val) {
2230 if (str == NULL) return(NULL);
2231 while (*str != 0) { /* non input consuming */
2232 if (*str == val) return((xmlChar *) str);
2233 str++;
2234 }
2235 return(NULL);
2236}
2237
2238/**
2239 * xmlStrstr:
2240 * @str: the xmlChar * array (haystack)
2241 * @val: the xmlChar to search (needle)
2242 *
2243 * a strstr for xmlChar's
2244 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002245 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002246 */
2247
2248const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00002249xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00002250 int n;
2251
2252 if (str == NULL) return(NULL);
2253 if (val == NULL) return(NULL);
2254 n = xmlStrlen(val);
2255
2256 if (n == 0) return(str);
2257 while (*str != 0) { /* non input consuming */
2258 if (*str == *val) {
2259 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
2260 }
2261 str++;
2262 }
2263 return(NULL);
2264}
2265
2266/**
2267 * xmlStrcasestr:
2268 * @str: the xmlChar * array (haystack)
2269 * @val: the xmlChar to search (needle)
2270 *
2271 * a case-ignoring strstr for xmlChar's
2272 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002273 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002274 */
2275
2276const xmlChar *
2277xmlStrcasestr(const xmlChar *str, xmlChar *val) {
2278 int n;
2279
2280 if (str == NULL) return(NULL);
2281 if (val == NULL) return(NULL);
2282 n = xmlStrlen(val);
2283
2284 if (n == 0) return(str);
2285 while (*str != 0) { /* non input consuming */
2286 if (casemap[*str] == casemap[*val])
2287 if (!xmlStrncasecmp(str, val, n)) return(str);
2288 str++;
2289 }
2290 return(NULL);
2291}
2292
2293/**
2294 * xmlStrsub:
2295 * @str: the xmlChar * array (haystack)
2296 * @start: the index of the first char (zero based)
2297 * @len: the length of the substring
2298 *
2299 * Extract a substring of a given string
2300 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002301 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002302 */
2303
2304xmlChar *
2305xmlStrsub(const xmlChar *str, int start, int len) {
2306 int i;
2307
2308 if (str == NULL) return(NULL);
2309 if (start < 0) return(NULL);
2310 if (len < 0) return(NULL);
2311
2312 for (i = 0;i < start;i++) {
2313 if (*str == 0) return(NULL);
2314 str++;
2315 }
2316 if (*str == 0) return(NULL);
2317 return(xmlStrndup(str, len));
2318}
2319
2320/**
2321 * xmlStrlen:
2322 * @str: the xmlChar * array
2323 *
2324 * length of a xmlChar's string
2325 *
2326 * Returns the number of xmlChar contained in the ARRAY.
2327 */
2328
2329int
2330xmlStrlen(const xmlChar *str) {
2331 int len = 0;
2332
2333 if (str == NULL) return(0);
2334 while (*str != 0) { /* non input consuming */
2335 str++;
2336 len++;
2337 }
2338 return(len);
2339}
2340
2341/**
2342 * xmlStrncat:
2343 * @cur: the original xmlChar * array
2344 * @add: the xmlChar * array added
2345 * @len: the length of @add
2346 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002347 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00002348 * first bytes of @add.
2349 *
2350 * Returns a new xmlChar *, the original @cur is reallocated if needed
2351 * and should not be freed
2352 */
2353
2354xmlChar *
2355xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
2356 int size;
2357 xmlChar *ret;
2358
2359 if ((add == NULL) || (len == 0))
2360 return(cur);
2361 if (cur == NULL)
2362 return(xmlStrndup(add, len));
2363
2364 size = xmlStrlen(cur);
2365 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
2366 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002367 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002368 return(cur);
2369 }
2370 memcpy(&ret[size], add, len * sizeof(xmlChar));
2371 ret[size + len] = 0;
2372 return(ret);
2373}
2374
2375/**
2376 * xmlStrcat:
2377 * @cur: the original xmlChar * array
2378 * @add: the xmlChar * array added
2379 *
2380 * a strcat for array of xmlChar's. Since they are supposed to be
2381 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2382 * a termination mark of '0'.
2383 *
2384 * Returns a new xmlChar * containing the concatenated string.
2385 */
2386xmlChar *
2387xmlStrcat(xmlChar *cur, const xmlChar *add) {
2388 const xmlChar *p = add;
2389
2390 if (add == NULL) return(cur);
2391 if (cur == NULL)
2392 return(xmlStrdup(add));
2393
2394 while (*p != 0) p++; /* non input consuming */
2395 return(xmlStrncat(cur, add, p - add));
2396}
2397
2398/************************************************************************
2399 * *
2400 * Commodity functions, cleanup needed ? *
2401 * *
2402 ************************************************************************/
2403
2404/**
2405 * areBlanks:
2406 * @ctxt: an XML parser context
2407 * @str: a xmlChar *
2408 * @len: the size of @str
2409 *
2410 * Is this a sequence of blank chars that one can ignore ?
2411 *
2412 * Returns 1 if ignorable 0 otherwise.
2413 */
2414
2415static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
2416 int i, ret;
2417 xmlNodePtr lastChild;
2418
Daniel Veillard05c13a22001-09-09 08:38:09 +00002419 /*
2420 * Don't spend time trying to differentiate them, the same callback is
2421 * used !
2422 */
2423 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002424 return(0);
2425
Owen Taylor3473f882001-02-23 17:55:21 +00002426 /*
2427 * Check for xml:space value.
2428 */
2429 if (*(ctxt->space) == 1)
2430 return(0);
2431
2432 /*
2433 * Check that the string is made of blanks
2434 */
2435 for (i = 0;i < len;i++)
2436 if (!(IS_BLANK(str[i]))) return(0);
2437
2438 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002439 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002440 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002441 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002442 if (ctxt->myDoc != NULL) {
2443 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2444 if (ret == 0) return(1);
2445 if (ret == 1) return(0);
2446 }
2447
2448 /*
2449 * Otherwise, heuristic :-\
2450 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002451 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002452 if ((ctxt->node->children == NULL) &&
2453 (RAW == '<') && (NXT(1) == '/')) return(0);
2454
2455 lastChild = xmlGetLastChild(ctxt->node);
2456 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002457 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2458 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002459 } else if (xmlNodeIsText(lastChild))
2460 return(0);
2461 else if ((ctxt->node->children != NULL) &&
2462 (xmlNodeIsText(ctxt->node->children)))
2463 return(0);
2464 return(1);
2465}
2466
Owen Taylor3473f882001-02-23 17:55:21 +00002467/************************************************************************
2468 * *
2469 * Extra stuff for namespace support *
2470 * Relates to http://www.w3.org/TR/WD-xml-names *
2471 * *
2472 ************************************************************************/
2473
2474/**
2475 * xmlSplitQName:
2476 * @ctxt: an XML parser context
2477 * @name: an XML parser context
2478 * @prefix: a xmlChar **
2479 *
2480 * parse an UTF8 encoded XML qualified name string
2481 *
2482 * [NS 5] QName ::= (Prefix ':')? LocalPart
2483 *
2484 * [NS 6] Prefix ::= NCName
2485 *
2486 * [NS 7] LocalPart ::= NCName
2487 *
2488 * Returns the local part, and prefix is updated
2489 * to get the Prefix if any.
2490 */
2491
2492xmlChar *
2493xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2494 xmlChar buf[XML_MAX_NAMELEN + 5];
2495 xmlChar *buffer = NULL;
2496 int len = 0;
2497 int max = XML_MAX_NAMELEN;
2498 xmlChar *ret = NULL;
2499 const xmlChar *cur = name;
2500 int c;
2501
2502 *prefix = NULL;
2503
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002504 if (cur == NULL) return(NULL);
2505
Owen Taylor3473f882001-02-23 17:55:21 +00002506#ifndef XML_XML_NAMESPACE
2507 /* xml: prefix is not really a namespace */
2508 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2509 (cur[2] == 'l') && (cur[3] == ':'))
2510 return(xmlStrdup(name));
2511#endif
2512
Daniel Veillard597bc482003-07-24 16:08:28 +00002513 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002514 if (cur[0] == ':')
2515 return(xmlStrdup(name));
2516
2517 c = *cur++;
2518 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2519 buf[len++] = c;
2520 c = *cur++;
2521 }
2522 if (len >= max) {
2523 /*
2524 * Okay someone managed to make a huge name, so he's ready to pay
2525 * for the processing speed.
2526 */
2527 max = len * 2;
2528
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002529 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002530 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002531 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002532 return(NULL);
2533 }
2534 memcpy(buffer, buf, len);
2535 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2536 if (len + 10 > max) {
2537 max *= 2;
2538 buffer = (xmlChar *) xmlRealloc(buffer,
2539 max * sizeof(xmlChar));
2540 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002541 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002542 return(NULL);
2543 }
2544 }
2545 buffer[len++] = c;
2546 c = *cur++;
2547 }
2548 buffer[len] = 0;
2549 }
2550
Daniel Veillard597bc482003-07-24 16:08:28 +00002551 /* nasty but well=formed
2552 if ((c == ':') && (*cur == 0)) {
2553 return(xmlStrdup(name));
2554 } */
2555
Owen Taylor3473f882001-02-23 17:55:21 +00002556 if (buffer == NULL)
2557 ret = xmlStrndup(buf, len);
2558 else {
2559 ret = buffer;
2560 buffer = NULL;
2561 max = XML_MAX_NAMELEN;
2562 }
2563
2564
2565 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002566 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002567 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002568 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002569 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002570 }
Owen Taylor3473f882001-02-23 17:55:21 +00002571 len = 0;
2572
Daniel Veillardbb284f42002-10-16 18:02:47 +00002573 /*
2574 * Check that the first character is proper to start
2575 * a new name
2576 */
2577 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2578 ((c >= 0x41) && (c <= 0x5A)) ||
2579 (c == '_') || (c == ':'))) {
2580 int l;
2581 int first = CUR_SCHAR(cur, l);
2582
2583 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002584 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002585 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002586 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002587 }
2588 }
2589 cur++;
2590
Owen Taylor3473f882001-02-23 17:55:21 +00002591 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2592 buf[len++] = c;
2593 c = *cur++;
2594 }
2595 if (len >= max) {
2596 /*
2597 * Okay someone managed to make a huge name, so he's ready to pay
2598 * for the processing speed.
2599 */
2600 max = len * 2;
2601
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002602 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002603 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002604 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002605 return(NULL);
2606 }
2607 memcpy(buffer, buf, len);
2608 while (c != 0) { /* tested bigname2.xml */
2609 if (len + 10 > max) {
2610 max *= 2;
2611 buffer = (xmlChar *) xmlRealloc(buffer,
2612 max * sizeof(xmlChar));
2613 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002614 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002615 return(NULL);
2616 }
2617 }
2618 buffer[len++] = c;
2619 c = *cur++;
2620 }
2621 buffer[len] = 0;
2622 }
2623
2624 if (buffer == NULL)
2625 ret = xmlStrndup(buf, len);
2626 else {
2627 ret = buffer;
2628 }
2629 }
2630
2631 return(ret);
2632}
2633
2634/************************************************************************
2635 * *
2636 * The parser itself *
2637 * Relates to http://www.w3.org/TR/REC-xml *
2638 * *
2639 ************************************************************************/
2640
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002641static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002642static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002643 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002644
Owen Taylor3473f882001-02-23 17:55:21 +00002645/**
2646 * xmlParseName:
2647 * @ctxt: an XML parser context
2648 *
2649 * parse an XML name.
2650 *
2651 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2652 * CombiningChar | Extender
2653 *
2654 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2655 *
2656 * [6] Names ::= Name (S Name)*
2657 *
2658 * Returns the Name parsed or NULL
2659 */
2660
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002661const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002662xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002663 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002664 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002665 int count = 0;
2666
2667 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002668
2669 /*
2670 * Accelerator for simple ASCII names
2671 */
2672 in = ctxt->input->cur;
2673 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2674 ((*in >= 0x41) && (*in <= 0x5A)) ||
2675 (*in == '_') || (*in == ':')) {
2676 in++;
2677 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2678 ((*in >= 0x41) && (*in <= 0x5A)) ||
2679 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002680 (*in == '_') || (*in == '-') ||
2681 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002682 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002683 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002684 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002685 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002686 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002687 ctxt->nbChars += count;
2688 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002689 if (ret == NULL)
2690 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002691 return(ret);
2692 }
2693 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002694 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002695}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002696
Daniel Veillard46de64e2002-05-29 08:21:33 +00002697/**
2698 * xmlParseNameAndCompare:
2699 * @ctxt: an XML parser context
2700 *
2701 * parse an XML name and compares for match
2702 * (specialized for endtag parsing)
2703 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002704 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2705 * and the name for mismatch
2706 */
2707
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002708static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002709xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
2710 const xmlChar *cmp = other;
2711 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002712 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002713
2714 GROW;
2715
2716 in = ctxt->input->cur;
2717 while (*in != 0 && *in == *cmp) {
2718 ++in;
2719 ++cmp;
2720 }
2721 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
2722 /* success */
2723 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002724 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002725 }
2726 /* failure (or end of input buffer), check with full function */
2727 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002728 /* strings coming from the dictionnary direct compare possible */
2729 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002730 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002731 }
2732 return ret;
2733}
2734
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002735static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002736xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002737 int len = 0, l;
2738 int c;
2739 int count = 0;
2740
2741 /*
2742 * Handler for more complex cases
2743 */
2744 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002745 c = CUR_CHAR(l);
2746 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2747 (!IS_LETTER(c) && (c != '_') &&
2748 (c != ':'))) {
2749 return(NULL);
2750 }
2751
2752 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
2753 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2754 (c == '.') || (c == '-') ||
2755 (c == '_') || (c == ':') ||
2756 (IS_COMBINING(c)) ||
2757 (IS_EXTENDER(c)))) {
2758 if (count++ > 100) {
2759 count = 0;
2760 GROW;
2761 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002762 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002763 NEXTL(l);
2764 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002765 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002766 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002767}
2768
2769/**
2770 * xmlParseStringName:
2771 * @ctxt: an XML parser context
2772 * @str: a pointer to the string pointer (IN/OUT)
2773 *
2774 * parse an XML name.
2775 *
2776 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2777 * CombiningChar | Extender
2778 *
2779 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2780 *
2781 * [6] Names ::= Name (S Name)*
2782 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002783 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002784 * is updated to the current location in the string.
2785 */
2786
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002787static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002788xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2789 xmlChar buf[XML_MAX_NAMELEN + 5];
2790 const xmlChar *cur = *str;
2791 int len = 0, l;
2792 int c;
2793
2794 c = CUR_SCHAR(cur, l);
2795 if (!IS_LETTER(c) && (c != '_') &&
2796 (c != ':')) {
2797 return(NULL);
2798 }
2799
2800 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2801 (c == '.') || (c == '-') ||
2802 (c == '_') || (c == ':') ||
2803 (IS_COMBINING(c)) ||
2804 (IS_EXTENDER(c))) {
2805 COPY_BUF(l,buf,len,c);
2806 cur += l;
2807 c = CUR_SCHAR(cur, l);
2808 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2809 /*
2810 * Okay someone managed to make a huge name, so he's ready to pay
2811 * for the processing speed.
2812 */
2813 xmlChar *buffer;
2814 int max = len * 2;
2815
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002816 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002817 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002818 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002819 return(NULL);
2820 }
2821 memcpy(buffer, buf, len);
2822 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2823 (c == '.') || (c == '-') ||
2824 (c == '_') || (c == ':') ||
2825 (IS_COMBINING(c)) ||
2826 (IS_EXTENDER(c))) {
2827 if (len + 10 > max) {
2828 max *= 2;
2829 buffer = (xmlChar *) xmlRealloc(buffer,
2830 max * sizeof(xmlChar));
2831 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002832 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002833 return(NULL);
2834 }
2835 }
2836 COPY_BUF(l,buffer,len,c);
2837 cur += l;
2838 c = CUR_SCHAR(cur, l);
2839 }
2840 buffer[len] = 0;
2841 *str = cur;
2842 return(buffer);
2843 }
2844 }
2845 *str = cur;
2846 return(xmlStrndup(buf, len));
2847}
2848
2849/**
2850 * xmlParseNmtoken:
2851 * @ctxt: an XML parser context
2852 *
2853 * parse an XML Nmtoken.
2854 *
2855 * [7] Nmtoken ::= (NameChar)+
2856 *
2857 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2858 *
2859 * Returns the Nmtoken parsed or NULL
2860 */
2861
2862xmlChar *
2863xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2864 xmlChar buf[XML_MAX_NAMELEN + 5];
2865 int len = 0, l;
2866 int c;
2867 int count = 0;
2868
2869 GROW;
2870 c = CUR_CHAR(l);
2871
2872 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2873 (c == '.') || (c == '-') ||
2874 (c == '_') || (c == ':') ||
2875 (IS_COMBINING(c)) ||
2876 (IS_EXTENDER(c))) {
2877 if (count++ > 100) {
2878 count = 0;
2879 GROW;
2880 }
2881 COPY_BUF(l,buf,len,c);
2882 NEXTL(l);
2883 c = CUR_CHAR(l);
2884 if (len >= XML_MAX_NAMELEN) {
2885 /*
2886 * Okay someone managed to make a huge token, so he's ready to pay
2887 * for the processing speed.
2888 */
2889 xmlChar *buffer;
2890 int max = len * 2;
2891
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002892 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002893 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002894 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002895 return(NULL);
2896 }
2897 memcpy(buffer, buf, len);
2898 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2899 (c == '.') || (c == '-') ||
2900 (c == '_') || (c == ':') ||
2901 (IS_COMBINING(c)) ||
2902 (IS_EXTENDER(c))) {
2903 if (count++ > 100) {
2904 count = 0;
2905 GROW;
2906 }
2907 if (len + 10 > max) {
2908 max *= 2;
2909 buffer = (xmlChar *) xmlRealloc(buffer,
2910 max * sizeof(xmlChar));
2911 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002912 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002913 return(NULL);
2914 }
2915 }
2916 COPY_BUF(l,buffer,len,c);
2917 NEXTL(l);
2918 c = CUR_CHAR(l);
2919 }
2920 buffer[len] = 0;
2921 return(buffer);
2922 }
2923 }
2924 if (len == 0)
2925 return(NULL);
2926 return(xmlStrndup(buf, len));
2927}
2928
2929/**
2930 * xmlParseEntityValue:
2931 * @ctxt: an XML parser context
2932 * @orig: if non-NULL store a copy of the original entity value
2933 *
2934 * parse a value for ENTITY declarations
2935 *
2936 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2937 * "'" ([^%&'] | PEReference | Reference)* "'"
2938 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002939 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002940 */
2941
2942xmlChar *
2943xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2944 xmlChar *buf = NULL;
2945 int len = 0;
2946 int size = XML_PARSER_BUFFER_SIZE;
2947 int c, l;
2948 xmlChar stop;
2949 xmlChar *ret = NULL;
2950 const xmlChar *cur = NULL;
2951 xmlParserInputPtr input;
2952
2953 if (RAW == '"') stop = '"';
2954 else if (RAW == '\'') stop = '\'';
2955 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002956 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002957 return(NULL);
2958 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002959 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002960 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002961 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002962 return(NULL);
2963 }
2964
2965 /*
2966 * The content of the entity definition is copied in a buffer.
2967 */
2968
2969 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2970 input = ctxt->input;
2971 GROW;
2972 NEXT;
2973 c = CUR_CHAR(l);
2974 /*
2975 * NOTE: 4.4.5 Included in Literal
2976 * When a parameter entity reference appears in a literal entity
2977 * value, ... a single or double quote character in the replacement
2978 * text is always treated as a normal data character and will not
2979 * terminate the literal.
2980 * In practice it means we stop the loop only when back at parsing
2981 * the initial entity and the quote is found
2982 */
2983 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2984 (ctxt->input != input))) {
2985 if (len + 5 >= size) {
2986 size *= 2;
2987 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2988 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002989 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002990 return(NULL);
2991 }
2992 }
2993 COPY_BUF(l,buf,len,c);
2994 NEXTL(l);
2995 /*
2996 * Pop-up of finished entities.
2997 */
2998 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2999 xmlPopInput(ctxt);
3000
3001 GROW;
3002 c = CUR_CHAR(l);
3003 if (c == 0) {
3004 GROW;
3005 c = CUR_CHAR(l);
3006 }
3007 }
3008 buf[len] = 0;
3009
3010 /*
3011 * Raise problem w.r.t. '&' and '%' being used in non-entities
3012 * reference constructs. Note Charref will be handled in
3013 * xmlStringDecodeEntities()
3014 */
3015 cur = buf;
3016 while (*cur != 0) { /* non input consuming */
3017 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3018 xmlChar *name;
3019 xmlChar tmp = *cur;
3020
3021 cur++;
3022 name = xmlParseStringName(ctxt, &cur);
3023 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003024 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003025 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003026 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003027 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003028 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3029 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003030 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003031 }
3032 if (name != NULL)
3033 xmlFree(name);
3034 }
3035 cur++;
3036 }
3037
3038 /*
3039 * Then PEReference entities are substituted.
3040 */
3041 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003042 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003043 xmlFree(buf);
3044 } else {
3045 NEXT;
3046 /*
3047 * NOTE: 4.4.7 Bypassed
3048 * When a general entity reference appears in the EntityValue in
3049 * an entity declaration, it is bypassed and left as is.
3050 * so XML_SUBSTITUTE_REF is not set here.
3051 */
3052 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3053 0, 0, 0);
3054 if (orig != NULL)
3055 *orig = buf;
3056 else
3057 xmlFree(buf);
3058 }
3059
3060 return(ret);
3061}
3062
3063/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003064 * xmlParseAttValueComplex:
3065 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003066 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003067 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003068 *
3069 * parse a value for an attribute, this is the fallback function
3070 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003071 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003072 *
3073 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3074 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003075static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003076xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003077 xmlChar limit = 0;
3078 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003079 int len = 0;
3080 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003081 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003082 xmlChar *current = NULL;
3083 xmlEntityPtr ent;
3084
Owen Taylor3473f882001-02-23 17:55:21 +00003085 if (NXT(0) == '"') {
3086 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3087 limit = '"';
3088 NEXT;
3089 } else if (NXT(0) == '\'') {
3090 limit = '\'';
3091 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3092 NEXT;
3093 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003094 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003095 return(NULL);
3096 }
3097
3098 /*
3099 * allocate a translation buffer.
3100 */
3101 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003102 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003103 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003104
3105 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003106 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003107 */
3108 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003109 while ((NXT(0) != limit) && /* checked */
3110 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003111 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003112 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003113 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003114 if (NXT(1) == '#') {
3115 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003116
Owen Taylor3473f882001-02-23 17:55:21 +00003117 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003118 if (ctxt->replaceEntities) {
3119 if (len > buf_size - 10) {
3120 growBuffer(buf);
3121 }
3122 buf[len++] = '&';
3123 } else {
3124 /*
3125 * The reparsing will be done in xmlStringGetNodeList()
3126 * called by the attribute() function in SAX.c
3127 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003128 if (len > buf_size - 10) {
3129 growBuffer(buf);
3130 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003131 buf[len++] = '&';
3132 buf[len++] = '#';
3133 buf[len++] = '3';
3134 buf[len++] = '8';
3135 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003136 }
3137 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003138 if (len > buf_size - 10) {
3139 growBuffer(buf);
3140 }
Owen Taylor3473f882001-02-23 17:55:21 +00003141 len += xmlCopyChar(0, &buf[len], val);
3142 }
3143 } else {
3144 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003145 if ((ent != NULL) &&
3146 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3147 if (len > buf_size - 10) {
3148 growBuffer(buf);
3149 }
3150 if ((ctxt->replaceEntities == 0) &&
3151 (ent->content[0] == '&')) {
3152 buf[len++] = '&';
3153 buf[len++] = '#';
3154 buf[len++] = '3';
3155 buf[len++] = '8';
3156 buf[len++] = ';';
3157 } else {
3158 buf[len++] = ent->content[0];
3159 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003160 } else if ((ent != NULL) &&
3161 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003162 xmlChar *rep;
3163
3164 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3165 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003166 XML_SUBSTITUTE_REF,
3167 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003168 if (rep != NULL) {
3169 current = rep;
3170 while (*current != 0) { /* non input consuming */
3171 buf[len++] = *current++;
3172 if (len > buf_size - 10) {
3173 growBuffer(buf);
3174 }
3175 }
3176 xmlFree(rep);
3177 }
3178 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003179 if (len > buf_size - 10) {
3180 growBuffer(buf);
3181 }
Owen Taylor3473f882001-02-23 17:55:21 +00003182 if (ent->content != NULL)
3183 buf[len++] = ent->content[0];
3184 }
3185 } else if (ent != NULL) {
3186 int i = xmlStrlen(ent->name);
3187 const xmlChar *cur = ent->name;
3188
3189 /*
3190 * This may look absurd but is needed to detect
3191 * entities problems
3192 */
3193 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3194 (ent->content != NULL)) {
3195 xmlChar *rep;
3196 rep = xmlStringDecodeEntities(ctxt, ent->content,
3197 XML_SUBSTITUTE_REF, 0, 0, 0);
3198 if (rep != NULL)
3199 xmlFree(rep);
3200 }
3201
3202 /*
3203 * Just output the reference
3204 */
3205 buf[len++] = '&';
3206 if (len > buf_size - i - 10) {
3207 growBuffer(buf);
3208 }
3209 for (;i > 0;i--)
3210 buf[len++] = *cur++;
3211 buf[len++] = ';';
3212 }
3213 }
3214 } else {
3215 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003216 if ((len != 0) || (!normalize)) {
3217 if ((!normalize) || (!in_space)) {
3218 COPY_BUF(l,buf,len,0x20);
3219 if (len > buf_size - 10) {
3220 growBuffer(buf);
3221 }
3222 }
3223 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003224 }
3225 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003226 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003227 COPY_BUF(l,buf,len,c);
3228 if (len > buf_size - 10) {
3229 growBuffer(buf);
3230 }
3231 }
3232 NEXTL(l);
3233 }
3234 GROW;
3235 c = CUR_CHAR(l);
3236 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003237 if ((in_space) && (normalize)) {
3238 while (buf[len - 1] == 0x20) len--;
3239 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003240 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003241 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003242 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003243 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003244 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3245 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003246 } else
3247 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003248 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003249 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003250
3251mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003252 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003253 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003254}
3255
3256/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003257 * xmlParseAttValue:
3258 * @ctxt: an XML parser context
3259 *
3260 * parse a value for an attribute
3261 * Note: the parser won't do substitution of entities here, this
3262 * will be handled later in xmlStringGetNodeList
3263 *
3264 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3265 * "'" ([^<&'] | Reference)* "'"
3266 *
3267 * 3.3.3 Attribute-Value Normalization:
3268 * Before the value of an attribute is passed to the application or
3269 * checked for validity, the XML processor must normalize it as follows:
3270 * - a character reference is processed by appending the referenced
3271 * character to the attribute value
3272 * - an entity reference is processed by recursively processing the
3273 * replacement text of the entity
3274 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3275 * appending #x20 to the normalized value, except that only a single
3276 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3277 * parsed entity or the literal entity value of an internal parsed entity
3278 * - other characters are processed by appending them to the normalized value
3279 * If the declared value is not CDATA, then the XML processor must further
3280 * process the normalized attribute value by discarding any leading and
3281 * trailing space (#x20) characters, and by replacing sequences of space
3282 * (#x20) characters by a single space (#x20) character.
3283 * All attributes for which no declaration has been read should be treated
3284 * by a non-validating parser as if declared CDATA.
3285 *
3286 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3287 */
3288
3289
3290xmlChar *
3291xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003292 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003293}
3294
3295/**
Owen Taylor3473f882001-02-23 17:55:21 +00003296 * xmlParseSystemLiteral:
3297 * @ctxt: an XML parser context
3298 *
3299 * parse an XML Literal
3300 *
3301 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3302 *
3303 * Returns the SystemLiteral parsed or NULL
3304 */
3305
3306xmlChar *
3307xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3308 xmlChar *buf = NULL;
3309 int len = 0;
3310 int size = XML_PARSER_BUFFER_SIZE;
3311 int cur, l;
3312 xmlChar stop;
3313 int state = ctxt->instate;
3314 int count = 0;
3315
3316 SHRINK;
3317 if (RAW == '"') {
3318 NEXT;
3319 stop = '"';
3320 } else if (RAW == '\'') {
3321 NEXT;
3322 stop = '\'';
3323 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003324 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003325 return(NULL);
3326 }
3327
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003328 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003329 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003330 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003331 return(NULL);
3332 }
3333 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3334 cur = CUR_CHAR(l);
3335 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
3336 if (len + 5 >= size) {
3337 size *= 2;
3338 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3339 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003340 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003341 ctxt->instate = (xmlParserInputState) state;
3342 return(NULL);
3343 }
3344 }
3345 count++;
3346 if (count > 50) {
3347 GROW;
3348 count = 0;
3349 }
3350 COPY_BUF(l,buf,len,cur);
3351 NEXTL(l);
3352 cur = CUR_CHAR(l);
3353 if (cur == 0) {
3354 GROW;
3355 SHRINK;
3356 cur = CUR_CHAR(l);
3357 }
3358 }
3359 buf[len] = 0;
3360 ctxt->instate = (xmlParserInputState) state;
3361 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003362 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003363 } else {
3364 NEXT;
3365 }
3366 return(buf);
3367}
3368
3369/**
3370 * xmlParsePubidLiteral:
3371 * @ctxt: an XML parser context
3372 *
3373 * parse an XML public literal
3374 *
3375 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3376 *
3377 * Returns the PubidLiteral parsed or NULL.
3378 */
3379
3380xmlChar *
3381xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3382 xmlChar *buf = NULL;
3383 int len = 0;
3384 int size = XML_PARSER_BUFFER_SIZE;
3385 xmlChar cur;
3386 xmlChar stop;
3387 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003388 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003389
3390 SHRINK;
3391 if (RAW == '"') {
3392 NEXT;
3393 stop = '"';
3394 } else if (RAW == '\'') {
3395 NEXT;
3396 stop = '\'';
3397 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003398 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003399 return(NULL);
3400 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003401 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003402 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003403 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003404 return(NULL);
3405 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003406 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003407 cur = CUR;
3408 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
3409 if (len + 1 >= size) {
3410 size *= 2;
3411 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3412 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003413 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003414 return(NULL);
3415 }
3416 }
3417 buf[len++] = cur;
3418 count++;
3419 if (count > 50) {
3420 GROW;
3421 count = 0;
3422 }
3423 NEXT;
3424 cur = CUR;
3425 if (cur == 0) {
3426 GROW;
3427 SHRINK;
3428 cur = CUR;
3429 }
3430 }
3431 buf[len] = 0;
3432 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003433 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003434 } else {
3435 NEXT;
3436 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003437 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003438 return(buf);
3439}
3440
Daniel Veillard48b2f892001-02-25 16:11:03 +00003441void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00003442/**
3443 * xmlParseCharData:
3444 * @ctxt: an XML parser context
3445 * @cdata: int indicating whether we are within a CDATA section
3446 *
3447 * parse a CharData section.
3448 * if we are within a CDATA section ']]>' marks an end of section.
3449 *
3450 * The right angle bracket (>) may be represented using the string "&gt;",
3451 * and must, for compatibility, be escaped using "&gt;" or a character
3452 * reference when it appears in the string "]]>" in content, when that
3453 * string is not marking the end of a CDATA section.
3454 *
3455 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3456 */
3457
3458void
3459xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003460 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003461 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003462 int line = ctxt->input->line;
3463 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003464
3465 SHRINK;
3466 GROW;
3467 /*
3468 * Accelerated common case where input don't need to be
3469 * modified before passing it to the handler.
3470 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003471 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003472 in = ctxt->input->cur;
3473 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003474get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00003475 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
3476 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003477 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003478 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003479 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003480 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003481 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003482 ctxt->input->line++;
3483 in++;
3484 }
3485 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003486 }
3487 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003488 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003489 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003490 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003491 return;
3492 }
3493 in++;
3494 goto get_more;
3495 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003496 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003497 if (nbchar > 0) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003498 if ((ctxt->sax->ignorableWhitespace !=
3499 ctxt->sax->characters) &&
3500 (IS_BLANK(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003501 const xmlChar *tmp = ctxt->input->cur;
3502 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003503
Daniel Veillarda7374592001-05-10 14:17:55 +00003504 if (areBlanks(ctxt, tmp, nbchar)) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003505 ctxt->sax->ignorableWhitespace(ctxt->userData,
3506 tmp, nbchar);
3507 } else if (ctxt->sax->characters != NULL)
3508 ctxt->sax->characters(ctxt->userData,
3509 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003510 line = ctxt->input->line;
3511 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003512 } else {
3513 if (ctxt->sax->characters != NULL)
3514 ctxt->sax->characters(ctxt->userData,
3515 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003516 line = ctxt->input->line;
3517 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003518 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003519 }
3520 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003521 if (*in == 0xD) {
3522 in++;
3523 if (*in == 0xA) {
3524 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003525 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003526 ctxt->input->line++;
3527 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003528 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003529 in--;
3530 }
3531 if (*in == '<') {
3532 return;
3533 }
3534 if (*in == '&') {
3535 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003536 }
3537 SHRINK;
3538 GROW;
3539 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003540 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003541 nbchar = 0;
3542 }
Daniel Veillard50582112001-03-26 22:52:16 +00003543 ctxt->input->line = line;
3544 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003545 xmlParseCharDataComplex(ctxt, cdata);
3546}
3547
Daniel Veillard01c13b52002-12-10 15:19:08 +00003548/**
3549 * xmlParseCharDataComplex:
3550 * @ctxt: an XML parser context
3551 * @cdata: int indicating whether we are within a CDATA section
3552 *
3553 * parse a CharData section.this is the fallback function
3554 * of xmlParseCharData() when the parsing requires handling
3555 * of non-ASCII characters.
3556 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003557void
3558xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003559 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3560 int nbchar = 0;
3561 int cur, l;
3562 int count = 0;
3563
3564 SHRINK;
3565 GROW;
3566 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003567 while ((cur != '<') && /* checked */
3568 (cur != '&') &&
3569 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003570 if ((cur == ']') && (NXT(1) == ']') &&
3571 (NXT(2) == '>')) {
3572 if (cdata) break;
3573 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003574 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003575 }
3576 }
3577 COPY_BUF(l,buf,nbchar,cur);
3578 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003579 buf[nbchar] = 0;
3580
Owen Taylor3473f882001-02-23 17:55:21 +00003581 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003582 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003583 */
3584 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3585 if (areBlanks(ctxt, buf, nbchar)) {
3586 if (ctxt->sax->ignorableWhitespace != NULL)
3587 ctxt->sax->ignorableWhitespace(ctxt->userData,
3588 buf, nbchar);
3589 } else {
3590 if (ctxt->sax->characters != NULL)
3591 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3592 }
3593 }
3594 nbchar = 0;
3595 }
3596 count++;
3597 if (count > 50) {
3598 GROW;
3599 count = 0;
3600 }
3601 NEXTL(l);
3602 cur = CUR_CHAR(l);
3603 }
3604 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003605 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003606 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003607 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003608 */
3609 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3610 if (areBlanks(ctxt, buf, nbchar)) {
3611 if (ctxt->sax->ignorableWhitespace != NULL)
3612 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3613 } else {
3614 if (ctxt->sax->characters != NULL)
3615 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3616 }
3617 }
3618 }
3619}
3620
3621/**
3622 * xmlParseExternalID:
3623 * @ctxt: an XML parser context
3624 * @publicID: a xmlChar** receiving PubidLiteral
3625 * @strict: indicate whether we should restrict parsing to only
3626 * production [75], see NOTE below
3627 *
3628 * Parse an External ID or a Public ID
3629 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003630 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003631 * 'PUBLIC' S PubidLiteral S SystemLiteral
3632 *
3633 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3634 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3635 *
3636 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3637 *
3638 * Returns the function returns SystemLiteral and in the second
3639 * case publicID receives PubidLiteral, is strict is off
3640 * it is possible to return NULL and have publicID set.
3641 */
3642
3643xmlChar *
3644xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3645 xmlChar *URI = NULL;
3646
3647 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003648
3649 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003650 if ((RAW == 'S') && (NXT(1) == 'Y') &&
3651 (NXT(2) == 'S') && (NXT(3) == 'T') &&
3652 (NXT(4) == 'E') && (NXT(5) == 'M')) {
3653 SKIP(6);
3654 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003655 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3656 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003657 }
3658 SKIP_BLANKS;
3659 URI = xmlParseSystemLiteral(ctxt);
3660 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003661 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003662 }
3663 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
3664 (NXT(2) == 'B') && (NXT(3) == 'L') &&
3665 (NXT(4) == 'I') && (NXT(5) == 'C')) {
3666 SKIP(6);
3667 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003668 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003669 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003670 }
3671 SKIP_BLANKS;
3672 *publicID = xmlParsePubidLiteral(ctxt);
3673 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003674 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003675 }
3676 if (strict) {
3677 /*
3678 * We don't handle [83] so "S SystemLiteral" is required.
3679 */
3680 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003681 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003682 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003683 }
3684 } else {
3685 /*
3686 * We handle [83] so we return immediately, if
3687 * "S SystemLiteral" is not detected. From a purely parsing
3688 * point of view that's a nice mess.
3689 */
3690 const xmlChar *ptr;
3691 GROW;
3692
3693 ptr = CUR_PTR;
3694 if (!IS_BLANK(*ptr)) return(NULL);
3695
3696 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3697 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3698 }
3699 SKIP_BLANKS;
3700 URI = xmlParseSystemLiteral(ctxt);
3701 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003702 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003703 }
3704 }
3705 return(URI);
3706}
3707
3708/**
3709 * xmlParseComment:
3710 * @ctxt: an XML parser context
3711 *
3712 * Skip an XML (SGML) comment <!-- .... -->
3713 * The spec says that "For compatibility, the string "--" (double-hyphen)
3714 * must not occur within comments. "
3715 *
3716 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3717 */
3718void
3719xmlParseComment(xmlParserCtxtPtr ctxt) {
3720 xmlChar *buf = NULL;
3721 int len;
3722 int size = XML_PARSER_BUFFER_SIZE;
3723 int q, ql;
3724 int r, rl;
3725 int cur, l;
3726 xmlParserInputState state;
3727 xmlParserInputPtr input = ctxt->input;
3728 int count = 0;
3729
3730 /*
3731 * Check that there is a comment right here.
3732 */
3733 if ((RAW != '<') || (NXT(1) != '!') ||
3734 (NXT(2) != '-') || (NXT(3) != '-')) return;
3735
3736 state = ctxt->instate;
3737 ctxt->instate = XML_PARSER_COMMENT;
3738 SHRINK;
3739 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003740 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003741 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003742 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003743 ctxt->instate = state;
3744 return;
3745 }
3746 q = CUR_CHAR(ql);
3747 NEXTL(ql);
3748 r = CUR_CHAR(rl);
3749 NEXTL(rl);
3750 cur = CUR_CHAR(l);
3751 len = 0;
3752 while (IS_CHAR(cur) && /* checked */
3753 ((cur != '>') ||
3754 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003755 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003756 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003757 }
3758 if (len + 5 >= size) {
3759 size *= 2;
3760 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3761 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003762 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003763 ctxt->instate = state;
3764 return;
3765 }
3766 }
3767 COPY_BUF(ql,buf,len,q);
3768 q = r;
3769 ql = rl;
3770 r = cur;
3771 rl = l;
3772
3773 count++;
3774 if (count > 50) {
3775 GROW;
3776 count = 0;
3777 }
3778 NEXTL(l);
3779 cur = CUR_CHAR(l);
3780 if (cur == 0) {
3781 SHRINK;
3782 GROW;
3783 cur = CUR_CHAR(l);
3784 }
3785 }
3786 buf[len] = 0;
3787 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003788 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003789 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003790 xmlFree(buf);
3791 } else {
3792 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003793 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3794 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003795 }
3796 NEXT;
3797 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3798 (!ctxt->disableSAX))
3799 ctxt->sax->comment(ctxt->userData, buf);
3800 xmlFree(buf);
3801 }
3802 ctxt->instate = state;
3803}
3804
3805/**
3806 * xmlParsePITarget:
3807 * @ctxt: an XML parser context
3808 *
3809 * parse the name of a PI
3810 *
3811 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3812 *
3813 * Returns the PITarget name or NULL
3814 */
3815
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003816const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003817xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003818 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003819
3820 name = xmlParseName(ctxt);
3821 if ((name != NULL) &&
3822 ((name[0] == 'x') || (name[0] == 'X')) &&
3823 ((name[1] == 'm') || (name[1] == 'M')) &&
3824 ((name[2] == 'l') || (name[2] == 'L'))) {
3825 int i;
3826 if ((name[0] == 'x') && (name[1] == 'm') &&
3827 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003828 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003829 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003830 return(name);
3831 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003832 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003833 return(name);
3834 }
3835 for (i = 0;;i++) {
3836 if (xmlW3CPIs[i] == NULL) break;
3837 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3838 return(name);
3839 }
3840 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3841 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3842 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003843 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003844 }
3845 }
3846 return(name);
3847}
3848
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The expected content is: optional blanks, the word "catalog", optional
 * blanks, '=', optional blanks, a quoted URL, optional blanks. Any other
 * shape emits a SAX warning, except a missing '=' which is ignored
 * silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* pseudo-attribute name */
    for (; IS_BLANK(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7))
        goto error;
    p += 7;

    /* '=' sign */
    for (; IS_BLANK(*p); p++)
        ;
    if (*p != '=') {
        return;
    }
    p++;

    /* opening quote, either kind */
    for (; IS_BLANK(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* the value runs up to the matching closing quote */
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3911
Owen Taylor3473f882001-02-23 17:55:21 +00003912/**
3913 * xmlParsePI:
3914 * @ctxt: an XML parser context
3915 *
3916 * parse an XML Processing Instruction.
3917 *
3918 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3919 *
3920 * The processing is transfered to SAX once parsed.
3921 */
3922
3923void
3924xmlParsePI(xmlParserCtxtPtr ctxt) {
3925 xmlChar *buf = NULL;
3926 int len = 0;
3927 int size = XML_PARSER_BUFFER_SIZE;
3928 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003929 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00003930 xmlParserInputState state;
3931 int count = 0;
3932
3933 if ((RAW == '<') && (NXT(1) == '?')) {
3934 xmlParserInputPtr input = ctxt->input;
3935 state = ctxt->instate;
3936 ctxt->instate = XML_PARSER_PI;
3937 /*
3938 * this is a Processing Instruction.
3939 */
3940 SKIP(2);
3941 SHRINK;
3942
3943 /*
3944 * Parse the target name and check for special support like
3945 * namespace.
3946 */
3947 target = xmlParsePITarget(ctxt);
3948 if (target != NULL) {
3949 if ((RAW == '?') && (NXT(1) == '>')) {
3950 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003951 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3952 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003953 }
3954 SKIP(2);
3955
3956 /*
3957 * SAX: PI detected.
3958 */
3959 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3960 (ctxt->sax->processingInstruction != NULL))
3961 ctxt->sax->processingInstruction(ctxt->userData,
3962 target, NULL);
3963 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00003964 return;
3965 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003966 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003967 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003968 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003969 ctxt->instate = state;
3970 return;
3971 }
3972 cur = CUR;
3973 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003974 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
3975 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003976 }
3977 SKIP_BLANKS;
3978 cur = CUR_CHAR(l);
3979 while (IS_CHAR(cur) && /* checked */
3980 ((cur != '?') || (NXT(1) != '>'))) {
3981 if (len + 5 >= size) {
3982 size *= 2;
3983 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3984 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003985 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003986 ctxt->instate = state;
3987 return;
3988 }
3989 }
3990 count++;
3991 if (count > 50) {
3992 GROW;
3993 count = 0;
3994 }
3995 COPY_BUF(l,buf,len,cur);
3996 NEXTL(l);
3997 cur = CUR_CHAR(l);
3998 if (cur == 0) {
3999 SHRINK;
4000 GROW;
4001 cur = CUR_CHAR(l);
4002 }
4003 }
4004 buf[len] = 0;
4005 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004006 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4007 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004008 } else {
4009 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004010 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4011 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004012 }
4013 SKIP(2);
4014
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004015#ifdef LIBXML_CATALOG_ENABLED
4016 if (((state == XML_PARSER_MISC) ||
4017 (state == XML_PARSER_START)) &&
4018 (xmlStrEqual(target, XML_CATALOG_PI))) {
4019 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4020 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4021 (allow == XML_CATA_ALLOW_ALL))
4022 xmlParseCatalogPI(ctxt, buf);
4023 }
4024#endif
4025
4026
Owen Taylor3473f882001-02-23 17:55:21 +00004027 /*
4028 * SAX: PI detected.
4029 */
4030 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4031 (ctxt->sax->processingInstruction != NULL))
4032 ctxt->sax->processingInstruction(ctxt->userData,
4033 target, buf);
4034 }
4035 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004036 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004037 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004038 }
4039 ctxt->instate = state;
4040 }
4041}
4042
4043/**
4044 * xmlParseNotationDecl:
4045 * @ctxt: an XML parser context
4046 *
4047 * parse a notation declaration
4048 *
4049 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4050 *
4051 * Hence there is actually 3 choices:
4052 * 'PUBLIC' S PubidLiteral
4053 * 'PUBLIC' S PubidLiteral S SystemLiteral
4054 * and 'SYSTEM' S SystemLiteral
4055 *
4056 * See the NOTE on xmlParseExternalID().
4057 */
4058
4059void
4060xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004061 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004062 xmlChar *Pubid;
4063 xmlChar *Systemid;
4064
4065 if ((RAW == '<') && (NXT(1) == '!') &&
4066 (NXT(2) == 'N') && (NXT(3) == 'O') &&
4067 (NXT(4) == 'T') && (NXT(5) == 'A') &&
4068 (NXT(6) == 'T') && (NXT(7) == 'I') &&
4069 (NXT(8) == 'O') && (NXT(9) == 'N')) {
4070 xmlParserInputPtr input = ctxt->input;
4071 SHRINK;
4072 SKIP(10);
4073 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004074 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4075 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004076 return;
4077 }
4078 SKIP_BLANKS;
4079
Daniel Veillard76d66f42001-05-16 21:05:17 +00004080 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004081 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004082 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004083 return;
4084 }
4085 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004086 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004087 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004088 return;
4089 }
4090 SKIP_BLANKS;
4091
4092 /*
4093 * Parse the IDs.
4094 */
4095 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4096 SKIP_BLANKS;
4097
4098 if (RAW == '>') {
4099 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004100 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4101 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004102 }
4103 NEXT;
4104 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4105 (ctxt->sax->notationDecl != NULL))
4106 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4107 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004108 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004109 }
Owen Taylor3473f882001-02-23 17:55:21 +00004110 if (Systemid != NULL) xmlFree(Systemid);
4111 if (Pubid != NULL) xmlFree(Pubid);
4112 }
4113}
4114
4115/**
4116 * xmlParseEntityDecl:
4117 * @ctxt: an XML parser context
4118 *
4119 * parse <!ENTITY declarations
4120 *
4121 * [70] EntityDecl ::= GEDecl | PEDecl
4122 *
4123 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4124 *
4125 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4126 *
4127 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4128 *
4129 * [74] PEDef ::= EntityValue | ExternalID
4130 *
4131 * [76] NDataDecl ::= S 'NDATA' S Name
4132 *
4133 * [ VC: Notation Declared ]
4134 * The Name must match the declared name of a notation.
4135 */
4136
4137void
4138xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004139 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004140 xmlChar *value = NULL;
4141 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004142 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004143 int isParameter = 0;
4144 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004145 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004146
4147 GROW;
4148 if ((RAW == '<') && (NXT(1) == '!') &&
4149 (NXT(2) == 'E') && (NXT(3) == 'N') &&
4150 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4151 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
4152 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004153 SHRINK;
4154 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004155 skipped = SKIP_BLANKS;
4156 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004157 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4158 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004159 }
Owen Taylor3473f882001-02-23 17:55:21 +00004160
4161 if (RAW == '%') {
4162 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004163 skipped = SKIP_BLANKS;
4164 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004165 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4166 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004167 }
Owen Taylor3473f882001-02-23 17:55:21 +00004168 isParameter = 1;
4169 }
4170
Daniel Veillard76d66f42001-05-16 21:05:17 +00004171 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004172 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004173 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4174 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004175 return;
4176 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004177 skipped = SKIP_BLANKS;
4178 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004179 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4180 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004181 }
Owen Taylor3473f882001-02-23 17:55:21 +00004182
Daniel Veillardf5582f12002-06-11 10:08:16 +00004183 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004184 /*
4185 * handle the various case of definitions...
4186 */
4187 if (isParameter) {
4188 if ((RAW == '"') || (RAW == '\'')) {
4189 value = xmlParseEntityValue(ctxt, &orig);
4190 if (value) {
4191 if ((ctxt->sax != NULL) &&
4192 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4193 ctxt->sax->entityDecl(ctxt->userData, name,
4194 XML_INTERNAL_PARAMETER_ENTITY,
4195 NULL, NULL, value);
4196 }
4197 } else {
4198 URI = xmlParseExternalID(ctxt, &literal, 1);
4199 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004200 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004201 }
4202 if (URI) {
4203 xmlURIPtr uri;
4204
4205 uri = xmlParseURI((const char *) URI);
4206 if (uri == NULL) {
4207 ctxt->errNo = XML_ERR_INVALID_URI;
4208 if ((ctxt->sax != NULL) &&
4209 (!ctxt->disableSAX) &&
4210 (ctxt->sax->error != NULL))
4211 ctxt->sax->error(ctxt->userData,
4212 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004213 /*
4214 * This really ought to be a well formedness error
4215 * but the XML Core WG decided otherwise c.f. issue
4216 * E26 of the XML erratas.
4217 */
Owen Taylor3473f882001-02-23 17:55:21 +00004218 } else {
4219 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004220 /*
4221 * Okay this is foolish to block those but not
4222 * invalid URIs.
4223 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004224 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004225 } else {
4226 if ((ctxt->sax != NULL) &&
4227 (!ctxt->disableSAX) &&
4228 (ctxt->sax->entityDecl != NULL))
4229 ctxt->sax->entityDecl(ctxt->userData, name,
4230 XML_EXTERNAL_PARAMETER_ENTITY,
4231 literal, URI, NULL);
4232 }
4233 xmlFreeURI(uri);
4234 }
4235 }
4236 }
4237 } else {
4238 if ((RAW == '"') || (RAW == '\'')) {
4239 value = xmlParseEntityValue(ctxt, &orig);
4240 if ((ctxt->sax != NULL) &&
4241 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4242 ctxt->sax->entityDecl(ctxt->userData, name,
4243 XML_INTERNAL_GENERAL_ENTITY,
4244 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004245 /*
4246 * For expat compatibility in SAX mode.
4247 */
4248 if ((ctxt->myDoc == NULL) ||
4249 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4250 if (ctxt->myDoc == NULL) {
4251 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4252 }
4253 if (ctxt->myDoc->intSubset == NULL)
4254 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4255 BAD_CAST "fake", NULL, NULL);
4256
Daniel Veillard1af9a412003-08-20 22:54:39 +00004257 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4258 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004259 }
Owen Taylor3473f882001-02-23 17:55:21 +00004260 } else {
4261 URI = xmlParseExternalID(ctxt, &literal, 1);
4262 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004263 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004264 }
4265 if (URI) {
4266 xmlURIPtr uri;
4267
4268 uri = xmlParseURI((const char *)URI);
4269 if (uri == NULL) {
4270 ctxt->errNo = XML_ERR_INVALID_URI;
4271 if ((ctxt->sax != NULL) &&
4272 (!ctxt->disableSAX) &&
4273 (ctxt->sax->error != NULL))
4274 ctxt->sax->error(ctxt->userData,
4275 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004276 /*
4277 * This really ought to be a well formedness error
4278 * but the XML Core WG decided otherwise c.f. issue
4279 * E26 of the XML erratas.
4280 */
Owen Taylor3473f882001-02-23 17:55:21 +00004281 } else {
4282 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004283 /*
4284 * Okay this is foolish to block those but not
4285 * invalid URIs.
4286 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004287 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004288 }
4289 xmlFreeURI(uri);
4290 }
4291 }
4292 if ((RAW != '>') && (!IS_BLANK(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004293 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4294 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004295 }
4296 SKIP_BLANKS;
4297 if ((RAW == 'N') && (NXT(1) == 'D') &&
4298 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4299 (NXT(4) == 'A')) {
4300 SKIP(5);
4301 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004302 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4303 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004304 }
4305 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004306 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004307 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4308 (ctxt->sax->unparsedEntityDecl != NULL))
4309 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4310 literal, URI, ndata);
4311 } else {
4312 if ((ctxt->sax != NULL) &&
4313 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4314 ctxt->sax->entityDecl(ctxt->userData, name,
4315 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4316 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004317 /*
4318 * For expat compatibility in SAX mode.
4319 * assuming the entity repalcement was asked for
4320 */
4321 if ((ctxt->replaceEntities != 0) &&
4322 ((ctxt->myDoc == NULL) ||
4323 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4324 if (ctxt->myDoc == NULL) {
4325 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4326 }
4327
4328 if (ctxt->myDoc->intSubset == NULL)
4329 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4330 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004331 xmlSAX2EntityDecl(ctxt, name,
4332 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4333 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004334 }
Owen Taylor3473f882001-02-23 17:55:21 +00004335 }
4336 }
4337 }
4338 SKIP_BLANKS;
4339 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004340 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004341 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004342 } else {
4343 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004344 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4345 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004346 }
4347 NEXT;
4348 }
4349 if (orig != NULL) {
4350 /*
4351 * Ugly mechanism to save the raw entity value.
4352 */
4353 xmlEntityPtr cur = NULL;
4354
4355 if (isParameter) {
4356 if ((ctxt->sax != NULL) &&
4357 (ctxt->sax->getParameterEntity != NULL))
4358 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4359 } else {
4360 if ((ctxt->sax != NULL) &&
4361 (ctxt->sax->getEntity != NULL))
4362 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004363 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004364 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004365 }
Owen Taylor3473f882001-02-23 17:55:21 +00004366 }
4367 if (cur != NULL) {
4368 if (cur->orig != NULL)
4369 xmlFree(orig);
4370 else
4371 cur->orig = orig;
4372 } else
4373 xmlFree(orig);
4374 }
Owen Taylor3473f882001-02-23 17:55:21 +00004375 if (value != NULL) xmlFree(value);
4376 if (URI != NULL) xmlFree(URI);
4377 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004378 }
4379}
4380
4381/**
4382 * xmlParseDefaultDecl:
4383 * @ctxt: an XML parser context
4384 * @value: Receive a possible fixed default value for the attribute
4385 *
4386 * Parse an attribute default declaration
4387 *
4388 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4389 *
4390 * [ VC: Required Attribute ]
4391 * if the default declaration is the keyword #REQUIRED, then the
4392 * attribute must be specified for all elements of the type in the
4393 * attribute-list declaration.
4394 *
4395 * [ VC: Attribute Default Legal ]
4396 * The declared default value must meet the lexical constraints of
4397 * the declared attribute type c.f. xmlValidateAttributeDecl()
4398 *
4399 * [ VC: Fixed Attribute Default ]
4400 * if an attribute has a default value declared with the #FIXED
4401 * keyword, instances of that attribute must match the default value.
4402 *
4403 * [ WFC: No < in Attribute Values ]
4404 * handled in xmlParseAttValue()
4405 *
4406 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4407 * or XML_ATTRIBUTE_FIXED.
4408 */
4409
4410int
4411xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4412 int val;
4413 xmlChar *ret;
4414
4415 *value = NULL;
4416 if ((RAW == '#') && (NXT(1) == 'R') &&
4417 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
4418 (NXT(4) == 'U') && (NXT(5) == 'I') &&
4419 (NXT(6) == 'R') && (NXT(7) == 'E') &&
4420 (NXT(8) == 'D')) {
4421 SKIP(9);
4422 return(XML_ATTRIBUTE_REQUIRED);
4423 }
4424 if ((RAW == '#') && (NXT(1) == 'I') &&
4425 (NXT(2) == 'M') && (NXT(3) == 'P') &&
4426 (NXT(4) == 'L') && (NXT(5) == 'I') &&
4427 (NXT(6) == 'E') && (NXT(7) == 'D')) {
4428 SKIP(8);
4429 return(XML_ATTRIBUTE_IMPLIED);
4430 }
4431 val = XML_ATTRIBUTE_NONE;
4432 if ((RAW == '#') && (NXT(1) == 'F') &&
4433 (NXT(2) == 'I') && (NXT(3) == 'X') &&
4434 (NXT(4) == 'E') && (NXT(5) == 'D')) {
4435 SKIP(6);
4436 val = XML_ATTRIBUTE_FIXED;
4437 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004438 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4439 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004440 }
4441 SKIP_BLANKS;
4442 }
4443 ret = xmlParseAttValue(ctxt);
4444 ctxt->instate = XML_PARSER_DTD;
4445 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004446 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004447 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004448 } else
4449 *value = ret;
4450 return(val);
4451}
4452
4453/**
4454 * xmlParseNotationType:
4455 * @ctxt: an XML parser context
4456 *
4457 * parse an Notation attribute type.
4458 *
4459 * Note: the leading 'NOTATION' S part has already being parsed...
4460 *
4461 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4462 *
4463 * [ VC: Notation Attributes ]
4464 * Values of this type must match one of the notation names included
4465 * in the declaration; all notation names in the declaration must be declared.
4466 *
4467 * Returns: the notation attribute tree built while parsing
4468 */
4469
4470xmlEnumerationPtr
4471xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004472 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004473 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4474
4475 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004476 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004477 return(NULL);
4478 }
4479 SHRINK;
4480 do {
4481 NEXT;
4482 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004483 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004484 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004485 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4486 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004487 return(ret);
4488 }
4489 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004490 if (cur == NULL) return(ret);
4491 if (last == NULL) ret = last = cur;
4492 else {
4493 last->next = cur;
4494 last = cur;
4495 }
4496 SKIP_BLANKS;
4497 } while (RAW == '|');
4498 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004499 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004500 if ((last != NULL) && (last != ret))
4501 xmlFreeEnumeration(last);
4502 return(ret);
4503 }
4504 NEXT;
4505 return(ret);
4506}
4507
4508/**
4509 * xmlParseEnumerationType:
4510 * @ctxt: an XML parser context
4511 *
4512 * parse an Enumeration attribute type.
4513 *
4514 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4515 *
4516 * [ VC: Enumeration ]
4517 * Values of this type must match one of the Nmtoken tokens in
4518 * the declaration
4519 *
4520 * Returns: the enumeration attribute tree built while parsing
4521 */
4522
4523xmlEnumerationPtr
4524xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4525 xmlChar *name;
4526 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4527
4528 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004529 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004530 return(NULL);
4531 }
4532 SHRINK;
4533 do {
4534 NEXT;
4535 SKIP_BLANKS;
4536 name = xmlParseNmtoken(ctxt);
4537 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004538 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004539 return(ret);
4540 }
4541 cur = xmlCreateEnumeration(name);
4542 xmlFree(name);
4543 if (cur == NULL) return(ret);
4544 if (last == NULL) ret = last = cur;
4545 else {
4546 last->next = cur;
4547 last = cur;
4548 }
4549 SKIP_BLANKS;
4550 } while (RAW == '|');
4551 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004552 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004553 return(ret);
4554 }
4555 NEXT;
4556 return(ret);
4557}
4558
4559/**
4560 * xmlParseEnumeratedType:
4561 * @ctxt: an XML parser context
4562 * @tree: the enumeration tree built while parsing
4563 *
4564 * parse an Enumerated attribute type.
4565 *
4566 * [57] EnumeratedType ::= NotationType | Enumeration
4567 *
4568 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4569 *
4570 *
4571 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4572 */
4573
4574int
4575xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4576 if ((RAW == 'N') && (NXT(1) == 'O') &&
4577 (NXT(2) == 'T') && (NXT(3) == 'A') &&
4578 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4579 (NXT(6) == 'O') && (NXT(7) == 'N')) {
4580 SKIP(8);
4581 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004582 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4583 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004584 return(0);
4585 }
4586 SKIP_BLANKS;
4587 *tree = xmlParseNotationType(ctxt);
4588 if (*tree == NULL) return(0);
4589 return(XML_ATTRIBUTE_NOTATION);
4590 }
4591 *tree = xmlParseEnumerationType(ctxt);
4592 if (*tree == NULL) return(0);
4593 return(XML_ATTRIBUTE_ENUMERATION);
4594}
4595
4596/**
4597 * xmlParseAttributeType:
4598 * @ctxt: an XML parser context
4599 * @tree: the enumeration tree built while parsing
4600 *
4601 * parse the Attribute list def for an element
4602 *
4603 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4604 *
4605 * [55] StringType ::= 'CDATA'
4606 *
4607 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4608 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4609 *
4610 * Validity constraints for attribute values syntax are checked in
4611 * xmlValidateAttributeValue()
4612 *
4613 * [ VC: ID ]
4614 * Values of type ID must match the Name production. A name must not
4615 * appear more than once in an XML document as a value of this type;
4616 * i.e., ID values must uniquely identify the elements which bear them.
4617 *
4618 * [ VC: One ID per Element Type ]
4619 * No element type may have more than one ID attribute specified.
4620 *
4621 * [ VC: ID Attribute Default ]
4622 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4623 *
4624 * [ VC: IDREF ]
4625 * Values of type IDREF must match the Name production, and values
4626 * of type IDREFS must match Names; each IDREF Name must match the value
4627 * of an ID attribute on some element in the XML document; i.e. IDREF
4628 * values must match the value of some ID attribute.
4629 *
4630 * [ VC: Entity Name ]
4631 * Values of type ENTITY must match the Name production, values
4632 * of type ENTITIES must match Names; each Entity Name must match the
4633 * name of an unparsed entity declared in the DTD.
4634 *
4635 * [ VC: Name Token ]
4636 * Values of type NMTOKEN must match the Nmtoken production; values
4637 * of type NMTOKENS must match Nmtokens.
4638 *
4639 * Returns the attribute type
4640 */
4641int
4642xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4643 SHRINK;
4644 if ((RAW == 'C') && (NXT(1) == 'D') &&
4645 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4646 (NXT(4) == 'A')) {
4647 SKIP(5);
4648 return(XML_ATTRIBUTE_CDATA);
4649 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4650 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4651 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4652 SKIP(6);
4653 return(XML_ATTRIBUTE_IDREFS);
4654 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4655 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4656 (NXT(4) == 'F')) {
4657 SKIP(5);
4658 return(XML_ATTRIBUTE_IDREF);
4659 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4660 SKIP(2);
4661 return(XML_ATTRIBUTE_ID);
4662 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4663 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4664 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4665 SKIP(6);
4666 return(XML_ATTRIBUTE_ENTITY);
4667 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4668 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4669 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4670 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4671 SKIP(8);
4672 return(XML_ATTRIBUTE_ENTITIES);
4673 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4674 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4675 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4676 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4677 SKIP(8);
4678 return(XML_ATTRIBUTE_NMTOKENS);
4679 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4680 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4681 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4682 (NXT(6) == 'N')) {
4683 SKIP(7);
4684 return(XML_ATTRIBUTE_NMTOKEN);
4685 }
4686 return(xmlParseEnumeratedType(ctxt, tree));
4687}
4688
4689/**
4690 * xmlParseAttributeListDecl:
4691 * @ctxt: an XML parser context
4692 *
4693 * : parse the Attribute list def for an element
4694 *
4695 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4696 *
4697 * [53] AttDef ::= S Name S AttType S DefaultDecl
4698 *
4699 */
4700void
4701xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004702 const xmlChar *elemName;
4703 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004704 xmlEnumerationPtr tree;
4705
4706 if ((RAW == '<') && (NXT(1) == '!') &&
4707 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4708 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4709 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4710 (NXT(8) == 'T')) {
4711 xmlParserInputPtr input = ctxt->input;
4712
4713 SKIP(9);
4714 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004715 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004716 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004717 }
4718 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004719 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004720 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004721 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4722 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004723 return;
4724 }
4725 SKIP_BLANKS;
4726 GROW;
4727 while (RAW != '>') {
4728 const xmlChar *check = CUR_PTR;
4729 int type;
4730 int def;
4731 xmlChar *defaultValue = NULL;
4732
4733 GROW;
4734 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004735 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004736 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004737 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4738 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004739 break;
4740 }
4741 GROW;
4742 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004743 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004744 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004745 if (defaultValue != NULL)
4746 xmlFree(defaultValue);
4747 break;
4748 }
4749 SKIP_BLANKS;
4750
4751 type = xmlParseAttributeType(ctxt, &tree);
4752 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004753 if (defaultValue != NULL)
4754 xmlFree(defaultValue);
4755 break;
4756 }
4757
4758 GROW;
4759 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004760 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4761 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004762 if (defaultValue != NULL)
4763 xmlFree(defaultValue);
4764 if (tree != NULL)
4765 xmlFreeEnumeration(tree);
4766 break;
4767 }
4768 SKIP_BLANKS;
4769
4770 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4771 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004772 if (defaultValue != NULL)
4773 xmlFree(defaultValue);
4774 if (tree != NULL)
4775 xmlFreeEnumeration(tree);
4776 break;
4777 }
4778
4779 GROW;
4780 if (RAW != '>') {
4781 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004782 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004783 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004784 if (defaultValue != NULL)
4785 xmlFree(defaultValue);
4786 if (tree != NULL)
4787 xmlFreeEnumeration(tree);
4788 break;
4789 }
4790 SKIP_BLANKS;
4791 }
4792 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004793 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4794 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004795 if (defaultValue != NULL)
4796 xmlFree(defaultValue);
4797 if (tree != NULL)
4798 xmlFreeEnumeration(tree);
4799 break;
4800 }
4801 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4802 (ctxt->sax->attributeDecl != NULL))
4803 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4804 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004805 else if (tree != NULL)
4806 xmlFreeEnumeration(tree);
4807
4808 if ((ctxt->sax2) && (defaultValue != NULL) &&
4809 (def != XML_ATTRIBUTE_IMPLIED) &&
4810 (def != XML_ATTRIBUTE_REQUIRED)) {
4811 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4812 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004813 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4814 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4815 }
Owen Taylor3473f882001-02-23 17:55:21 +00004816 if (defaultValue != NULL)
4817 xmlFree(defaultValue);
4818 GROW;
4819 }
4820 if (RAW == '>') {
4821 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004822 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4823 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004824 }
4825 NEXT;
4826 }
Owen Taylor3473f882001-02-23 17:55:21 +00004827 }
4828}
4829
4830/**
4831 * xmlParseElementMixedContentDecl:
4832 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004833 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004834 *
4835 * parse the declaration for a Mixed Element content
4836 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4837 *
4838 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4839 * '(' S? '#PCDATA' S? ')'
4840 *
4841 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4842 *
4843 * [ VC: No Duplicate Types ]
4844 * The same name must not appear more than once in a single
4845 * mixed-content declaration.
4846 *
4847 * returns: the list of the xmlElementContentPtr describing the element choices
4848 */
4849xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004850xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004851 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004852 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004853
4854 GROW;
4855 if ((RAW == '#') && (NXT(1) == 'P') &&
4856 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4857 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4858 (NXT(6) == 'A')) {
4859 SKIP(7);
4860 SKIP_BLANKS;
4861 SHRINK;
4862 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004863 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004864 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4865 if (ctxt->vctxt.error != NULL)
4866 ctxt->vctxt.error(ctxt->vctxt.userData,
4867"Element content declaration doesn't start and stop in the same entity\n");
4868 ctxt->valid = 0;
4869 }
Owen Taylor3473f882001-02-23 17:55:21 +00004870 NEXT;
4871 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4872 if (RAW == '*') {
4873 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4874 NEXT;
4875 }
4876 return(ret);
4877 }
4878 if ((RAW == '(') || (RAW == '|')) {
4879 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4880 if (ret == NULL) return(NULL);
4881 }
4882 while (RAW == '|') {
4883 NEXT;
4884 if (elem == NULL) {
4885 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4886 if (ret == NULL) return(NULL);
4887 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004888 if (cur != NULL)
4889 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004890 cur = ret;
4891 } else {
4892 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4893 if (n == NULL) return(NULL);
4894 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004895 if (n->c1 != NULL)
4896 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004897 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004898 if (n != NULL)
4899 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004900 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004901 }
4902 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004903 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004904 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004905 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004906 "xmlParseElementMixedContentDecl : Name expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004907 xmlFreeElementContent(cur);
4908 return(NULL);
4909 }
4910 SKIP_BLANKS;
4911 GROW;
4912 }
4913 if ((RAW == ')') && (NXT(1) == '*')) {
4914 if (elem != NULL) {
4915 cur->c2 = xmlNewElementContent(elem,
4916 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004917 if (cur->c2 != NULL)
4918 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004919 }
4920 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004921 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004922 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4923 if (ctxt->vctxt.error != NULL)
4924 ctxt->vctxt.error(ctxt->vctxt.userData,
4925"Element content declaration doesn't start and stop in the same entity\n");
4926 ctxt->valid = 0;
4927 }
Owen Taylor3473f882001-02-23 17:55:21 +00004928 SKIP(2);
4929 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00004930 xmlFreeElementContent(ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004931 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004932 return(NULL);
4933 }
4934
4935 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004936 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004937 }
4938 return(ret);
4939}
4940
4941/**
4942 * xmlParseElementChildrenContentDecl:
4943 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004944 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004945 *
4946 * parse the declaration for a Mixed Element content
4947 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4948 *
4949 *
4950 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4951 *
4952 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4953 *
4954 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4955 *
4956 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4957 *
4958 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4959 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004960 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004961 * opening or closing parentheses in a choice, seq, or Mixed
4962 * construct is contained in the replacement text for a parameter
4963 * entity, both must be contained in the same replacement text. For
4964 * interoperability, if a parameter-entity reference appears in a
4965 * choice, seq, or Mixed construct, its replacement text should not
4966 * be empty, and neither the first nor last non-blank character of
4967 * the replacement text should be a connector (| or ,).
4968 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004969 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004970 * hierarchy.
4971 */
4972xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004973xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004974 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004975 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00004976 xmlChar type = 0;
4977
4978 SKIP_BLANKS;
4979 GROW;
4980 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004981 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004982
Owen Taylor3473f882001-02-23 17:55:21 +00004983 /* Recurse on first child */
4984 NEXT;
4985 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004986 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004987 SKIP_BLANKS;
4988 GROW;
4989 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004990 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004991 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004992 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004993 return(NULL);
4994 }
4995 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004996 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004997 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004998 return(NULL);
4999 }
Owen Taylor3473f882001-02-23 17:55:21 +00005000 GROW;
5001 if (RAW == '?') {
5002 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5003 NEXT;
5004 } else if (RAW == '*') {
5005 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5006 NEXT;
5007 } else if (RAW == '+') {
5008 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5009 NEXT;
5010 } else {
5011 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5012 }
Owen Taylor3473f882001-02-23 17:55:21 +00005013 GROW;
5014 }
5015 SKIP_BLANKS;
5016 SHRINK;
5017 while (RAW != ')') {
5018 /*
5019 * Each loop we parse one separator and one element.
5020 */
5021 if (RAW == ',') {
5022 if (type == 0) type = CUR;
5023
5024 /*
5025 * Detect "Name | Name , Name" error
5026 */
5027 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005028 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005029 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005030 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005031 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005032 xmlFreeElementContent(last);
5033 if (ret != NULL)
5034 xmlFreeElementContent(ret);
5035 return(NULL);
5036 }
5037 NEXT;
5038
5039 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
5040 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005041 if ((last != NULL) && (last != ret))
5042 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00005043 xmlFreeElementContent(ret);
5044 return(NULL);
5045 }
5046 if (last == NULL) {
5047 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005048 if (ret != NULL)
5049 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005050 ret = cur = op;
5051 } else {
5052 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005053 if (op != NULL)
5054 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005055 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005056 if (last != NULL)
5057 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005058 cur =op;
5059 last = NULL;
5060 }
5061 } else if (RAW == '|') {
5062 if (type == 0) type = CUR;
5063
5064 /*
5065 * Detect "Name , Name | Name" error
5066 */
5067 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005068 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005069 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005070 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005071 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005072 xmlFreeElementContent(last);
5073 if (ret != NULL)
5074 xmlFreeElementContent(ret);
5075 return(NULL);
5076 }
5077 NEXT;
5078
5079 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5080 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005081 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005082 xmlFreeElementContent(last);
5083 if (ret != NULL)
5084 xmlFreeElementContent(ret);
5085 return(NULL);
5086 }
5087 if (last == NULL) {
5088 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005089 if (ret != NULL)
5090 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005091 ret = cur = op;
5092 } else {
5093 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005094 if (op != NULL)
5095 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005096 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005097 if (last != NULL)
5098 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005099 cur =op;
5100 last = NULL;
5101 }
5102 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005103 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005104 if (ret != NULL)
5105 xmlFreeElementContent(ret);
5106 return(NULL);
5107 }
5108 GROW;
5109 SKIP_BLANKS;
5110 GROW;
5111 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005112 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005113 /* Recurse on second child */
5114 NEXT;
5115 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005116 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005117 SKIP_BLANKS;
5118 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005119 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005120 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005121 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005122 if (ret != NULL)
5123 xmlFreeElementContent(ret);
5124 return(NULL);
5125 }
5126 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005127 if (RAW == '?') {
5128 last->ocur = XML_ELEMENT_CONTENT_OPT;
5129 NEXT;
5130 } else if (RAW == '*') {
5131 last->ocur = XML_ELEMENT_CONTENT_MULT;
5132 NEXT;
5133 } else if (RAW == '+') {
5134 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5135 NEXT;
5136 } else {
5137 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5138 }
5139 }
5140 SKIP_BLANKS;
5141 GROW;
5142 }
5143 if ((cur != NULL) && (last != NULL)) {
5144 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005145 if (last != NULL)
5146 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005147 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005148 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005149 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5150 if (ctxt->vctxt.error != NULL)
5151 ctxt->vctxt.error(ctxt->vctxt.userData,
5152"Element content declaration doesn't start and stop in the same entity\n");
5153 ctxt->valid = 0;
5154 }
Owen Taylor3473f882001-02-23 17:55:21 +00005155 NEXT;
5156 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00005157 if (ret != NULL)
5158 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00005159 NEXT;
5160 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005161 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005162 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005163 cur = ret;
5164 /*
5165 * Some normalization:
5166 * (a | b* | c?)* == (a | b | c)*
5167 */
5168 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5169 if ((cur->c1 != NULL) &&
5170 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5171 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5172 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5173 if ((cur->c2 != NULL) &&
5174 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5175 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5176 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5177 cur = cur->c2;
5178 }
5179 }
Owen Taylor3473f882001-02-23 17:55:21 +00005180 NEXT;
5181 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005182 if (ret != NULL) {
5183 int found = 0;
5184
Daniel Veillarde470df72001-04-18 21:41:07 +00005185 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005186 /*
5187 * Some normalization:
5188 * (a | b*)+ == (a | b)*
5189 * (a | b?)+ == (a | b)*
5190 */
5191 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5192 if ((cur->c1 != NULL) &&
5193 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5194 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5195 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5196 found = 1;
5197 }
5198 if ((cur->c2 != NULL) &&
5199 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5200 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5201 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5202 found = 1;
5203 }
5204 cur = cur->c2;
5205 }
5206 if (found)
5207 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5208 }
Owen Taylor3473f882001-02-23 17:55:21 +00005209 NEXT;
5210 }
5211 return(ret);
5212}
5213
5214/**
5215 * xmlParseElementContentDecl:
5216 * @ctxt: an XML parser context
5217 * @name: the name of the element being defined.
5218 * @result: the Element Content pointer will be stored here if any
5219 *
5220 * parse the declaration for an Element content either Mixed or Children,
5221 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5222 *
5223 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5224 *
5225 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5226 */
5227
5228int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005229xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005230 xmlElementContentPtr *result) {
5231
5232 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005233 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005234 int res;
5235
5236 *result = NULL;
5237
5238 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005239 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005240 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005241 return(-1);
5242 }
5243 NEXT;
5244 GROW;
5245 SKIP_BLANKS;
5246 if ((RAW == '#') && (NXT(1) == 'P') &&
5247 (NXT(2) == 'C') && (NXT(3) == 'D') &&
5248 (NXT(4) == 'A') && (NXT(5) == 'T') &&
5249 (NXT(6) == 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005250 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005251 res = XML_ELEMENT_TYPE_MIXED;
5252 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005253 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005254 res = XML_ELEMENT_TYPE_ELEMENT;
5255 }
Owen Taylor3473f882001-02-23 17:55:21 +00005256 SKIP_BLANKS;
5257 *result = tree;
5258 return(res);
5259}
5260
5261/**
5262 * xmlParseElementDecl:
5263 * @ctxt: an XML parser context
5264 *
5265 * parse an Element declaration.
5266 *
5267 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5268 *
5269 * [ VC: Unique Element Type Declaration ]
5270 * No element type may be declared more than once
5271 *
5272 * Returns the type of the element, or -1 in case of error
5273 */
5274int
5275xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005276 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005277 int ret = -1;
5278 xmlElementContentPtr content = NULL;
5279
5280 GROW;
5281 if ((RAW == '<') && (NXT(1) == '!') &&
5282 (NXT(2) == 'E') && (NXT(3) == 'L') &&
5283 (NXT(4) == 'E') && (NXT(5) == 'M') &&
5284 (NXT(6) == 'E') && (NXT(7) == 'N') &&
5285 (NXT(8) == 'T')) {
5286 xmlParserInputPtr input = ctxt->input;
5287
5288 SKIP(9);
5289 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005290 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5291 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005292 }
5293 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005294 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005295 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005296 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5297 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005298 return(-1);
5299 }
5300 while ((RAW == 0) && (ctxt->inputNr > 1))
5301 xmlPopInput(ctxt);
5302 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005303 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5304 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005305 }
5306 SKIP_BLANKS;
5307 if ((RAW == 'E') && (NXT(1) == 'M') &&
5308 (NXT(2) == 'P') && (NXT(3) == 'T') &&
5309 (NXT(4) == 'Y')) {
5310 SKIP(5);
5311 /*
5312 * Element must always be empty.
5313 */
5314 ret = XML_ELEMENT_TYPE_EMPTY;
5315 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5316 (NXT(2) == 'Y')) {
5317 SKIP(3);
5318 /*
5319 * Element is a generic container.
5320 */
5321 ret = XML_ELEMENT_TYPE_ANY;
5322 } else if (RAW == '(') {
5323 ret = xmlParseElementContentDecl(ctxt, name, &content);
5324 } else {
5325 /*
5326 * [ WFC: PEs in Internal Subset ] error handling.
5327 */
5328 if ((RAW == '%') && (ctxt->external == 0) &&
5329 (ctxt->inputNr == 1)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005330 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
5331 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5332 ctxt->sax->error(ctxt->userData,
5333 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005334 } else {
5335 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
5336 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5337 ctxt->sax->error(ctxt->userData,
5338 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5339 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005340 ctxt->wellFormed = 0;
5341 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005342 return(-1);
5343 }
5344
5345 SKIP_BLANKS;
5346 /*
5347 * Pop-up of finished entities.
5348 */
5349 while ((RAW == 0) && (ctxt->inputNr > 1))
5350 xmlPopInput(ctxt);
5351 SKIP_BLANKS;
5352
5353 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005354 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005355 } else {
5356 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005357 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5358 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005359 }
5360
5361 NEXT;
5362 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5363 (ctxt->sax->elementDecl != NULL))
5364 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5365 content);
5366 }
5367 if (content != NULL) {
5368 xmlFreeElementContent(content);
5369 }
Owen Taylor3473f882001-02-23 17:55:21 +00005370 }
5371 return(ret);
5372}
5373
5374/**
Owen Taylor3473f882001-02-23 17:55:21 +00005375 * xmlParseConditionalSections
5376 * @ctxt: an XML parser context
5377 *
5378 * [61] conditionalSect ::= includeSect | ignoreSect
5379 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5380 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5381 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5382 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5383 */
5384
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005385static void
Owen Taylor3473f882001-02-23 17:55:21 +00005386xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5387 SKIP(3);
5388 SKIP_BLANKS;
5389 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
5390 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
5391 (NXT(6) == 'E')) {
5392 SKIP(7);
5393 SKIP_BLANKS;
5394 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005395 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005396 } else {
5397 NEXT;
5398 }
5399 if (xmlParserDebugEntities) {
5400 if ((ctxt->input != NULL) && (ctxt->input->filename))
5401 xmlGenericError(xmlGenericErrorContext,
5402 "%s(%d): ", ctxt->input->filename,
5403 ctxt->input->line);
5404 xmlGenericError(xmlGenericErrorContext,
5405 "Entering INCLUDE Conditional Section\n");
5406 }
5407
5408 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5409 (NXT(2) != '>'))) {
5410 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005411 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005412
5413 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5414 xmlParseConditionalSections(ctxt);
5415 } else if (IS_BLANK(CUR)) {
5416 NEXT;
5417 } else if (RAW == '%') {
5418 xmlParsePEReference(ctxt);
5419 } else
5420 xmlParseMarkupDecl(ctxt);
5421
5422 /*
5423 * Pop-up of finished entities.
5424 */
5425 while ((RAW == 0) && (ctxt->inputNr > 1))
5426 xmlPopInput(ctxt);
5427
Daniel Veillardfdc91562002-07-01 21:52:03 +00005428 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005429 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005430 break;
5431 }
5432 }
5433 if (xmlParserDebugEntities) {
5434 if ((ctxt->input != NULL) && (ctxt->input->filename))
5435 xmlGenericError(xmlGenericErrorContext,
5436 "%s(%d): ", ctxt->input->filename,
5437 ctxt->input->line);
5438 xmlGenericError(xmlGenericErrorContext,
5439 "Leaving INCLUDE Conditional Section\n");
5440 }
5441
5442 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
5443 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
5444 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005445 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005446 int depth = 0;
5447
5448 SKIP(6);
5449 SKIP_BLANKS;
5450 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005451 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005452 } else {
5453 NEXT;
5454 }
5455 if (xmlParserDebugEntities) {
5456 if ((ctxt->input != NULL) && (ctxt->input->filename))
5457 xmlGenericError(xmlGenericErrorContext,
5458 "%s(%d): ", ctxt->input->filename,
5459 ctxt->input->line);
5460 xmlGenericError(xmlGenericErrorContext,
5461 "Entering IGNORE Conditional Section\n");
5462 }
5463
5464 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005465 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005466 * But disable SAX event generating DTD building in the meantime
5467 */
5468 state = ctxt->disableSAX;
5469 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005470 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005471 ctxt->instate = XML_PARSER_IGNORE;
5472
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005473 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005474 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5475 depth++;
5476 SKIP(3);
5477 continue;
5478 }
5479 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5480 if (--depth >= 0) SKIP(3);
5481 continue;
5482 }
5483 NEXT;
5484 continue;
5485 }
5486
5487 ctxt->disableSAX = state;
5488 ctxt->instate = instate;
5489
5490 if (xmlParserDebugEntities) {
5491 if ((ctxt->input != NULL) && (ctxt->input->filename))
5492 xmlGenericError(xmlGenericErrorContext,
5493 "%s(%d): ", ctxt->input->filename,
5494 ctxt->input->line);
5495 xmlGenericError(xmlGenericErrorContext,
5496 "Leaving IGNORE Conditional Section\n");
5497 }
5498
5499 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005500 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005501 }
5502
5503 if (RAW == 0)
5504 SHRINK;
5505
5506 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005507 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005508 } else {
5509 SKIP(3);
5510 }
5511}
5512
5513/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005514 * xmlParseMarkupDecl:
5515 * @ctxt: an XML parser context
5516 *
5517 * parse Markup declarations
5518 *
5519 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5520 * NotationDecl | PI | Comment
5521 *
5522 * [ VC: Proper Declaration/PE Nesting ]
5523 * Parameter-entity replacement text must be properly nested with
5524 * markup declarations. That is to say, if either the first character
5525 * or the last character of a markup declaration (markupdecl above) is
5526 * contained in the replacement text for a parameter-entity reference,
5527 * both must be contained in the same replacement text.
5528 *
5529 * [ WFC: PEs in Internal Subset ]
5530 * In the internal DTD subset, parameter-entity references can occur
5531 * only where markup declarations can occur, not within markup declarations.
5532 * (This does not apply to references that occur in external parameter
5533 * entities or to the external subset.)
5534 */
5535void
5536xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5537 GROW;
5538 xmlParseElementDecl(ctxt);
5539 xmlParseAttributeListDecl(ctxt);
5540 xmlParseEntityDecl(ctxt);
5541 xmlParseNotationDecl(ctxt);
5542 xmlParsePI(ctxt);
5543 xmlParseComment(ctxt);
5544 /*
5545 * This is only for internal subset. On external entities,
5546 * the replacement is done before parsing stage
5547 */
5548 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5549 xmlParsePEReference(ctxt);
5550
5551 /*
5552 * Conditional sections are allowed from entities included
5553 * by PE References in the internal subset.
5554 */
5555 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5556 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5557 xmlParseConditionalSections(ctxt);
5558 }
5559 }
5560
5561 ctxt->instate = XML_PARSER_DTD;
5562}
5563
5564/**
5565 * xmlParseTextDecl:
5566 * @ctxt: an XML parser context
5567 *
5568 * parse an XML declaration header for external entities
5569 *
5570 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5571 *
5572 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5573 */
5574
5575void
5576xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5577 xmlChar *version;
5578
5579 /*
5580 * We know that '<?xml' is here.
5581 */
5582 if ((RAW == '<') && (NXT(1) == '?') &&
5583 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5584 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5585 SKIP(5);
5586 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005587 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005588 return;
5589 }
5590
5591 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005592 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5593 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005594 }
5595 SKIP_BLANKS;
5596
5597 /*
5598 * We may have the VersionInfo here.
5599 */
5600 version = xmlParseVersionInfo(ctxt);
5601 if (version == NULL)
5602 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005603 else {
5604 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005605 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5606 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005607 }
5608 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005609 ctxt->input->version = version;
5610
5611 /*
5612 * We must have the encoding declaration
5613 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005614 xmlParseEncodingDecl(ctxt);
5615 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5616 /*
5617 * The XML REC instructs us to stop parsing right here
5618 */
5619 return;
5620 }
5621
5622 SKIP_BLANKS;
5623 if ((RAW == '?') && (NXT(1) == '>')) {
5624 SKIP(2);
5625 } else if (RAW == '>') {
5626 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005627 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005628 NEXT;
5629 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005630 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005631 MOVETO_ENDTAG(CUR_PTR);
5632 NEXT;
5633 }
5634}
5635
5636/**
Owen Taylor3473f882001-02-23 17:55:21 +00005637 * xmlParseExternalSubset:
5638 * @ctxt: an XML parser context
5639 * @ExternalID: the external identifier
5640 * @SystemID: the system identifier (or URL)
5641 *
5642 * parse Markup declarations from an external subset
5643 *
5644 * [30] extSubset ::= textDecl? extSubsetDecl
5645 *
5646 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5647 */
5648void
5649xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5650 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005651 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005652 GROW;
5653 if ((RAW == '<') && (NXT(1) == '?') &&
5654 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5655 (NXT(4) == 'l')) {
5656 xmlParseTextDecl(ctxt);
5657 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5658 /*
5659 * The XML REC instructs us to stop parsing right here
5660 */
5661 ctxt->instate = XML_PARSER_EOF;
5662 return;
5663 }
5664 }
5665 if (ctxt->myDoc == NULL) {
5666 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5667 }
5668 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5669 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5670
5671 ctxt->instate = XML_PARSER_DTD;
5672 ctxt->external = 1;
5673 while (((RAW == '<') && (NXT(1) == '?')) ||
5674 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005675 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005676 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005677 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005678
5679 GROW;
5680 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5681 xmlParseConditionalSections(ctxt);
5682 } else if (IS_BLANK(CUR)) {
5683 NEXT;
5684 } else if (RAW == '%') {
5685 xmlParsePEReference(ctxt);
5686 } else
5687 xmlParseMarkupDecl(ctxt);
5688
5689 /*
5690 * Pop-up of finished entities.
5691 */
5692 while ((RAW == 0) && (ctxt->inputNr > 1))
5693 xmlPopInput(ctxt);
5694
Daniel Veillardfdc91562002-07-01 21:52:03 +00005695 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005696 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005697 break;
5698 }
5699 }
5700
5701 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005702 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005703 }
5704
5705}
5706
5707/**
5708 * xmlParseReference:
5709 * @ctxt: an XML parser context
5710 *
5711 * parse and handle entity references in content, depending on the SAX
5712 * interface, this may end-up in a call to character() if this is a
5713 * CharRef, a predefined entity, if there is no reference() callback.
5714 * or if the parser was asked to switch to that mode.
5715 *
5716 * [67] Reference ::= EntityRef | CharRef
5717 */
5718void
5719xmlParseReference(xmlParserCtxtPtr ctxt) {
5720 xmlEntityPtr ent;
5721 xmlChar *val;
5722 if (RAW != '&') return;
5723
5724 if (NXT(1) == '#') {
5725 int i = 0;
5726 xmlChar out[10];
5727 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005728 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005729
5730 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5731 /*
5732 * So we are using non-UTF-8 buffers
5733 * Check that the char fit on 8bits, if not
5734 * generate a CharRef.
5735 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005736 if (value <= 0xFF) {
5737 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005738 out[1] = 0;
5739 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5740 (!ctxt->disableSAX))
5741 ctxt->sax->characters(ctxt->userData, out, 1);
5742 } else {
5743 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005744 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005745 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005746 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005747 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5748 (!ctxt->disableSAX))
5749 ctxt->sax->reference(ctxt->userData, out);
5750 }
5751 } else {
5752 /*
5753 * Just encode the value in UTF-8
5754 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005755 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005756 out[i] = 0;
5757 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5758 (!ctxt->disableSAX))
5759 ctxt->sax->characters(ctxt->userData, out, i);
5760 }
5761 } else {
5762 ent = xmlParseEntityRef(ctxt);
5763 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005764 if (!ctxt->wellFormed)
5765 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005766 if ((ent->name != NULL) &&
5767 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5768 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005769 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005770
5771
5772 /*
5773 * The first reference to the entity trigger a parsing phase
5774 * where the ent->children is filled with the result from
5775 * the parsing.
5776 */
5777 if (ent->children == NULL) {
5778 xmlChar *value;
5779 value = ent->content;
5780
5781 /*
5782 * Check that this entity is well formed
5783 */
5784 if ((value != NULL) &&
5785 (value[1] == 0) && (value[0] == '<') &&
5786 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5787 /*
5788 * DONE: get definite answer on this !!!
5789 * Lots of entity decls are used to declare a single
5790 * char
5791 * <!ENTITY lt "<">
5792 * Which seems to be valid since
5793 * 2.4: The ampersand character (&) and the left angle
5794 * bracket (<) may appear in their literal form only
5795 * when used ... They are also legal within the literal
5796 * entity value of an internal entity declaration;i
5797 * see "4.3.2 Well-Formed Parsed Entities".
5798 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5799 * Looking at the OASIS test suite and James Clark
5800 * tests, this is broken. However the XML REC uses
5801 * it. Is the XML REC not well-formed ????
5802 * This is a hack to avoid this problem
5803 *
5804 * ANSWER: since lt gt amp .. are already defined,
5805 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005806 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005807 * is lousy but acceptable.
5808 */
5809 list = xmlNewDocText(ctxt->myDoc, value);
5810 if (list != NULL) {
5811 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5812 (ent->children == NULL)) {
5813 ent->children = list;
5814 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005815 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005816 list->parent = (xmlNodePtr) ent;
5817 } else {
5818 xmlFreeNodeList(list);
5819 }
5820 } else if (list != NULL) {
5821 xmlFreeNodeList(list);
5822 }
5823 } else {
5824 /*
5825 * 4.3.2: An internal general parsed entity is well-formed
5826 * if its replacement text matches the production labeled
5827 * content.
5828 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005829
5830 void *user_data;
5831 /*
5832 * This is a bit hackish but this seems the best
5833 * way to make sure both SAX and DOM entity support
5834 * behaves okay.
5835 */
5836 if (ctxt->userData == ctxt)
5837 user_data = NULL;
5838 else
5839 user_data = ctxt->userData;
5840
Owen Taylor3473f882001-02-23 17:55:21 +00005841 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5842 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005843 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5844 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005845 ctxt->depth--;
5846 } else if (ent->etype ==
5847 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5848 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005849 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005850 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005851 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005852 ctxt->depth--;
5853 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005854 ret = XML_ERR_ENTITY_PE_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00005855 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5856 ctxt->sax->error(ctxt->userData,
5857 "Internal: invalid entity type\n");
5858 }
5859 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005860 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005861 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005862 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005863 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5864 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005865 (ent->children == NULL)) {
5866 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005867 if (ctxt->replaceEntities) {
5868 /*
5869 * Prune it directly in the generated document
5870 * except for single text nodes.
5871 */
5872 if ((list->type == XML_TEXT_NODE) &&
5873 (list->next == NULL)) {
5874 list->parent = (xmlNodePtr) ent;
5875 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005876 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005877 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005878 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005879 while (list != NULL) {
5880 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005881 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005882 if (list->next == NULL)
5883 ent->last = list;
5884 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005885 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005886 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005887 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5888 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005889 }
5890 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005891 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005892 while (list != NULL) {
5893 list->parent = (xmlNodePtr) ent;
5894 if (list->next == NULL)
5895 ent->last = list;
5896 list = list->next;
5897 }
Owen Taylor3473f882001-02-23 17:55:21 +00005898 }
5899 } else {
5900 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005901 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005902 }
William M. Brackb670e2e2003-09-27 01:05:55 +00005903 } else if ((ret != XML_ERR_OK) &&
5904 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005905 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005906 } else if (list != NULL) {
5907 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005908 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005909 }
5910 }
5911 }
5912 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5913 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5914 /*
5915 * Create a node.
5916 */
5917 ctxt->sax->reference(ctxt->userData, ent->name);
5918 return;
5919 } else if (ctxt->replaceEntities) {
5920 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5921 /*
5922 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005923 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005924 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005925 */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005926 if ((list == NULL) && (ent->owner == 0)) {
5927 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005928 cur = ent->children;
5929 while (cur != NULL) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005930 nw = xmlCopyNode(cur, 1);
5931 if (nw != NULL) {
5932 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005933 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005934 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005935 }
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005936 xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005937 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005938 if (cur == ent->last)
5939 break;
5940 cur = cur->next;
5941 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005942 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005943 xmlAddEntityReference(ent, firstChild, nw);
5944 } else if (list == NULL) {
5945 xmlNodePtr nw = NULL, cur, next, last,
5946 firstChild = NULL;
5947 /*
5948 * Copy the entity child list and make it the new
5949 * entity child list. The goal is to make sure any
5950 * ID or REF referenced will be the one from the
5951 * document content and not the entity copy.
5952 */
5953 cur = ent->children;
5954 ent->children = NULL;
5955 last = ent->last;
5956 ent->last = NULL;
5957 while (cur != NULL) {
5958 next = cur->next;
5959 cur->next = NULL;
5960 cur->parent = NULL;
5961 nw = xmlCopyNode(cur, 1);
5962 if (nw != NULL) {
5963 nw->_private = cur->_private;
5964 if (firstChild == NULL){
5965 firstChild = cur;
5966 }
5967 xmlAddChild((xmlNodePtr) ent, nw);
5968 xmlAddChild(ctxt->node, cur);
5969 }
5970 if (cur == last)
5971 break;
5972 cur = next;
5973 }
5974 ent->owner = 1;
5975 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5976 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005977 } else {
5978 /*
5979 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005980 * node with a possible previous text one which
5981 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005982 */
5983 if (ent->children->type == XML_TEXT_NODE)
5984 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5985 if ((ent->last != ent->children) &&
5986 (ent->last->type == XML_TEXT_NODE))
5987 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5988 xmlAddChildList(ctxt->node, ent->children);
5989 }
5990
Owen Taylor3473f882001-02-23 17:55:21 +00005991 /*
5992 * This is to avoid a nasty side effect, see
5993 * characters() in SAX.c
5994 */
5995 ctxt->nodemem = 0;
5996 ctxt->nodelen = 0;
5997 return;
5998 } else {
5999 /*
6000 * Probably running in SAX mode
6001 */
6002 xmlParserInputPtr input;
6003
6004 input = xmlNewEntityInputStream(ctxt, ent);
6005 xmlPushInput(ctxt, input);
6006 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
6007 (RAW == '<') && (NXT(1) == '?') &&
6008 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6009 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6010 xmlParseTextDecl(ctxt);
6011 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6012 /*
6013 * The XML REC instructs us to stop parsing right here
6014 */
6015 ctxt->instate = XML_PARSER_EOF;
6016 return;
6017 }
6018 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006019 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
6020 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006021 }
6022 }
6023 return;
6024 }
6025 }
6026 } else {
6027 val = ent->content;
6028 if (val == NULL) return;
6029 /*
6030 * inline the entity.
6031 */
6032 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6033 (!ctxt->disableSAX))
6034 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6035 }
6036 }
6037}
6038
6039/**
6040 * xmlParseEntityRef:
6041 * @ctxt: an XML parser context
6042 *
6043 * parse ENTITY references declarations
6044 *
6045 * [68] EntityRef ::= '&' Name ';'
6046 *
6047 * [ WFC: Entity Declared ]
6048 * In a document without any DTD, a document with only an internal DTD
6049 * subset which contains no parameter entity references, or a document
6050 * with "standalone='yes'", the Name given in the entity reference
6051 * must match that in an entity declaration, except that well-formed
6052 * documents need not declare any of the following entities: amp, lt,
6053 * gt, apos, quot. The declaration of a parameter entity must precede
6054 * any reference to it. Similarly, the declaration of a general entity
6055 * must precede any reference to it which appears in a default value in an
6056 * attribute-list declaration. Note that if entities are declared in the
6057 * external subset or in external parameter entities, a non-validating
6058 * processor is not obligated to read and process their declarations;
6059 * for such documents, the rule that an entity must be declared is a
6060 * well-formedness constraint only if standalone='yes'.
6061 *
6062 * [ WFC: Parsed Entity ]
6063 * An entity reference must not contain the name of an unparsed entity
6064 *
6065 * Returns the xmlEntityPtr if found, or NULL otherwise.
6066 */
6067xmlEntityPtr
6068xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006069 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006070 xmlEntityPtr ent = NULL;
6071
6072 GROW;
6073
6074 if (RAW == '&') {
6075 NEXT;
6076 name = xmlParseName(ctxt);
6077 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006078 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6079 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006080 } else {
6081 if (RAW == ';') {
6082 NEXT;
6083 /*
6084 * Ask first SAX for entity resolution, otherwise try the
6085 * predefined set.
6086 */
6087 if (ctxt->sax != NULL) {
6088 if (ctxt->sax->getEntity != NULL)
6089 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006090 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006091 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006092 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6093 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006094 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006095 }
Owen Taylor3473f882001-02-23 17:55:21 +00006096 }
6097 /*
6098 * [ WFC: Entity Declared ]
6099 * In a document without any DTD, a document with only an
6100 * internal DTD subset which contains no parameter entity
6101 * references, or a document with "standalone='yes'", the
6102 * Name given in the entity reference must match that in an
6103 * entity declaration, except that well-formed documents
6104 * need not declare any of the following entities: amp, lt,
6105 * gt, apos, quot.
6106 * The declaration of a parameter entity must precede any
6107 * reference to it.
6108 * Similarly, the declaration of a general entity must
6109 * precede any reference to it which appears in a default
6110 * value in an attribute-list declaration. Note that if
6111 * entities are declared in the external subset or in
6112 * external parameter entities, a non-validating processor
6113 * is not obligated to read and process their declarations;
6114 * for such documents, the rule that an entity must be
6115 * declared is a well-formedness constraint only if
6116 * standalone='yes'.
6117 */
6118 if (ent == NULL) {
6119 if ((ctxt->standalone == 1) ||
6120 ((ctxt->hasExternalSubset == 0) &&
6121 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006122 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006123 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00006124 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006125 } else {
6126 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00006127 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00006128 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00006129 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00006130 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006131 }
6132 }
6133
6134 /*
6135 * [ WFC: Parsed Entity ]
6136 * An entity reference must not contain the name of an
6137 * unparsed entity
6138 */
6139 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006140 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006141 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006142 }
6143
6144 /*
6145 * [ WFC: No External Entity References ]
6146 * Attribute values cannot contain direct or indirect
6147 * entity references to external entities.
6148 */
6149 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6150 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006151 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6152 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006153 }
6154 /*
6155 * [ WFC: No < in Attribute Values ]
6156 * The replacement text of any entity referred to directly or
6157 * indirectly in an attribute value (other than "&lt;") must
6158 * not contain a <.
6159 */
6160 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6161 (ent != NULL) &&
6162 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6163 (ent->content != NULL) &&
6164 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006165 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006166 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006167 }
6168
6169 /*
6170 * Internal check, no parameter entities here ...
6171 */
6172 else {
6173 switch (ent->etype) {
6174 case XML_INTERNAL_PARAMETER_ENTITY:
6175 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006176 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6177 "Attempt to reference the parameter entity '%s'\n",
6178 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006179 break;
6180 default:
6181 break;
6182 }
6183 }
6184
6185 /*
6186 * [ WFC: No Recursion ]
6187 * A parsed entity must not contain a recursive reference
6188 * to itself, either directly or indirectly.
6189 * Done somewhere else
6190 */
6191
6192 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006193 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006194 }
Owen Taylor3473f882001-02-23 17:55:21 +00006195 }
6196 }
6197 return(ent);
6198}
6199
6200/**
6201 * xmlParseStringEntityRef:
6202 * @ctxt: an XML parser context
6203 * @str: a pointer to an index in the string
6204 *
6205 * parse ENTITY references declarations, but this version parses it from
6206 * a string value.
6207 *
6208 * [68] EntityRef ::= '&' Name ';'
6209 *
6210 * [ WFC: Entity Declared ]
6211 * In a document without any DTD, a document with only an internal DTD
6212 * subset which contains no parameter entity references, or a document
6213 * with "standalone='yes'", the Name given in the entity reference
6214 * must match that in an entity declaration, except that well-formed
6215 * documents need not declare any of the following entities: amp, lt,
6216 * gt, apos, quot. The declaration of a parameter entity must precede
6217 * any reference to it. Similarly, the declaration of a general entity
6218 * must precede any reference to it which appears in a default value in an
6219 * attribute-list declaration. Note that if entities are declared in the
6220 * external subset or in external parameter entities, a non-validating
6221 * processor is not obligated to read and process their declarations;
6222 * for such documents, the rule that an entity must be declared is a
6223 * well-formedness constraint only if standalone='yes'.
6224 *
6225 * [ WFC: Parsed Entity ]
6226 * An entity reference must not contain the name of an unparsed entity
6227 *
6228 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6229 * is updated to the current location in the string.
6230 */
6231xmlEntityPtr
6232xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6233 xmlChar *name;
6234 const xmlChar *ptr;
6235 xmlChar cur;
6236 xmlEntityPtr ent = NULL;
6237
6238 if ((str == NULL) || (*str == NULL))
6239 return(NULL);
6240 ptr = *str;
6241 cur = *ptr;
6242 if (cur == '&') {
6243 ptr++;
6244 cur = *ptr;
6245 name = xmlParseStringName(ctxt, &ptr);
6246 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006247 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6248 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006249 } else {
6250 if (*ptr == ';') {
6251 ptr++;
6252 /*
6253 * Ask first SAX for entity resolution, otherwise try the
6254 * predefined set.
6255 */
6256 if (ctxt->sax != NULL) {
6257 if (ctxt->sax->getEntity != NULL)
6258 ent = ctxt->sax->getEntity(ctxt->userData, name);
6259 if (ent == NULL)
6260 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006261 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006262 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006263 }
Owen Taylor3473f882001-02-23 17:55:21 +00006264 }
6265 /*
6266 * [ WFC: Entity Declared ]
6267 * In a document without any DTD, a document with only an
6268 * internal DTD subset which contains no parameter entity
6269 * references, or a document with "standalone='yes'", the
6270 * Name given in the entity reference must match that in an
6271 * entity declaration, except that well-formed documents
6272 * need not declare any of the following entities: amp, lt,
6273 * gt, apos, quot.
6274 * The declaration of a parameter entity must precede any
6275 * reference to it.
6276 * Similarly, the declaration of a general entity must
6277 * precede any reference to it which appears in a default
6278 * value in an attribute-list declaration. Note that if
6279 * entities are declared in the external subset or in
6280 * external parameter entities, a non-validating processor
6281 * is not obligated to read and process their declarations;
6282 * for such documents, the rule that an entity must be
6283 * declared is a well-formedness constraint only if
6284 * standalone='yes'.
6285 */
6286 if (ent == NULL) {
6287 if ((ctxt->standalone == 1) ||
6288 ((ctxt->hasExternalSubset == 0) &&
6289 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006290 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006291 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006292 } else {
6293 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
6294 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6295 ctxt->sax->warning(ctxt->userData,
6296 "Entity '%s' not defined\n", name);
6297 }
6298 }
6299
6300 /*
6301 * [ WFC: Parsed Entity ]
6302 * An entity reference must not contain the name of an
6303 * unparsed entity
6304 */
6305 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6306 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
6307 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6308 ctxt->sax->error(ctxt->userData,
6309 "Entity reference to unparsed entity %s\n", name);
6310 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006311 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006312 }
6313
6314 /*
6315 * [ WFC: No External Entity References ]
6316 * Attribute values cannot contain direct or indirect
6317 * entity references to external entities.
6318 */
6319 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6320 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6321 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6322 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6323 ctxt->sax->error(ctxt->userData,
6324 "Attribute references external entity '%s'\n", name);
6325 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006326 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006327 }
6328 /*
6329 * [ WFC: No < in Attribute Values ]
6330 * The replacement text of any entity referred to directly or
6331 * indirectly in an attribute value (other than "&lt;") must
6332 * not contain a <.
6333 */
6334 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6335 (ent != NULL) &&
6336 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6337 (ent->content != NULL) &&
6338 (xmlStrchr(ent->content, '<'))) {
6339 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6340 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6341 ctxt->sax->error(ctxt->userData,
6342 "'<' in entity '%s' is not allowed in attributes values\n", name);
6343 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006344 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006345 }
6346
6347 /*
6348 * Internal check, no parameter entities here ...
6349 */
6350 else {
6351 switch (ent->etype) {
6352 case XML_INTERNAL_PARAMETER_ENTITY:
6353 case XML_EXTERNAL_PARAMETER_ENTITY:
6354 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6355 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6356 ctxt->sax->error(ctxt->userData,
6357 "Attempt to reference the parameter entity '%s'\n", name);
6358 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006359 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006360 break;
6361 default:
6362 break;
6363 }
6364 }
6365
6366 /*
6367 * [ WFC: No Recursion ]
6368 * A parsed entity must not contain a recursive reference
6369 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006370 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006371 */
6372
6373 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006374 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006375 }
6376 xmlFree(name);
6377 }
6378 }
6379 *str = ptr;
6380 return(ent);
6381}
6382
6383/**
6384 * xmlParsePEReference:
6385 * @ctxt: an XML parser context
6386 *
6387 * parse PEReference declarations
6388 * The entity content is handled directly by pushing it's content as
6389 * a new input stream.
6390 *
6391 * [69] PEReference ::= '%' Name ';'
6392 *
6393 * [ WFC: No Recursion ]
6394 * A parsed entity must not contain a recursive
6395 * reference to itself, either directly or indirectly.
6396 *
6397 * [ WFC: Entity Declared ]
6398 * In a document without any DTD, a document with only an internal DTD
6399 * subset which contains no parameter entity references, or a document
6400 * with "standalone='yes'", ... ... The declaration of a parameter
6401 * entity must precede any reference to it...
6402 *
6403 * [ VC: Entity Declared ]
6404 * In a document with an external subset or external parameter entities
6405 * with "standalone='no'", ... ... The declaration of a parameter entity
6406 * must precede any reference to it...
6407 *
6408 * [ WFC: In DTD ]
6409 * Parameter-entity references may only appear in the DTD.
6410 * NOTE: misleading but this is handled.
6411 */
6412void
6413xmlParsePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006414 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006415 xmlEntityPtr entity = NULL;
6416 xmlParserInputPtr input;
6417
6418 if (RAW == '%') {
6419 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006420 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006421 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006422 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6423 "xmlParsePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006424 } else {
6425 if (RAW == ';') {
6426 NEXT;
6427 if ((ctxt->sax != NULL) &&
6428 (ctxt->sax->getParameterEntity != NULL))
6429 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6430 name);
6431 if (entity == NULL) {
6432 /*
6433 * [ WFC: Entity Declared ]
6434 * In a document without any DTD, a document with only an
6435 * internal DTD subset which contains no parameter entity
6436 * references, or a document with "standalone='yes'", ...
6437 * ... The declaration of a parameter entity must precede
6438 * any reference to it...
6439 */
6440 if ((ctxt->standalone == 1) ||
6441 ((ctxt->hasExternalSubset == 0) &&
6442 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006443 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006444 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006445 } else {
6446 /*
6447 * [ VC: Entity Declared ]
6448 * In a document with an external subset or external
6449 * parameter entities with "standalone='no'", ...
6450 * ... The declaration of a parameter entity must precede
6451 * any reference to it...
6452 */
6453 if ((!ctxt->disableSAX) &&
6454 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6455 ctxt->sax->warning(ctxt->userData,
6456 "PEReference: %%%s; not found\n", name);
6457 ctxt->valid = 0;
6458 }
6459 } else {
6460 /*
6461 * Internal checking in case the entity quest barfed
6462 */
6463 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6464 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6465 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6466 ctxt->sax->warning(ctxt->userData,
6467 "Internal: %%%s; is not a parameter entity\n", name);
Daniel Veillardf5582f12002-06-11 10:08:16 +00006468 } else if (ctxt->input->free != deallocblankswrapper) {
6469 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6470 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00006471 } else {
6472 /*
6473 * TODO !!!
6474 * handle the extra spaces added before and after
6475 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6476 */
6477 input = xmlNewEntityInputStream(ctxt, entity);
6478 xmlPushInput(ctxt, input);
6479 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6480 (RAW == '<') && (NXT(1) == '?') &&
6481 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6482 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6483 xmlParseTextDecl(ctxt);
6484 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6485 /*
6486 * The XML REC instructs us to stop parsing
6487 * right here
6488 */
6489 ctxt->instate = XML_PARSER_EOF;
Owen Taylor3473f882001-02-23 17:55:21 +00006490 return;
6491 }
6492 }
Owen Taylor3473f882001-02-23 17:55:21 +00006493 }
6494 }
6495 ctxt->hasPErefs = 1;
6496 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006497 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006498 }
Owen Taylor3473f882001-02-23 17:55:21 +00006499 }
6500 }
6501}
6502
6503/**
6504 * xmlParseStringPEReference:
6505 * @ctxt: an XML parser context
6506 * @str: a pointer to an index in the string
6507 *
6508 * parse PEReference declarations
6509 *
6510 * [69] PEReference ::= '%' Name ';'
6511 *
6512 * [ WFC: No Recursion ]
6513 * A parsed entity must not contain a recursive
6514 * reference to itself, either directly or indirectly.
6515 *
6516 * [ WFC: Entity Declared ]
6517 * In a document without any DTD, a document with only an internal DTD
6518 * subset which contains no parameter entity references, or a document
6519 * with "standalone='yes'", ... ... The declaration of a parameter
6520 * entity must precede any reference to it...
6521 *
6522 * [ VC: Entity Declared ]
6523 * In a document with an external subset or external parameter entities
6524 * with "standalone='no'", ... ... The declaration of a parameter entity
6525 * must precede any reference to it...
6526 *
6527 * [ WFC: In DTD ]
6528 * Parameter-entity references may only appear in the DTD.
6529 * NOTE: misleading but this is handled.
6530 *
6531 * Returns the string of the entity content.
6532 * str is updated to the current value of the index
6533 */
6534xmlEntityPtr
6535xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6536 const xmlChar *ptr;
6537 xmlChar cur;
6538 xmlChar *name;
6539 xmlEntityPtr entity = NULL;
6540
6541 if ((str == NULL) || (*str == NULL)) return(NULL);
6542 ptr = *str;
6543 cur = *ptr;
6544 if (cur == '%') {
6545 ptr++;
6546 cur = *ptr;
6547 name = xmlParseStringName(ctxt, &ptr);
6548 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006549 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6550 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006551 } else {
6552 cur = *ptr;
6553 if (cur == ';') {
6554 ptr++;
6555 cur = *ptr;
6556 if ((ctxt->sax != NULL) &&
6557 (ctxt->sax->getParameterEntity != NULL))
6558 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6559 name);
6560 if (entity == NULL) {
6561 /*
6562 * [ WFC: Entity Declared ]
6563 * In a document without any DTD, a document with only an
6564 * internal DTD subset which contains no parameter entity
6565 * references, or a document with "standalone='yes'", ...
6566 * ... The declaration of a parameter entity must precede
6567 * any reference to it...
6568 */
6569 if ((ctxt->standalone == 1) ||
6570 ((ctxt->hasExternalSubset == 0) &&
6571 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006572 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006573 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006574 } else {
6575 /*
6576 * [ VC: Entity Declared ]
6577 * In a document with an external subset or external
6578 * parameter entities with "standalone='no'", ...
6579 * ... The declaration of a parameter entity must
6580 * precede any reference to it...
6581 */
6582 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6583 ctxt->sax->warning(ctxt->userData,
6584 "PEReference: %%%s; not found\n", name);
6585 ctxt->valid = 0;
6586 }
6587 } else {
6588 /*
6589 * Internal checking in case the entity quest barfed
6590 */
6591 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6592 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6593 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6594 ctxt->sax->warning(ctxt->userData,
6595 "Internal: %%%s; is not a parameter entity\n", name);
6596 }
6597 }
6598 ctxt->hasPErefs = 1;
6599 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006600 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006601 }
6602 xmlFree(name);
6603 }
6604 }
6605 *str = ptr;
6606 return(entity);
6607}
6608
6609/**
6610 * xmlParseDocTypeDecl:
6611 * @ctxt: an XML parser context
6612 *
6613 * parse a DOCTYPE declaration
6614 *
6615 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6616 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6617 *
6618 * [ VC: Root Element Type ]
6619 * The Name in the document type declaration must match the element
6620 * type of the root element.
6621 */
6622
6623void
6624xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006625 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006626 xmlChar *ExternalID = NULL;
6627 xmlChar *URI = NULL;
6628
6629 /*
6630 * We know that '<!DOCTYPE' has been detected.
6631 */
6632 SKIP(9);
6633
6634 SKIP_BLANKS;
6635
6636 /*
6637 * Parse the DOCTYPE name.
6638 */
6639 name = xmlParseName(ctxt);
6640 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006641 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6642 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006643 }
6644 ctxt->intSubName = name;
6645
6646 SKIP_BLANKS;
6647
6648 /*
6649 * Check for SystemID and ExternalID
6650 */
6651 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6652
6653 if ((URI != NULL) || (ExternalID != NULL)) {
6654 ctxt->hasExternalSubset = 1;
6655 }
6656 ctxt->extSubURI = URI;
6657 ctxt->extSubSystem = ExternalID;
6658
6659 SKIP_BLANKS;
6660
6661 /*
6662 * Create and update the internal subset.
6663 */
6664 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6665 (!ctxt->disableSAX))
6666 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6667
6668 /*
6669 * Is there any internal subset declarations ?
6670 * they are handled separately in xmlParseInternalSubset()
6671 */
6672 if (RAW == '[')
6673 return;
6674
6675 /*
6676 * We should be at the end of the DOCTYPE declaration.
6677 */
6678 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006679 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006680 }
6681 NEXT;
6682}
6683
6684/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006685 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006686 * @ctxt: an XML parser context
6687 *
6688 * parse the internal subset declaration
6689 *
6690 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6691 */
6692
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006693static void
Owen Taylor3473f882001-02-23 17:55:21 +00006694xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6695 /*
6696 * Is there any DTD definition ?
6697 */
6698 if (RAW == '[') {
6699 ctxt->instate = XML_PARSER_DTD;
6700 NEXT;
6701 /*
6702 * Parse the succession of Markup declarations and
6703 * PEReferences.
6704 * Subsequence (markupdecl | PEReference | S)*
6705 */
6706 while (RAW != ']') {
6707 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006708 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006709
6710 SKIP_BLANKS;
6711 xmlParseMarkupDecl(ctxt);
6712 xmlParsePEReference(ctxt);
6713
6714 /*
6715 * Pop-up of finished entities.
6716 */
6717 while ((RAW == 0) && (ctxt->inputNr > 1))
6718 xmlPopInput(ctxt);
6719
6720 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006721 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006722 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006723 break;
6724 }
6725 }
6726 if (RAW == ']') {
6727 NEXT;
6728 SKIP_BLANKS;
6729 }
6730 }
6731
6732 /*
6733 * We should be at the end of the DOCTYPE declaration.
6734 */
6735 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006736 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006737 }
6738 NEXT;
6739}
6740
6741/**
6742 * xmlParseAttribute:
6743 * @ctxt: an XML parser context
6744 * @value: a xmlChar ** used to store the value of the attribute
6745 *
6746 * parse an attribute
6747 *
6748 * [41] Attribute ::= Name Eq AttValue
6749 *
6750 * [ WFC: No External Entity References ]
6751 * Attribute values cannot contain direct or indirect entity references
6752 * to external entities.
6753 *
6754 * [ WFC: No < in Attribute Values ]
6755 * The replacement text of any entity referred to directly or indirectly in
6756 * an attribute value (other than "&lt;") must not contain a <.
6757 *
6758 * [ VC: Attribute Value Type ]
6759 * The attribute must have been declared; the value must be of the type
6760 * declared for it.
6761 *
6762 * [25] Eq ::= S? '=' S?
6763 *
6764 * With namespace:
6765 *
6766 * [NS 11] Attribute ::= QName Eq AttValue
6767 *
6768 * Also the case QName == xmlns:??? is handled independently as a namespace
6769 * definition.
6770 *
6771 * Returns the attribute name, and the value in *value.
6772 */
6773
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006774const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006775xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006776 const xmlChar *name;
6777 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006778
6779 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006780 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006781 name = xmlParseName(ctxt);
6782 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006783 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6784 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006785 return(NULL);
6786 }
6787
6788 /*
6789 * read the value
6790 */
6791 SKIP_BLANKS;
6792 if (RAW == '=') {
6793 NEXT;
6794 SKIP_BLANKS;
6795 val = xmlParseAttValue(ctxt);
6796 ctxt->instate = XML_PARSER_CONTENT;
6797 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006798 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006799 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006800 return(NULL);
6801 }
6802
6803 /*
6804 * Check that xml:lang conforms to the specification
6805 * No more registered as an error, just generate a warning now
6806 * since this was deprecated in XML second edition
6807 */
6808 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6809 if (!xmlCheckLanguageID(val)) {
6810 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6811 ctxt->sax->warning(ctxt->userData,
6812 "Malformed value for xml:lang : %s\n", val);
6813 }
6814 }
6815
6816 /*
6817 * Check that xml:space conforms to the specification
6818 */
6819 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6820 if (xmlStrEqual(val, BAD_CAST "default"))
6821 *(ctxt->space) = 0;
6822 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6823 *(ctxt->space) = 1;
6824 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006825 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006826"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006827 val);
Owen Taylor3473f882001-02-23 17:55:21 +00006828 }
6829 }
6830
6831 *value = val;
6832 return(name);
6833}
6834
6835/**
6836 * xmlParseStartTag:
6837 * @ctxt: an XML parser context
6838 *
6839 * parse a start of tag either for rule element or
6840 * EmptyElement. In both case we don't parse the tag closing chars.
6841 *
6842 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6843 *
6844 * [ WFC: Unique Att Spec ]
6845 * No attribute name may appear more than once in the same start-tag or
6846 * empty-element tag.
6847 *
6848 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6849 *
6850 * [ WFC: Unique Att Spec ]
6851 * No attribute name may appear more than once in the same start-tag or
6852 * empty-element tag.
6853 *
6854 * With namespace:
6855 *
6856 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6857 *
6858 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6859 *
6860 * Returns the element name parsed
6861 */
6862
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006863const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006864xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006865 const xmlChar *name;
6866 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006867 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006868 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006869 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006870 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006871 int i;
6872
6873 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006874 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006875
6876 name = xmlParseName(ctxt);
6877 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006878 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006879 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006880 return(NULL);
6881 }
6882
6883 /*
6884 * Now parse the attributes, it ends up with the ending
6885 *
6886 * (S Attribute)* S?
6887 */
6888 SKIP_BLANKS;
6889 GROW;
6890
Daniel Veillard21a0f912001-02-25 19:54:14 +00006891 while ((RAW != '>') &&
6892 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard34ba3872003-07-15 13:34:05 +00006893 (IS_CHAR((unsigned int) RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006894 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006895 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006896
6897 attname = xmlParseAttribute(ctxt, &attvalue);
6898 if ((attname != NULL) && (attvalue != NULL)) {
6899 /*
6900 * [ WFC: Unique Att Spec ]
6901 * No attribute name may appear more than once in the same
6902 * start-tag or empty-element tag.
6903 */
6904 for (i = 0; i < nbatts;i += 2) {
6905 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006906 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006907 xmlFree(attvalue);
6908 goto failed;
6909 }
6910 }
Owen Taylor3473f882001-02-23 17:55:21 +00006911 /*
6912 * Add the pair to atts
6913 */
6914 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006915 maxatts = 22; /* allow for 10 attrs by default */
6916 atts = (const xmlChar **)
6917 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006918 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006919 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006920 if (attvalue != NULL)
6921 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006922 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006923 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006924 ctxt->atts = atts;
6925 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006926 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006927 const xmlChar **n;
6928
Owen Taylor3473f882001-02-23 17:55:21 +00006929 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006930 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006931 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006932 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006933 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006934 if (attvalue != NULL)
6935 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006936 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006937 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006938 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006939 ctxt->atts = atts;
6940 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006941 }
6942 atts[nbatts++] = attname;
6943 atts[nbatts++] = attvalue;
6944 atts[nbatts] = NULL;
6945 atts[nbatts + 1] = NULL;
6946 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006947 if (attvalue != NULL)
6948 xmlFree(attvalue);
6949 }
6950
6951failed:
6952
Daniel Veillard3772de32002-12-17 10:31:45 +00006953 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006954 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6955 break;
6956 if (!IS_BLANK(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006957 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6958 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006959 }
6960 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006961 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6962 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006963 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
6964 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006965 break;
6966 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006967 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00006968 GROW;
6969 }
6970
6971 /*
6972 * SAX: Start of Element !
6973 */
6974 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006975 (!ctxt->disableSAX)) {
6976 if (nbatts > 0)
6977 ctxt->sax->startElement(ctxt->userData, name, atts);
6978 else
6979 ctxt->sax->startElement(ctxt->userData, name, NULL);
6980 }
Owen Taylor3473f882001-02-23 17:55:21 +00006981
6982 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006983 /* Free only the content strings */
6984 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006985 if (atts[i] != NULL)
6986 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006987 }
6988 return(name);
6989}
6990
6991/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00006992 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00006993 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00006994 * @line: line of the start tag
6995 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00006996 *
6997 * parse an end of tag
6998 *
6999 * [42] ETag ::= '</' Name S? '>'
7000 *
7001 * With namespace
7002 *
7003 * [NS 9] ETag ::= '</' QName S? '>'
7004 */
7005
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007006static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007007xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007008 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007009
7010 GROW;
7011 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007012 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7013 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007014 return;
7015 }
7016 SKIP(2);
7017
Daniel Veillard46de64e2002-05-29 08:21:33 +00007018 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007019
7020 /*
7021 * We should definitely be at the ending "S? '>'" part
7022 */
7023 GROW;
7024 SKIP_BLANKS;
Daniel Veillard34ba3872003-07-15 13:34:05 +00007025 if ((!IS_CHAR((unsigned int) RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007026 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007027 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007028 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007029
7030 /*
7031 * [ WFC: Element Type Match ]
7032 * The Name in an element's end-tag must match the element type in the
7033 * start-tag.
7034 *
7035 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007036 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00007037 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
7038 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00007039 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00007040 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007041 "Opening and ending tag mismatch: %s line %d and %s\n",
7042 ctxt->name, line, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00007043 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007044 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007045 "Ending tag error for: %s line %d\n", ctxt->name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00007046 }
7047
7048 }
7049 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007050 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007051 }
7052
7053 /*
7054 * SAX: End of Tag
7055 */
7056 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7057 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007058 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007059
Daniel Veillarde57ec792003-09-10 10:50:59 +00007060 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007061 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007062 return;
7063}
7064
7065/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007066 * xmlParseEndTag:
7067 * @ctxt: an XML parser context
7068 *
7069 * parse an end of tag
7070 *
7071 * [42] ETag ::= '</' Name S? '>'
7072 *
7073 * With namespace
7074 *
7075 * [NS 9] ETag ::= '</' QName S? '>'
7076 */
7077
7078void
7079xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007080 xmlParseEndTag1(ctxt, 0);
7081}
7082
7083/************************************************************************
7084 * *
7085 * SAX 2 specific operations *
7086 * *
7087 ************************************************************************/
7088
7089static const xmlChar *
7090xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7091 int len = 0, l;
7092 int c;
7093 int count = 0;
7094
7095 /*
7096 * Handler for more complex cases
7097 */
7098 GROW;
7099 c = CUR_CHAR(l);
7100 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007101 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007102 return(NULL);
7103 }
7104
7105 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
7106 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007107 (c == '.') || (c == '-') || (c == '_') ||
Daniel Veillard0fb18932003-09-07 09:14:37 +00007108 (IS_COMBINING(c)) ||
7109 (IS_EXTENDER(c)))) {
7110 if (count++ > 100) {
7111 count = 0;
7112 GROW;
7113 }
7114 len += l;
7115 NEXTL(l);
7116 c = CUR_CHAR(l);
7117 }
7118 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7119}
7120
7121/*
7122 * xmlGetNamespace:
7123 * @ctxt: an XML parser context
7124 * @prefix: the prefix to lookup
7125 *
7126 * Lookup the namespace name for the @prefix (which ca be NULL)
7127 * The prefix must come from the @ctxt->dict dictionnary
7128 *
7129 * Returns the namespace name or NULL if not bound
7130 */
7131static const xmlChar *
7132xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7133 int i;
7134
Daniel Veillarde57ec792003-09-10 10:50:59 +00007135 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007136 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007137 if (ctxt->nsTab[i] == prefix) {
7138 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7139 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007140 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007141 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007142 return(NULL);
7143}
7144
7145/**
7146 * xmlParseNCName:
7147 * @ctxt: an XML parser context
7148 *
7149 * parse an XML name.
7150 *
7151 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7152 * CombiningChar | Extender
7153 *
7154 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7155 *
7156 * Returns the Name parsed or NULL
7157 */
7158
7159static const xmlChar *
7160xmlParseNCName(xmlParserCtxtPtr ctxt) {
7161 const xmlChar *in;
7162 const xmlChar *ret;
7163 int count = 0;
7164
7165 /*
7166 * Accelerator for simple ASCII names
7167 */
7168 in = ctxt->input->cur;
7169 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7170 ((*in >= 0x41) && (*in <= 0x5A)) ||
7171 (*in == '_')) {
7172 in++;
7173 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7174 ((*in >= 0x41) && (*in <= 0x5A)) ||
7175 ((*in >= 0x30) && (*in <= 0x39)) ||
7176 (*in == '_') || (*in == '-') ||
7177 (*in == '.'))
7178 in++;
7179 if ((*in > 0) && (*in < 0x80)) {
7180 count = in - ctxt->input->cur;
7181 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7182 ctxt->input->cur = in;
7183 ctxt->nbChars += count;
7184 ctxt->input->col += count;
7185 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007186 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007187 }
7188 return(ret);
7189 }
7190 }
7191 return(xmlParseNCNameComplex(ctxt));
7192}
7193
7194/**
7195 * xmlParseQName:
7196 * @ctxt: an XML parser context
7197 * @prefix: pointer to store the prefix part
7198 *
7199 * parse an XML Namespace QName
7200 *
7201 * [6] QName ::= (Prefix ':')? LocalPart
7202 * [7] Prefix ::= NCName
7203 * [8] LocalPart ::= NCName
7204 *
7205 * Returns the Name parsed or NULL
7206 */
7207
7208static const xmlChar *
7209xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7210 const xmlChar *l, *p;
7211
7212 GROW;
7213
7214 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007215 if (l == NULL) {
7216 if (CUR == ':') {
7217 l = xmlParseName(ctxt);
7218 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007219 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7220 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007221 *prefix = NULL;
7222 return(l);
7223 }
7224 }
7225 return(NULL);
7226 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007227 if (CUR == ':') {
7228 NEXT;
7229 p = l;
7230 l = xmlParseNCName(ctxt);
7231 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007232 xmlChar *tmp;
7233
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007234 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7235 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007236 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7237 p = xmlDictLookup(ctxt->dict, tmp, -1);
7238 if (tmp != NULL) xmlFree(tmp);
7239 *prefix = NULL;
7240 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007241 }
7242 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007243 xmlChar *tmp;
7244
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007245 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7246 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007247 NEXT;
7248 tmp = (xmlChar *) xmlParseName(ctxt);
7249 if (tmp != NULL) {
7250 tmp = xmlBuildQName(tmp, l, NULL, 0);
7251 l = xmlDictLookup(ctxt->dict, tmp, -1);
7252 if (tmp != NULL) xmlFree(tmp);
7253 *prefix = p;
7254 return(l);
7255 }
7256 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7257 l = xmlDictLookup(ctxt->dict, tmp, -1);
7258 if (tmp != NULL) xmlFree(tmp);
7259 *prefix = p;
7260 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007261 }
7262 *prefix = p;
7263 } else
7264 *prefix = NULL;
7265 return(l);
7266}
7267
7268/**
7269 * xmlParseQNameAndCompare:
7270 * @ctxt: an XML parser context
7271 * @name: the localname
7272 * @prefix: the prefix, if any.
7273 *
7274 * parse an XML name and compares for match
7275 * (specialized for endtag parsing)
7276 *
7277 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7278 * and the name for mismatch
7279 */
7280
7281static const xmlChar *
7282xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7283 xmlChar const *prefix) {
7284 const xmlChar *cmp = name;
7285 const xmlChar *in;
7286 const xmlChar *ret;
7287 const xmlChar *prefix2;
7288
7289 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7290
7291 GROW;
7292 in = ctxt->input->cur;
7293
7294 cmp = prefix;
7295 while (*in != 0 && *in == *cmp) {
7296 ++in;
7297 ++cmp;
7298 }
7299 if ((*cmp == 0) && (*in == ':')) {
7300 in++;
7301 cmp = name;
7302 while (*in != 0 && *in == *cmp) {
7303 ++in;
7304 ++cmp;
7305 }
7306 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
7307 /* success */
7308 ctxt->input->cur = in;
7309 return((const xmlChar*) 1);
7310 }
7311 }
7312 /*
7313 * all strings coms from the dictionary, equality can be done directly
7314 */
7315 ret = xmlParseQName (ctxt, &prefix2);
7316 if ((ret == name) && (prefix == prefix2))
7317 return((const xmlChar*) 1);
7318 return ret;
7319}
7320
7321/**
7322 * xmlParseAttValueInternal:
7323 * @ctxt: an XML parser context
7324 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007325 * @alloc: whether the attribute was reallocated as a new string
7326 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007327 *
7328 * parse a value for an attribute.
7329 * NOTE: if no normalization is needed, the routine will return pointers
7330 * directly from the data buffer.
7331 *
7332 * 3.3.3 Attribute-Value Normalization:
7333 * Before the value of an attribute is passed to the application or
7334 * checked for validity, the XML processor must normalize it as follows:
7335 * - a character reference is processed by appending the referenced
7336 * character to the attribute value
7337 * - an entity reference is processed by recursively processing the
7338 * replacement text of the entity
7339 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7340 * appending #x20 to the normalized value, except that only a single
7341 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7342 * parsed entity or the literal entity value of an internal parsed entity
7343 * - other characters are processed by appending them to the normalized value
7344 * If the declared value is not CDATA, then the XML processor must further
7345 * process the normalized attribute value by discarding any leading and
7346 * trailing space (#x20) characters, and by replacing sequences of space
7347 * (#x20) characters by a single space (#x20) character.
7348 * All attributes for which no declaration has been read should be treated
7349 * by a non-validating parser as if declared CDATA.
7350 *
7351 * Returns the AttValue parsed or NULL. The value has to be freed by the
7352 * caller if it was copied, this can be detected by val[*len] == 0.
7353 */
7354
7355static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007356xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7357 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007358{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007359 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007360 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007361 xmlChar *ret = NULL;
7362
7363 GROW;
7364 in = (xmlChar *) CUR_PTR;
7365 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007366 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007367 return (NULL);
7368 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007369 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007370
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007371 /*
7372 * try to handle in this routine the most common case where no
7373 * allocation of a new string is required and where content is
7374 * pure ASCII.
7375 */
7376 limit = *in++;
7377 end = ctxt->input->end;
7378 start = in;
7379 if (in >= end) {
7380 const xmlChar *oldbase = ctxt->input->base;
7381 GROW;
7382 if (oldbase != ctxt->input->base) {
7383 long delta = ctxt->input->base - oldbase;
7384 start = start + delta;
7385 in = in + delta;
7386 }
7387 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007388 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007389 if (normalize) {
7390 /*
7391 * Skip any leading spaces
7392 */
7393 while ((in < end) && (*in != limit) &&
7394 ((*in == 0x20) || (*in == 0x9) ||
7395 (*in == 0xA) || (*in == 0xD))) {
7396 in++;
7397 start = in;
7398 if (in >= end) {
7399 const xmlChar *oldbase = ctxt->input->base;
7400 GROW;
7401 if (oldbase != ctxt->input->base) {
7402 long delta = ctxt->input->base - oldbase;
7403 start = start + delta;
7404 in = in + delta;
7405 }
7406 end = ctxt->input->end;
7407 }
7408 }
7409 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7410 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7411 if ((*in++ == 0x20) && (*in == 0x20)) break;
7412 if (in >= end) {
7413 const xmlChar *oldbase = ctxt->input->base;
7414 GROW;
7415 if (oldbase != ctxt->input->base) {
7416 long delta = ctxt->input->base - oldbase;
7417 start = start + delta;
7418 in = in + delta;
7419 }
7420 end = ctxt->input->end;
7421 }
7422 }
7423 last = in;
7424 /*
7425 * skip the trailing blanks
7426 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007427 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007428 while ((in < end) && (*in != limit) &&
7429 ((*in == 0x20) || (*in == 0x9) ||
7430 (*in == 0xA) || (*in == 0xD))) {
7431 in++;
7432 if (in >= end) {
7433 const xmlChar *oldbase = ctxt->input->base;
7434 GROW;
7435 if (oldbase != ctxt->input->base) {
7436 long delta = ctxt->input->base - oldbase;
7437 start = start + delta;
7438 in = in + delta;
7439 last = last + delta;
7440 }
7441 end = ctxt->input->end;
7442 }
7443 }
7444 if (*in != limit) goto need_complex;
7445 } else {
7446 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7447 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7448 in++;
7449 if (in >= end) {
7450 const xmlChar *oldbase = ctxt->input->base;
7451 GROW;
7452 if (oldbase != ctxt->input->base) {
7453 long delta = ctxt->input->base - oldbase;
7454 start = start + delta;
7455 in = in + delta;
7456 }
7457 end = ctxt->input->end;
7458 }
7459 }
7460 last = in;
7461 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007462 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007463 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007464 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007465 *len = last - start;
7466 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007467 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007468 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007469 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007470 }
7471 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007472 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007473 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007474need_complex:
7475 if (alloc) *alloc = 1;
7476 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007477}
7478
7479/**
7480 * xmlParseAttribute2:
7481 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007482 * @pref: the element prefix
7483 * @elem: the element name
7484 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007485 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007486 * @len: an int * to save the length of the attribute
7487 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007488 *
7489 * parse an attribute in the new SAX2 framework.
7490 *
7491 * Returns the attribute name, and the value in *value, .
7492 */
7493
7494static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007495xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7496 const xmlChar *pref, const xmlChar *elem,
7497 const xmlChar **prefix, xmlChar **value,
7498 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007499 const xmlChar *name;
7500 xmlChar *val;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007501 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007502
7503 *value = NULL;
7504 GROW;
7505 name = xmlParseQName(ctxt, prefix);
7506 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007507 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7508 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007509 return(NULL);
7510 }
7511
7512 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007513 * get the type if needed
7514 */
7515 if (ctxt->attsSpecial != NULL) {
7516 int type;
7517
7518 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7519 pref, elem, *prefix, name);
7520 if (type != 0) normalize = 1;
7521 }
7522
7523 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007524 * read the value
7525 */
7526 SKIP_BLANKS;
7527 if (RAW == '=') {
7528 NEXT;
7529 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007530 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007531 ctxt->instate = XML_PARSER_CONTENT;
7532 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007533 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007534 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007535 return(NULL);
7536 }
7537
7538 /*
7539 * Check that xml:lang conforms to the specification
7540 * No more registered as an error, just generate a warning now
7541 * since this was deprecated in XML second edition
7542 */
7543 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7544 if (!xmlCheckLanguageID(val)) {
7545 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7546 ctxt->sax->warning(ctxt->userData,
7547 "Malformed value for xml:lang : %s\n", val);
7548 }
7549 }
7550
7551 /*
7552 * Check that xml:space conforms to the specification
7553 */
7554 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7555 if (xmlStrEqual(val, BAD_CAST "default"))
7556 *(ctxt->space) = 0;
7557 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7558 *(ctxt->space) = 1;
7559 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007560 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007561"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7562 val);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007563 }
7564 }
7565
7566 *value = val;
7567 return(name);
7568}
7569
7570/**
7571 * xmlParseStartTag2:
7572 * @ctxt: an XML parser context
7573 *
7574 * parse a start of tag either for rule element or
7575 * EmptyElement. In both case we don't parse the tag closing chars.
7576 * This routine is called when running SAX2 parsing
7577 *
7578 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7579 *
7580 * [ WFC: Unique Att Spec ]
7581 * No attribute name may appear more than once in the same start-tag or
7582 * empty-element tag.
7583 *
7584 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7585 *
7586 * [ WFC: Unique Att Spec ]
7587 * No attribute name may appear more than once in the same start-tag or
7588 * empty-element tag.
7589 *
7590 * With namespace:
7591 *
7592 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7593 *
7594 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7595 *
7596 * Returns the element name parsed
7597 */
7598
7599static const xmlChar *
7600xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
7601 const xmlChar **URI) {
7602 const xmlChar *localname;
7603 const xmlChar *prefix;
7604 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007605 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007606 const xmlChar *nsname;
7607 xmlChar *attvalue;
7608 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007609 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007610 int nratts, nbatts, nbdef;
7611 int i, j, nbNs, attval;
7612 const xmlChar *base;
7613 unsigned long cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007614
7615 if (RAW != '<') return(NULL);
7616 NEXT1;
7617
7618 /*
7619 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7620 * point since the attribute values may be stored as pointers to
7621 * the buffer and calling SHRINK would destroy them !
7622 * The Shrinking is only possible once the full set of attribute
7623 * callbacks have been done.
7624 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007625reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007626 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007627 base = ctxt->input->base;
7628 cur = ctxt->input->cur - ctxt->input->base;
7629 nbatts = 0;
7630 nratts = 0;
7631 nbdef = 0;
7632 nbNs = 0;
7633 attval = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007634
7635 localname = xmlParseQName(ctxt, &prefix);
7636 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007637 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7638 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007639 return(NULL);
7640 }
7641
7642 /*
7643 * Now parse the attributes, it ends up with the ending
7644 *
7645 * (S Attribute)* S?
7646 */
7647 SKIP_BLANKS;
7648 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007649 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007650
7651 while ((RAW != '>') &&
7652 ((RAW != '/') || (NXT(1) != '>')) &&
7653 (IS_CHAR((unsigned int) RAW))) {
7654 const xmlChar *q = CUR_PTR;
7655 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007656 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007657
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007658 attname = xmlParseAttribute2(ctxt, prefix, localname,
7659 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007660 if ((attname != NULL) && (attvalue != NULL)) {
7661 if (len < 0) len = xmlStrlen(attvalue);
7662 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007663 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7664 xmlURIPtr uri;
7665
7666 if (*URL != 0) {
7667 uri = xmlParseURI((const char *) URL);
7668 if (uri == NULL) {
7669 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7670 ctxt->sax->warning(ctxt->userData,
7671 "xmlns: %s not a valid URI\n", URL);
7672 } else {
7673 if (uri->scheme == NULL) {
7674 if ((ctxt->sax != NULL) &&
7675 (ctxt->sax->warning != NULL))
7676 ctxt->sax->warning(ctxt->userData,
7677 "xmlns: URI %s is not absolute\n", URL);
7678 }
7679 xmlFreeURI(uri);
7680 }
7681 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007682 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007683 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007684 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007685 for (j = 1;j <= nbNs;j++)
7686 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7687 break;
7688 if (j <= nbNs)
7689 xmlErrAttributeDup(ctxt, NULL, attname);
7690 else
7691 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007692 if (alloc != 0) xmlFree(attvalue);
7693 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007694 continue;
7695 }
7696 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007697 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7698 xmlURIPtr uri;
7699
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007700 if (attname == ctxt->str_xml) {
7701 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007702 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7703 "xml namespace prefix mapped to wrong URI\n",
7704 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007705 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007706 /*
7707 * Do not keep a namespace definition node
7708 */
7709 if (alloc != 0) xmlFree(attvalue);
7710 SKIP_BLANKS;
7711 continue;
7712 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007713 uri = xmlParseURI((const char *) URL);
7714 if (uri == NULL) {
7715 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7716 ctxt->sax->warning(ctxt->userData,
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007717 "xmlns:%s: '%s' is not a valid URI\n",
7718 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007719 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007720 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007721 if ((ctxt->sax != NULL) &&
7722 (ctxt->sax->warning != NULL))
7723 ctxt->sax->warning(ctxt->userData,
7724 "xmlns:%s: URI %s is not absolute\n",
7725 attname, URL);
7726 }
7727 xmlFreeURI(uri);
7728 }
7729
Daniel Veillard0fb18932003-09-07 09:14:37 +00007730 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007731 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007732 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007733 for (j = 1;j <= nbNs;j++)
7734 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7735 break;
7736 if (j <= nbNs)
7737 xmlErrAttributeDup(ctxt, aprefix, attname);
7738 else
7739 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007740 if (alloc != 0) xmlFree(attvalue);
7741 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007742 continue;
7743 }
7744
7745 /*
7746 * Add the pair to atts
7747 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007748 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7749 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007750 if (attvalue[len] == 0)
7751 xmlFree(attvalue);
7752 goto failed;
7753 }
7754 maxatts = ctxt->maxatts;
7755 atts = ctxt->atts;
7756 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007757 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007758 atts[nbatts++] = attname;
7759 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007760 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007761 atts[nbatts++] = attvalue;
7762 attvalue += len;
7763 atts[nbatts++] = attvalue;
7764 /*
7765 * tag if some deallocation is needed
7766 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007767 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007768 } else {
7769 if ((attvalue != NULL) && (attvalue[len] == 0))
7770 xmlFree(attvalue);
7771 }
7772
7773failed:
7774
7775 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007776 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007777 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7778 break;
7779 if (!IS_BLANK(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007780 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7781 "attributes construct error\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007782 }
7783 SKIP_BLANKS;
7784 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7785 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007786 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007787 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007788 break;
7789 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007790 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007791 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007792 }
7793
Daniel Veillard0fb18932003-09-07 09:14:37 +00007794 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007795 * The attributes checkings
Daniel Veillard0fb18932003-09-07 09:14:37 +00007796 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007797 for (i = 0; i < nbatts;i += 5) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007798 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7799 if ((atts[i + 1] != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007800 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007801 "Namespace prefix %s for %s on %s is not defined\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007802 atts[i + 1], atts[i], localname);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007803 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007804 atts[i + 2] = nsname;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007805 /*
7806 * [ WFC: Unique Att Spec ]
7807 * No attribute name may appear more than once in the same
7808 * start-tag or empty-element tag.
7809 * As extended by the Namespace in XML REC.
7810 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007811 for (j = 0; j < i;j += 5) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007812 if (atts[i] == atts[j]) {
7813 if (atts[i+1] == atts[j+1]) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007814 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007815 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007816 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007817 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007818 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007819 "Namespaced Attribute %s in '%s' redefined\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007820 atts[i], nsname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007821 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007822 }
7823 }
7824 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007825 }
7826
7827 /*
7828 * The attributes defaulting
7829 */
7830 if (ctxt->attsDefault != NULL) {
7831 xmlDefAttrsPtr defaults;
7832
7833 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7834 if (defaults != NULL) {
7835 for (i = 0;i < defaults->nbAttrs;i++) {
7836 attname = defaults->values[4 * i];
7837 aprefix = defaults->values[4 * i + 1];
7838
7839 /*
7840 * special work for namespaces defaulted defs
7841 */
7842 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7843 /*
7844 * check that it's not a defined namespace
7845 */
7846 for (j = 1;j <= nbNs;j++)
7847 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7848 break;
7849 if (j <= nbNs) continue;
7850
7851 nsname = xmlGetNamespace(ctxt, NULL);
7852 if (nsname != defaults->values[4 * i + 2]) {
7853 if (nsPush(ctxt, NULL,
7854 defaults->values[4 * i + 2]) > 0)
7855 nbNs++;
7856 }
7857 } else if (aprefix == ctxt->str_xmlns) {
7858 /*
7859 * check that it's not a defined namespace
7860 */
7861 for (j = 1;j <= nbNs;j++)
7862 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7863 break;
7864 if (j <= nbNs) continue;
7865
7866 nsname = xmlGetNamespace(ctxt, attname);
7867 if (nsname != defaults->values[2]) {
7868 if (nsPush(ctxt, attname,
7869 defaults->values[4 * i + 2]) > 0)
7870 nbNs++;
7871 }
7872 } else {
7873 /*
7874 * check that it's not a defined attribute
7875 */
7876 for (j = 0;j < nbatts;j+=5) {
7877 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7878 break;
7879 }
7880 if (j < nbatts) continue;
7881
7882 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7883 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00007884 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007885 }
7886 maxatts = ctxt->maxatts;
7887 atts = ctxt->atts;
7888 }
7889 atts[nbatts++] = attname;
7890 atts[nbatts++] = aprefix;
7891 if (aprefix == NULL)
7892 atts[nbatts++] = NULL;
7893 else
7894 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7895 atts[nbatts++] = defaults->values[4 * i + 2];
7896 atts[nbatts++] = defaults->values[4 * i + 3];
7897 nbdef++;
7898 }
7899 }
7900 }
7901 }
7902
7903 nsname = xmlGetNamespace(ctxt, prefix);
7904 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007905 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7906 "Namespace prefix %s on %s is not defined\n",
7907 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007908 }
7909 *pref = prefix;
7910 *URI = nsname;
7911
7912 /*
7913 * SAX: Start of Element !
7914 */
7915 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7916 (!ctxt->disableSAX)) {
7917 if (nbNs > 0)
7918 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7919 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7920 nbatts / 5, nbdef, atts);
7921 else
7922 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7923 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7924 }
7925
7926 /*
7927 * Free up attribute allocated strings if needed
7928 */
7929 if (attval != 0) {
7930 for (i = 3,j = 0; j < nratts;i += 5,j++)
7931 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7932 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007933 }
7934
7935 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007936
7937base_changed:
7938 /*
7939 * the attribute strings are valid iif the base didn't changed
7940 */
7941 if (attval != 0) {
7942 for (i = 3,j = 0; j < nratts;i += 5,j++)
7943 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7944 xmlFree((xmlChar *) atts[i]);
7945 }
7946 ctxt->input->cur = ctxt->input->base + cur;
7947 if (ctxt->wellFormed == 1) {
7948 goto reparse;
7949 }
7950 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007951}
7952
7953/**
7954 * xmlParseEndTag2:
7955 * @ctxt: an XML parser context
7956 * @line: line of the start tag
7957 * @nsNr: number of namespaces on the start tag
7958 *
7959 * parse an end of tag
7960 *
7961 * [42] ETag ::= '</' Name S? '>'
7962 *
7963 * With namespace
7964 *
7965 * [NS 9] ETag ::= '</' QName S? '>'
7966 */
7967
7968static void
7969xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
7970 const xmlChar *URI, int line, int nsNr) {
7971 const xmlChar *name;
7972
7973 GROW;
7974 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007975 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007976 return;
7977 }
7978 SKIP(2);
7979
7980 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
7981
7982 /*
7983 * We should definitely be at the ending "S? '>'" part
7984 */
7985 GROW;
7986 SKIP_BLANKS;
7987 if ((!IS_CHAR((unsigned int) RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007988 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007989 } else
7990 NEXT1;
7991
7992 /*
7993 * [ WFC: Element Type Match ]
7994 * The Name in an element's end-tag must match the element type in the
7995 * start-tag.
7996 *
7997 */
7998 if (name != (xmlChar*)1) {
7999 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
8000 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
8001 if (name != NULL) {
8002 ctxt->sax->error(ctxt->userData,
8003 "Opening and ending tag mismatch: %s line %d and %s\n",
8004 ctxt->name, line, name);
8005 } else {
8006 ctxt->sax->error(ctxt->userData,
8007 "Ending tag error for: %s line %d\n", ctxt->name, line);
8008 }
8009
8010 }
8011 ctxt->wellFormed = 0;
8012 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8013 }
8014
8015 /*
8016 * SAX: End of Tag
8017 */
8018 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8019 (!ctxt->disableSAX))
8020 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8021
Daniel Veillard0fb18932003-09-07 09:14:37 +00008022 spacePop(ctxt);
8023 if (nsNr != 0)
8024 nsPop(ctxt, nsNr);
8025 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008026}
8027
8028/**
Owen Taylor3473f882001-02-23 17:55:21 +00008029 * xmlParseCDSect:
8030 * @ctxt: an XML parser context
8031 *
8032 * Parse escaped pure raw content.
8033 *
8034 * [18] CDSect ::= CDStart CData CDEnd
8035 *
8036 * [19] CDStart ::= '<![CDATA['
8037 *
8038 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8039 *
8040 * [21] CDEnd ::= ']]>'
8041 */
8042void
8043xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8044 xmlChar *buf = NULL;
8045 int len = 0;
8046 int size = XML_PARSER_BUFFER_SIZE;
8047 int r, rl;
8048 int s, sl;
8049 int cur, l;
8050 int count = 0;
8051
8052 if ((NXT(0) == '<') && (NXT(1) == '!') &&
8053 (NXT(2) == '[') && (NXT(3) == 'C') &&
8054 (NXT(4) == 'D') && (NXT(5) == 'A') &&
8055 (NXT(6) == 'T') && (NXT(7) == 'A') &&
8056 (NXT(8) == '[')) {
8057 SKIP(9);
8058 } else
8059 return;
8060
8061 ctxt->instate = XML_PARSER_CDATA_SECTION;
8062 r = CUR_CHAR(rl);
8063 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008064 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008065 ctxt->instate = XML_PARSER_CONTENT;
8066 return;
8067 }
8068 NEXTL(rl);
8069 s = CUR_CHAR(sl);
8070 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008071 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008072 ctxt->instate = XML_PARSER_CONTENT;
8073 return;
8074 }
8075 NEXTL(sl);
8076 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008077 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008078 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008079 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008080 return;
8081 }
8082 while (IS_CHAR(cur) &&
8083 ((r != ']') || (s != ']') || (cur != '>'))) {
8084 if (len + 5 >= size) {
8085 size *= 2;
8086 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8087 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008088 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008089 return;
8090 }
8091 }
8092 COPY_BUF(rl,buf,len,r);
8093 r = s;
8094 rl = sl;
8095 s = cur;
8096 sl = l;
8097 count++;
8098 if (count > 50) {
8099 GROW;
8100 count = 0;
8101 }
8102 NEXTL(l);
8103 cur = CUR_CHAR(l);
8104 }
8105 buf[len] = 0;
8106 ctxt->instate = XML_PARSER_CONTENT;
8107 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008108 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008109 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008110 xmlFree(buf);
8111 return;
8112 }
8113 NEXTL(l);
8114
8115 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008116 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008117 */
8118 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8119 if (ctxt->sax->cdataBlock != NULL)
8120 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008121 else if (ctxt->sax->characters != NULL)
8122 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008123 }
8124 xmlFree(buf);
8125}
8126
8127/**
8128 * xmlParseContent:
8129 * @ctxt: an XML parser context
8130 *
8131 * Parse a content:
8132 *
8133 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8134 */
8135
8136void
8137xmlParseContent(xmlParserCtxtPtr ctxt) {
8138 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008139 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008140 ((RAW != '<') || (NXT(1) != '/'))) {
8141 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008142 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008143 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008144
8145 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008146 * First case : a Processing Instruction.
8147 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008148 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008149 xmlParsePI(ctxt);
8150 }
8151
8152 /*
8153 * Second case : a CDSection
8154 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008155 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008156 (NXT(2) == '[') && (NXT(3) == 'C') &&
8157 (NXT(4) == 'D') && (NXT(5) == 'A') &&
8158 (NXT(6) == 'T') && (NXT(7) == 'A') &&
8159 (NXT(8) == '[')) {
8160 xmlParseCDSect(ctxt);
8161 }
8162
8163 /*
8164 * Third case : a comment
8165 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008166 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008167 (NXT(2) == '-') && (NXT(3) == '-')) {
8168 xmlParseComment(ctxt);
8169 ctxt->instate = XML_PARSER_CONTENT;
8170 }
8171
8172 /*
8173 * Fourth case : a sub-element.
8174 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008175 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008176 xmlParseElement(ctxt);
8177 }
8178
8179 /*
8180 * Fifth case : a reference. If if has not been resolved,
8181 * parsing returns it's Name, create the node
8182 */
8183
Daniel Veillard21a0f912001-02-25 19:54:14 +00008184 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008185 xmlParseReference(ctxt);
8186 }
8187
8188 /*
8189 * Last case, text. Note that References are handled directly.
8190 */
8191 else {
8192 xmlParseCharData(ctxt, 0);
8193 }
8194
8195 GROW;
8196 /*
8197 * Pop-up of finished entities.
8198 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008199 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008200 xmlPopInput(ctxt);
8201 SHRINK;
8202
Daniel Veillardfdc91562002-07-01 21:52:03 +00008203 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008204 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8205 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008206 ctxt->instate = XML_PARSER_EOF;
8207 break;
8208 }
8209 }
8210}
8211
8212/**
8213 * xmlParseElement:
8214 * @ctxt: an XML parser context
8215 *
8216 * parse an XML element, this is highly recursive
8217 *
8218 * [39] element ::= EmptyElemTag | STag content ETag
8219 *
8220 * [ WFC: Element Type Match ]
8221 * The Name in an element's end-tag must match the element type in the
8222 * start-tag.
8223 *
Owen Taylor3473f882001-02-23 17:55:21 +00008224 */
8225
8226void
8227xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008228 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008229 const xmlChar *prefix;
8230 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008231 xmlParserNodeInfo node_info;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008232 int line;
Owen Taylor3473f882001-02-23 17:55:21 +00008233 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008234 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008235
8236 /* Capture start position */
8237 if (ctxt->record_info) {
8238 node_info.begin_pos = ctxt->input->consumed +
8239 (CUR_PTR - ctxt->input->base);
8240 node_info.begin_line = ctxt->input->line;
8241 }
8242
8243 if (ctxt->spaceNr == 0)
8244 spacePush(ctxt, -1);
8245 else
8246 spacePush(ctxt, *ctxt->space);
8247
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008248 line = ctxt->input->line;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008249 if (ctxt->sax2)
8250 name = xmlParseStartTag2(ctxt, &prefix, &URI);
8251 else
8252 name = xmlParseStartTag(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008253 if (name == NULL) {
8254 spacePop(ctxt);
8255 return;
8256 }
8257 namePush(ctxt, name);
8258 ret = ctxt->node;
8259
Daniel Veillard4432df22003-09-28 18:58:27 +00008260#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008261 /*
8262 * [ VC: Root Element Type ]
8263 * The Name in the document type declaration must match the element
8264 * type of the root element.
8265 */
8266 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8267 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8268 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008269#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008270
8271 /*
8272 * Check for an Empty Element.
8273 */
8274 if ((RAW == '/') && (NXT(1) == '>')) {
8275 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008276 if (ctxt->sax2) {
8277 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8278 (!ctxt->disableSAX))
8279 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
8280 } else {
8281 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8282 (!ctxt->disableSAX))
8283 ctxt->sax->endElement(ctxt->userData, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008284 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008285 namePop(ctxt);
8286 spacePop(ctxt);
8287 if (nsNr != ctxt->nsNr)
8288 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008289 if ( ret != NULL && ctxt->record_info ) {
8290 node_info.end_pos = ctxt->input->consumed +
8291 (CUR_PTR - ctxt->input->base);
8292 node_info.end_line = ctxt->input->line;
8293 node_info.node = ret;
8294 xmlParserAddNodeInfo(ctxt, &node_info);
8295 }
8296 return;
8297 }
8298 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008299 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008300 } else {
8301 ctxt->errNo = XML_ERR_GT_REQUIRED;
8302 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8303 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008304 "Couldn't find end of Start Tag %s line %d\n",
8305 name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00008306 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008307 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008308
8309 /*
8310 * end of parsing of this node.
8311 */
8312 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008313 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008314 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008315 if (nsNr != ctxt->nsNr)
8316 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008317
8318 /*
8319 * Capture end position and add node
8320 */
8321 if ( ret != NULL && ctxt->record_info ) {
8322 node_info.end_pos = ctxt->input->consumed +
8323 (CUR_PTR - ctxt->input->base);
8324 node_info.end_line = ctxt->input->line;
8325 node_info.node = ret;
8326 xmlParserAddNodeInfo(ctxt, &node_info);
8327 }
8328 return;
8329 }
8330
8331 /*
8332 * Parse the content of the element:
8333 */
8334 xmlParseContent(ctxt);
Daniel Veillard34ba3872003-07-15 13:34:05 +00008335 if (!IS_CHAR((unsigned int) RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00008336 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00008337 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8338 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008339 "Premature end of data in tag %s line %d\n", name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00008340 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008341 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008342
8343 /*
8344 * end of parsing of this node.
8345 */
8346 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008347 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008348 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008349 if (nsNr != ctxt->nsNr)
8350 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008351 return;
8352 }
8353
8354 /*
8355 * parse the end of tag: '</' should be here.
8356 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008357 if (ctxt->sax2) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008358 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008359 namePop(ctxt);
8360 } else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008361 xmlParseEndTag1(ctxt, line);
Owen Taylor3473f882001-02-23 17:55:21 +00008362
8363 /*
8364 * Capture end position and add node
8365 */
8366 if ( ret != NULL && ctxt->record_info ) {
8367 node_info.end_pos = ctxt->input->consumed +
8368 (CUR_PTR - ctxt->input->base);
8369 node_info.end_line = ctxt->input->line;
8370 node_info.node = ret;
8371 xmlParserAddNodeInfo(ctxt, &node_info);
8372 }
8373}
8374
8375/**
8376 * xmlParseVersionNum:
8377 * @ctxt: an XML parser context
8378 *
8379 * parse the XML version value.
8380 *
8381 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8382 *
8383 * Returns the string giving the XML version number, or NULL
8384 */
8385xmlChar *
8386xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8387 xmlChar *buf = NULL;
8388 int len = 0;
8389 int size = 10;
8390 xmlChar cur;
8391
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008392 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008393 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008394 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008395 return(NULL);
8396 }
8397 cur = CUR;
8398 while (((cur >= 'a') && (cur <= 'z')) ||
8399 ((cur >= 'A') && (cur <= 'Z')) ||
8400 ((cur >= '0') && (cur <= '9')) ||
8401 (cur == '_') || (cur == '.') ||
8402 (cur == ':') || (cur == '-')) {
8403 if (len + 1 >= size) {
8404 size *= 2;
8405 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8406 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008407 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008408 return(NULL);
8409 }
8410 }
8411 buf[len++] = cur;
8412 NEXT;
8413 cur=CUR;
8414 }
8415 buf[len] = 0;
8416 return(buf);
8417}
8418
8419/**
8420 * xmlParseVersionInfo:
8421 * @ctxt: an XML parser context
8422 *
8423 * parse the XML version.
8424 *
8425 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8426 *
8427 * [25] Eq ::= S? '=' S?
8428 *
8429 * Returns the version string, e.g. "1.0"
8430 */
8431
8432xmlChar *
8433xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8434 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008435
8436 if ((RAW == 'v') && (NXT(1) == 'e') &&
8437 (NXT(2) == 'r') && (NXT(3) == 's') &&
8438 (NXT(4) == 'i') && (NXT(5) == 'o') &&
8439 (NXT(6) == 'n')) {
8440 SKIP(7);
8441 SKIP_BLANKS;
8442 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008443 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008444 return(NULL);
8445 }
8446 NEXT;
8447 SKIP_BLANKS;
8448 if (RAW == '"') {
8449 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008450 version = xmlParseVersionNum(ctxt);
8451 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008452 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008453 } else
8454 NEXT;
8455 } else if (RAW == '\''){
8456 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008457 version = xmlParseVersionNum(ctxt);
8458 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008459 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008460 } else
8461 NEXT;
8462 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008463 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008464 }
8465 }
8466 return(version);
8467}
8468
8469/**
8470 * xmlParseEncName:
8471 * @ctxt: an XML parser context
8472 *
8473 * parse the XML encoding name
8474 *
8475 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8476 *
8477 * Returns the encoding name value or NULL
8478 */
8479xmlChar *
8480xmlParseEncName(xmlParserCtxtPtr ctxt) {
8481 xmlChar *buf = NULL;
8482 int len = 0;
8483 int size = 10;
8484 xmlChar cur;
8485
8486 cur = CUR;
8487 if (((cur >= 'a') && (cur <= 'z')) ||
8488 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008489 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008490 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008491 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008492 return(NULL);
8493 }
8494
8495 buf[len++] = cur;
8496 NEXT;
8497 cur = CUR;
8498 while (((cur >= 'a') && (cur <= 'z')) ||
8499 ((cur >= 'A') && (cur <= 'Z')) ||
8500 ((cur >= '0') && (cur <= '9')) ||
8501 (cur == '.') || (cur == '_') ||
8502 (cur == '-')) {
8503 if (len + 1 >= size) {
8504 size *= 2;
8505 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8506 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008507 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008508 return(NULL);
8509 }
8510 }
8511 buf[len++] = cur;
8512 NEXT;
8513 cur = CUR;
8514 if (cur == 0) {
8515 SHRINK;
8516 GROW;
8517 cur = CUR;
8518 }
8519 }
8520 buf[len] = 0;
8521 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008522 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008523 }
8524 return(buf);
8525}
8526
8527/**
8528 * xmlParseEncodingDecl:
8529 * @ctxt: an XML parser context
8530 *
8531 * parse the XML encoding declaration
8532 *
8533 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8534 *
8535 * this setups the conversion filters.
8536 *
8537 * Returns the encoding value or NULL
8538 */
8539
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008540const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008541xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8542 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008543
8544 SKIP_BLANKS;
8545 if ((RAW == 'e') && (NXT(1) == 'n') &&
8546 (NXT(2) == 'c') && (NXT(3) == 'o') &&
8547 (NXT(4) == 'd') && (NXT(5) == 'i') &&
8548 (NXT(6) == 'n') && (NXT(7) == 'g')) {
8549 SKIP(8);
8550 SKIP_BLANKS;
8551 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008552 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008553 return(NULL);
8554 }
8555 NEXT;
8556 SKIP_BLANKS;
8557 if (RAW == '"') {
8558 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008559 encoding = xmlParseEncName(ctxt);
8560 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008561 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008562 } else
8563 NEXT;
8564 } else if (RAW == '\''){
8565 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008566 encoding = xmlParseEncName(ctxt);
8567 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008568 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008569 } else
8570 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008571 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008572 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008573 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008574 /*
8575 * UTF-16 encoding stwich has already taken place at this stage,
8576 * more over the little-endian/big-endian selection is already done
8577 */
8578 if ((encoding != NULL) &&
8579 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8580 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008581 if (ctxt->encoding != NULL)
8582 xmlFree((xmlChar *) ctxt->encoding);
8583 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008584 }
8585 /*
8586 * UTF-8 encoding is handled natively
8587 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008588 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008589 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8590 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008591 if (ctxt->encoding != NULL)
8592 xmlFree((xmlChar *) ctxt->encoding);
8593 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008594 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008595 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008596 xmlCharEncodingHandlerPtr handler;
8597
8598 if (ctxt->input->encoding != NULL)
8599 xmlFree((xmlChar *) ctxt->input->encoding);
8600 ctxt->input->encoding = encoding;
8601
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008602 handler = xmlFindCharEncodingHandler((const char *) encoding);
8603 if (handler != NULL) {
8604 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008605 } else {
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008606 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
8607 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8608 ctxt->sax->error(ctxt->userData,
8609 "Unsupported encoding %s\n", encoding);
8610 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008611 }
8612 }
8613 }
8614 return(encoding);
8615}
8616
8617/**
8618 * xmlParseSDDecl:
8619 * @ctxt: an XML parser context
8620 *
8621 * parse the XML standalone declaration
8622 *
8623 * [32] SDDecl ::= S 'standalone' Eq
8624 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8625 *
8626 * [ VC: Standalone Document Declaration ]
8627 * TODO The standalone document declaration must have the value "no"
8628 * if any external markup declarations contain declarations of:
8629 * - attributes with default values, if elements to which these
8630 * attributes apply appear in the document without specifications
8631 * of values for these attributes, or
8632 * - entities (other than amp, lt, gt, apos, quot), if references
8633 * to those entities appear in the document, or
8634 * - attributes with values subject to normalization, where the
8635 * attribute appears in the document with a value which will change
8636 * as a result of normalization, or
8637 * - element types with element content, if white space occurs directly
8638 * within any instance of those types.
8639 *
8640 * Returns 1 if standalone, 0 otherwise
8641 */
8642
8643int
8644xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8645 int standalone = -1;
8646
8647 SKIP_BLANKS;
8648 if ((RAW == 's') && (NXT(1) == 't') &&
8649 (NXT(2) == 'a') && (NXT(3) == 'n') &&
8650 (NXT(4) == 'd') && (NXT(5) == 'a') &&
8651 (NXT(6) == 'l') && (NXT(7) == 'o') &&
8652 (NXT(8) == 'n') && (NXT(9) == 'e')) {
8653 SKIP(10);
8654 SKIP_BLANKS;
8655 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008656 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008657 return(standalone);
8658 }
8659 NEXT;
8660 SKIP_BLANKS;
8661 if (RAW == '\''){
8662 NEXT;
8663 if ((RAW == 'n') && (NXT(1) == 'o')) {
8664 standalone = 0;
8665 SKIP(2);
8666 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8667 (NXT(2) == 's')) {
8668 standalone = 1;
8669 SKIP(3);
8670 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008671 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008672 }
8673 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008674 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008675 } else
8676 NEXT;
8677 } else if (RAW == '"'){
8678 NEXT;
8679 if ((RAW == 'n') && (NXT(1) == 'o')) {
8680 standalone = 0;
8681 SKIP(2);
8682 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8683 (NXT(2) == 's')) {
8684 standalone = 1;
8685 SKIP(3);
8686 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008687 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008688 }
8689 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008690 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008691 } else
8692 NEXT;
8693 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008694 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008695 }
8696 }
8697 return(standalone);
8698}
8699
8700/**
8701 * xmlParseXMLDecl:
8702 * @ctxt: an XML parser context
8703 *
8704 * parse an XML declaration header
8705 *
8706 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8707 */
8708
8709void
8710xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8711 xmlChar *version;
8712
8713 /*
8714 * We know that '<?xml' is here.
8715 */
8716 SKIP(5);
8717
8718 if (!IS_BLANK(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008719 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8720 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008721 }
8722 SKIP_BLANKS;
8723
8724 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008725 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008726 */
8727 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008728 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008729 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008730 } else {
8731 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8732 /*
8733 * TODO: Blueberry should be detected here
8734 */
8735 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
8736 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
8737 version);
8738 }
8739 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008740 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008741 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008742 }
Owen Taylor3473f882001-02-23 17:55:21 +00008743
8744 /*
8745 * We may have the encoding declaration
8746 */
8747 if (!IS_BLANK(RAW)) {
8748 if ((RAW == '?') && (NXT(1) == '>')) {
8749 SKIP(2);
8750 return;
8751 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008752 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008753 }
8754 xmlParseEncodingDecl(ctxt);
8755 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8756 /*
8757 * The XML REC instructs us to stop parsing right here
8758 */
8759 return;
8760 }
8761
8762 /*
8763 * We may have the standalone status.
8764 */
8765 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
8766 if ((RAW == '?') && (NXT(1) == '>')) {
8767 SKIP(2);
8768 return;
8769 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008770 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008771 }
8772 SKIP_BLANKS;
8773 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8774
8775 SKIP_BLANKS;
8776 if ((RAW == '?') && (NXT(1) == '>')) {
8777 SKIP(2);
8778 } else if (RAW == '>') {
8779 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008780 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008781 NEXT;
8782 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008783 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008784 MOVETO_ENDTAG(CUR_PTR);
8785 NEXT;
8786 }
8787}
8788
8789/**
8790 * xmlParseMisc:
8791 * @ctxt: an XML parser context
8792 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008793 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008794 *
8795 * [27] Misc ::= Comment | PI | S
8796 */
8797
8798void
8799xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008800 while (((RAW == '<') && (NXT(1) == '?')) ||
8801 ((RAW == '<') && (NXT(1) == '!') &&
8802 (NXT(2) == '-') && (NXT(3) == '-')) ||
8803 IS_BLANK(CUR)) {
8804 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008805 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00008806 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008807 NEXT;
8808 } else
8809 xmlParseComment(ctxt);
8810 }
8811}
8812
8813/**
8814 * xmlParseDocument:
8815 * @ctxt: an XML parser context
8816 *
8817 * parse an XML document (and build a tree if using the standard SAX
8818 * interface).
8819 *
8820 * [1] document ::= prolog element Misc*
8821 *
8822 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8823 *
8824 * Returns 0, -1 in case of error. the parser context is augmented
8825 * as a result of the parsing.
8826 */
8827
8828int
8829xmlParseDocument(xmlParserCtxtPtr ctxt) {
8830 xmlChar start[4];
8831 xmlCharEncoding enc;
8832
8833 xmlInitParser();
8834
8835 GROW;
8836
8837 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008838 * SAX: detecting the level.
8839 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008840 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008841
8842 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008843 * SAX: beginning of the document processing.
8844 */
8845 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8846 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8847
Daniel Veillard50f34372001-08-03 12:06:36 +00008848 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008849 /*
8850 * Get the 4 first bytes and decode the charset
8851 * if enc != XML_CHAR_ENCODING_NONE
8852 * plug some encoding conversion routines.
8853 */
8854 start[0] = RAW;
8855 start[1] = NXT(1);
8856 start[2] = NXT(2);
8857 start[3] = NXT(3);
8858 enc = xmlDetectCharEncoding(start, 4);
8859 if (enc != XML_CHAR_ENCODING_NONE) {
8860 xmlSwitchEncoding(ctxt, enc);
8861 }
Owen Taylor3473f882001-02-23 17:55:21 +00008862 }
8863
8864
8865 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008866 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008867 }
8868
8869 /*
8870 * Check for the XMLDecl in the Prolog.
8871 */
8872 GROW;
8873 if ((RAW == '<') && (NXT(1) == '?') &&
8874 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8875 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8876
8877 /*
8878 * Note that we will switch encoding on the fly.
8879 */
8880 xmlParseXMLDecl(ctxt);
8881 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8882 /*
8883 * The XML REC instructs us to stop parsing right here
8884 */
8885 return(-1);
8886 }
8887 ctxt->standalone = ctxt->input->standalone;
8888 SKIP_BLANKS;
8889 } else {
8890 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8891 }
8892 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8893 ctxt->sax->startDocument(ctxt->userData);
8894
8895 /*
8896 * The Misc part of the Prolog
8897 */
8898 GROW;
8899 xmlParseMisc(ctxt);
8900
8901 /*
8902 * Then possibly doc type declaration(s) and more Misc
8903 * (doctypedecl Misc*)?
8904 */
8905 GROW;
8906 if ((RAW == '<') && (NXT(1) == '!') &&
8907 (NXT(2) == 'D') && (NXT(3) == 'O') &&
8908 (NXT(4) == 'C') && (NXT(5) == 'T') &&
8909 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
8910 (NXT(8) == 'E')) {
8911
8912 ctxt->inSubset = 1;
8913 xmlParseDocTypeDecl(ctxt);
8914 if (RAW == '[') {
8915 ctxt->instate = XML_PARSER_DTD;
8916 xmlParseInternalSubset(ctxt);
8917 }
8918
8919 /*
8920 * Create and update the external subset.
8921 */
8922 ctxt->inSubset = 2;
8923 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8924 (!ctxt->disableSAX))
8925 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8926 ctxt->extSubSystem, ctxt->extSubURI);
8927 ctxt->inSubset = 0;
8928
8929
8930 ctxt->instate = XML_PARSER_PROLOG;
8931 xmlParseMisc(ctxt);
8932 }
8933
8934 /*
8935 * Time to start parsing the tree itself
8936 */
8937 GROW;
8938 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008939 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8940 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008941 } else {
8942 ctxt->instate = XML_PARSER_CONTENT;
8943 xmlParseElement(ctxt);
8944 ctxt->instate = XML_PARSER_EPILOG;
8945
8946
8947 /*
8948 * The Misc part at the end
8949 */
8950 xmlParseMisc(ctxt);
8951
Daniel Veillard561b7f82002-03-20 21:55:57 +00008952 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008953 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008954 }
8955 ctxt->instate = XML_PARSER_EOF;
8956 }
8957
8958 /*
8959 * SAX: end of the document processing.
8960 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008961 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008962 ctxt->sax->endDocument(ctxt->userData);
8963
Daniel Veillard5997aca2002-03-18 18:36:20 +00008964 /*
8965 * Remove locally kept entity definitions if the tree was not built
8966 */
8967 if ((ctxt->myDoc != NULL) &&
8968 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8969 xmlFreeDoc(ctxt->myDoc);
8970 ctxt->myDoc = NULL;
8971 }
8972
Daniel Veillardc7612992002-02-17 22:47:37 +00008973 if (! ctxt->wellFormed) {
8974 ctxt->valid = 0;
8975 return(-1);
8976 }
Owen Taylor3473f882001-02-23 17:55:21 +00008977 return(0);
8978}
8979
8980/**
8981 * xmlParseExtParsedEnt:
8982 * @ctxt: an XML parser context
8983 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008984 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008985 * An external general parsed entity is well-formed if it matches the
8986 * production labeled extParsedEnt.
8987 *
8988 * [78] extParsedEnt ::= TextDecl? content
8989 *
8990 * Returns 0, -1 in case of error. the parser context is augmented
8991 * as a result of the parsing.
8992 */
8993
8994int
8995xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8996 xmlChar start[4];
8997 xmlCharEncoding enc;
8998
8999 xmlDefaultSAXHandlerInit();
9000
Daniel Veillard309f81d2003-09-23 09:02:53 +00009001 xmlDetectSAX2(ctxt);
9002
Owen Taylor3473f882001-02-23 17:55:21 +00009003 GROW;
9004
9005 /*
9006 * SAX: beginning of the document processing.
9007 */
9008 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9009 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9010
9011 /*
9012 * Get the 4 first bytes and decode the charset
9013 * if enc != XML_CHAR_ENCODING_NONE
9014 * plug some encoding conversion routines.
9015 */
9016 start[0] = RAW;
9017 start[1] = NXT(1);
9018 start[2] = NXT(2);
9019 start[3] = NXT(3);
9020 enc = xmlDetectCharEncoding(start, 4);
9021 if (enc != XML_CHAR_ENCODING_NONE) {
9022 xmlSwitchEncoding(ctxt, enc);
9023 }
9024
9025
9026 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009027 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009028 }
9029
9030 /*
9031 * Check for the XMLDecl in the Prolog.
9032 */
9033 GROW;
9034 if ((RAW == '<') && (NXT(1) == '?') &&
9035 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9036 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9037
9038 /*
9039 * Note that we will switch encoding on the fly.
9040 */
9041 xmlParseXMLDecl(ctxt);
9042 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9043 /*
9044 * The XML REC instructs us to stop parsing right here
9045 */
9046 return(-1);
9047 }
9048 SKIP_BLANKS;
9049 } else {
9050 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9051 }
9052 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9053 ctxt->sax->startDocument(ctxt->userData);
9054
9055 /*
9056 * Doing validity checking on chunk doesn't make sense
9057 */
9058 ctxt->instate = XML_PARSER_CONTENT;
9059 ctxt->validate = 0;
9060 ctxt->loadsubset = 0;
9061 ctxt->depth = 0;
9062
9063 xmlParseContent(ctxt);
9064
9065 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009066 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009067 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009068 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009069 }
9070
9071 /*
9072 * SAX: end of the document processing.
9073 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009074 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009075 ctxt->sax->endDocument(ctxt->userData);
9076
9077 if (! ctxt->wellFormed) return(-1);
9078 return(0);
9079}
9080
9081/************************************************************************
9082 * *
9083 * Progressive parsing interfaces *
9084 * *
9085 ************************************************************************/
9086
9087/**
9088 * xmlParseLookupSequence:
9089 * @ctxt: an XML parser context
9090 * @first: the first char to lookup
9091 * @next: the next char to lookup or zero
9092 * @third: the next char to lookup or zero
9093 *
9094 * Try to find if a sequence (first, next, third) or just (first next) or
9095 * (first) is available in the input stream.
9096 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9097 * to avoid rescanning sequences of bytes, it DOES change the state of the
9098 * parser, do not use liberally.
9099 *
9100 * Returns the index to the current parsing point if the full sequence
9101 * is available, -1 otherwise.
9102 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009103static int
Owen Taylor3473f882001-02-23 17:55:21 +00009104xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9105 xmlChar next, xmlChar third) {
9106 int base, len;
9107 xmlParserInputPtr in;
9108 const xmlChar *buf;
9109
9110 in = ctxt->input;
9111 if (in == NULL) return(-1);
9112 base = in->cur - in->base;
9113 if (base < 0) return(-1);
9114 if (ctxt->checkIndex > base)
9115 base = ctxt->checkIndex;
9116 if (in->buf == NULL) {
9117 buf = in->base;
9118 len = in->length;
9119 } else {
9120 buf = in->buf->buffer->content;
9121 len = in->buf->buffer->use;
9122 }
9123 /* take into account the sequence length */
9124 if (third) len -= 2;
9125 else if (next) len --;
9126 for (;base < len;base++) {
9127 if (buf[base] == first) {
9128 if (third != 0) {
9129 if ((buf[base + 1] != next) ||
9130 (buf[base + 2] != third)) continue;
9131 } else if (next != 0) {
9132 if (buf[base + 1] != next) continue;
9133 }
9134 ctxt->checkIndex = 0;
9135#ifdef DEBUG_PUSH
9136 if (next == 0)
9137 xmlGenericError(xmlGenericErrorContext,
9138 "PP: lookup '%c' found at %d\n",
9139 first, base);
9140 else if (third == 0)
9141 xmlGenericError(xmlGenericErrorContext,
9142 "PP: lookup '%c%c' found at %d\n",
9143 first, next, base);
9144 else
9145 xmlGenericError(xmlGenericErrorContext,
9146 "PP: lookup '%c%c%c' found at %d\n",
9147 first, next, third, base);
9148#endif
9149 return(base - (in->cur - in->base));
9150 }
9151 }
9152 ctxt->checkIndex = base;
9153#ifdef DEBUG_PUSH
9154 if (next == 0)
9155 xmlGenericError(xmlGenericErrorContext,
9156 "PP: lookup '%c' failed\n", first);
9157 else if (third == 0)
9158 xmlGenericError(xmlGenericErrorContext,
9159 "PP: lookup '%c%c' failed\n", first, next);
9160 else
9161 xmlGenericError(xmlGenericErrorContext,
9162 "PP: lookup '%c%c%c' failed\n", first, next, third);
9163#endif
9164 return(-1);
9165}
9166
9167/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009168 * xmlParseGetLasts:
9169 * @ctxt: an XML parser context
9170 * @lastlt: pointer to store the last '<' from the input
9171 * @lastgt: pointer to store the last '>' from the input
9172 *
9173 * Lookup the last < and > in the current chunk
9174 */
9175static void
9176xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9177 const xmlChar **lastgt) {
9178 const xmlChar *tmp;
9179
9180 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9181 xmlGenericError(xmlGenericErrorContext,
9182 "Internal error: xmlParseGetLasts\n");
9183 return;
9184 }
9185 if ((ctxt->progressive == 1) && (ctxt->inputNr == 1)) {
9186 tmp = ctxt->input->end;
9187 tmp--;
9188 while ((tmp >= ctxt->input->base) && (*tmp != '<') &&
9189 (*tmp != '>')) tmp--;
9190 if (tmp < ctxt->input->base) {
9191 *lastlt = NULL;
9192 *lastgt = NULL;
9193 } else if (*tmp == '<') {
9194 *lastlt = tmp;
9195 tmp--;
9196 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9197 if (tmp < ctxt->input->base)
9198 *lastgt = NULL;
9199 else
9200 *lastgt = tmp;
9201 } else {
9202 *lastgt = tmp;
9203 tmp--;
9204 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
9205 if (tmp < ctxt->input->base)
9206 *lastlt = NULL;
9207 else
9208 *lastlt = tmp;
9209 }
9210
9211 } else {
9212 *lastlt = NULL;
9213 *lastgt = NULL;
9214 }
9215}
9216/**
Owen Taylor3473f882001-02-23 17:55:21 +00009217 * xmlParseTryOrFinish:
9218 * @ctxt: an XML parser context
9219 * @terminate: last chunk indicator
9220 *
9221 * Try to progress on parsing
9222 *
9223 * Returns zero if no parsing was possible
9224 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009225static int
Owen Taylor3473f882001-02-23 17:55:21 +00009226xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9227 int ret = 0;
9228 int avail;
9229 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009230 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009231
9232#ifdef DEBUG_PUSH
9233 switch (ctxt->instate) {
9234 case XML_PARSER_EOF:
9235 xmlGenericError(xmlGenericErrorContext,
9236 "PP: try EOF\n"); break;
9237 case XML_PARSER_START:
9238 xmlGenericError(xmlGenericErrorContext,
9239 "PP: try START\n"); break;
9240 case XML_PARSER_MISC:
9241 xmlGenericError(xmlGenericErrorContext,
9242 "PP: try MISC\n");break;
9243 case XML_PARSER_COMMENT:
9244 xmlGenericError(xmlGenericErrorContext,
9245 "PP: try COMMENT\n");break;
9246 case XML_PARSER_PROLOG:
9247 xmlGenericError(xmlGenericErrorContext,
9248 "PP: try PROLOG\n");break;
9249 case XML_PARSER_START_TAG:
9250 xmlGenericError(xmlGenericErrorContext,
9251 "PP: try START_TAG\n");break;
9252 case XML_PARSER_CONTENT:
9253 xmlGenericError(xmlGenericErrorContext,
9254 "PP: try CONTENT\n");break;
9255 case XML_PARSER_CDATA_SECTION:
9256 xmlGenericError(xmlGenericErrorContext,
9257 "PP: try CDATA_SECTION\n");break;
9258 case XML_PARSER_END_TAG:
9259 xmlGenericError(xmlGenericErrorContext,
9260 "PP: try END_TAG\n");break;
9261 case XML_PARSER_ENTITY_DECL:
9262 xmlGenericError(xmlGenericErrorContext,
9263 "PP: try ENTITY_DECL\n");break;
9264 case XML_PARSER_ENTITY_VALUE:
9265 xmlGenericError(xmlGenericErrorContext,
9266 "PP: try ENTITY_VALUE\n");break;
9267 case XML_PARSER_ATTRIBUTE_VALUE:
9268 xmlGenericError(xmlGenericErrorContext,
9269 "PP: try ATTRIBUTE_VALUE\n");break;
9270 case XML_PARSER_DTD:
9271 xmlGenericError(xmlGenericErrorContext,
9272 "PP: try DTD\n");break;
9273 case XML_PARSER_EPILOG:
9274 xmlGenericError(xmlGenericErrorContext,
9275 "PP: try EPILOG\n");break;
9276 case XML_PARSER_PI:
9277 xmlGenericError(xmlGenericErrorContext,
9278 "PP: try PI\n");break;
9279 case XML_PARSER_IGNORE:
9280 xmlGenericError(xmlGenericErrorContext,
9281 "PP: try IGNORE\n");break;
9282 }
9283#endif
9284
Daniel Veillarda880b122003-04-21 21:36:41 +00009285 if (ctxt->input->cur - ctxt->input->base > 4096) {
9286 xmlSHRINK(ctxt);
9287 ctxt->checkIndex = 0;
9288 }
9289 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009290
Daniel Veillarda880b122003-04-21 21:36:41 +00009291 while (1) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009292 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9293 return(0);
9294
9295
Owen Taylor3473f882001-02-23 17:55:21 +00009296 /*
9297 * Pop-up of finished entities.
9298 */
9299 while ((RAW == 0) && (ctxt->inputNr > 1))
9300 xmlPopInput(ctxt);
9301
9302 if (ctxt->input ==NULL) break;
9303 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009304 avail = ctxt->input->length -
9305 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009306 else {
9307 /*
9308 * If we are operating on converted input, try to flush
9309 * remainng chars to avoid them stalling in the non-converted
9310 * buffer.
9311 */
9312 if ((ctxt->input->buf->raw != NULL) &&
9313 (ctxt->input->buf->raw->use > 0)) {
9314 int base = ctxt->input->base -
9315 ctxt->input->buf->buffer->content;
9316 int current = ctxt->input->cur - ctxt->input->base;
9317
9318 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9319 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9320 ctxt->input->cur = ctxt->input->base + current;
9321 ctxt->input->end =
9322 &ctxt->input->buf->buffer->content[
9323 ctxt->input->buf->buffer->use];
9324 }
9325 avail = ctxt->input->buf->buffer->use -
9326 (ctxt->input->cur - ctxt->input->base);
9327 }
Owen Taylor3473f882001-02-23 17:55:21 +00009328 if (avail < 1)
9329 goto done;
9330 switch (ctxt->instate) {
9331 case XML_PARSER_EOF:
9332 /*
9333 * Document parsing is done !
9334 */
9335 goto done;
9336 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009337 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9338 xmlChar start[4];
9339 xmlCharEncoding enc;
9340
9341 /*
9342 * Very first chars read from the document flow.
9343 */
9344 if (avail < 4)
9345 goto done;
9346
9347 /*
9348 * Get the 4 first bytes and decode the charset
9349 * if enc != XML_CHAR_ENCODING_NONE
9350 * plug some encoding conversion routines.
9351 */
9352 start[0] = RAW;
9353 start[1] = NXT(1);
9354 start[2] = NXT(2);
9355 start[3] = NXT(3);
9356 enc = xmlDetectCharEncoding(start, 4);
9357 if (enc != XML_CHAR_ENCODING_NONE) {
9358 xmlSwitchEncoding(ctxt, enc);
9359 }
9360 break;
9361 }
Owen Taylor3473f882001-02-23 17:55:21 +00009362
9363 cur = ctxt->input->cur[0];
9364 next = ctxt->input->cur[1];
9365 if (cur == 0) {
9366 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9367 ctxt->sax->setDocumentLocator(ctxt->userData,
9368 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009369 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009370 ctxt->instate = XML_PARSER_EOF;
9371#ifdef DEBUG_PUSH
9372 xmlGenericError(xmlGenericErrorContext,
9373 "PP: entering EOF\n");
9374#endif
9375 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9376 ctxt->sax->endDocument(ctxt->userData);
9377 goto done;
9378 }
9379 if ((cur == '<') && (next == '?')) {
9380 /* PI or XML decl */
9381 if (avail < 5) return(ret);
9382 if ((!terminate) &&
9383 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9384 return(ret);
9385 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9386 ctxt->sax->setDocumentLocator(ctxt->userData,
9387 &xmlDefaultSAXLocator);
9388 if ((ctxt->input->cur[2] == 'x') &&
9389 (ctxt->input->cur[3] == 'm') &&
9390 (ctxt->input->cur[4] == 'l') &&
9391 (IS_BLANK(ctxt->input->cur[5]))) {
9392 ret += 5;
9393#ifdef DEBUG_PUSH
9394 xmlGenericError(xmlGenericErrorContext,
9395 "PP: Parsing XML Decl\n");
9396#endif
9397 xmlParseXMLDecl(ctxt);
9398 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9399 /*
9400 * The XML REC instructs us to stop parsing right
9401 * here
9402 */
9403 ctxt->instate = XML_PARSER_EOF;
9404 return(0);
9405 }
9406 ctxt->standalone = ctxt->input->standalone;
9407 if ((ctxt->encoding == NULL) &&
9408 (ctxt->input->encoding != NULL))
9409 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9410 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9411 (!ctxt->disableSAX))
9412 ctxt->sax->startDocument(ctxt->userData);
9413 ctxt->instate = XML_PARSER_MISC;
9414#ifdef DEBUG_PUSH
9415 xmlGenericError(xmlGenericErrorContext,
9416 "PP: entering MISC\n");
9417#endif
9418 } else {
9419 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9420 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9421 (!ctxt->disableSAX))
9422 ctxt->sax->startDocument(ctxt->userData);
9423 ctxt->instate = XML_PARSER_MISC;
9424#ifdef DEBUG_PUSH
9425 xmlGenericError(xmlGenericErrorContext,
9426 "PP: entering MISC\n");
9427#endif
9428 }
9429 } else {
9430 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9431 ctxt->sax->setDocumentLocator(ctxt->userData,
9432 &xmlDefaultSAXLocator);
9433 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9434 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9435 (!ctxt->disableSAX))
9436 ctxt->sax->startDocument(ctxt->userData);
9437 ctxt->instate = XML_PARSER_MISC;
9438#ifdef DEBUG_PUSH
9439 xmlGenericError(xmlGenericErrorContext,
9440 "PP: entering MISC\n");
9441#endif
9442 }
9443 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009444 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009445 const xmlChar *name;
9446 const xmlChar *prefix;
9447 const xmlChar *URI;
9448 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009449
9450 if ((avail < 2) && (ctxt->inputNr == 1))
9451 goto done;
9452 cur = ctxt->input->cur[0];
9453 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009454 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009455 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009456 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9457 ctxt->sax->endDocument(ctxt->userData);
9458 goto done;
9459 }
9460 if (!terminate) {
9461 if (ctxt->progressive) {
9462 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9463 goto done;
9464 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9465 goto done;
9466 }
9467 }
9468 if (ctxt->spaceNr == 0)
9469 spacePush(ctxt, -1);
9470 else
9471 spacePush(ctxt, *ctxt->space);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009472 if (ctxt->sax2)
9473 name = xmlParseStartTag2(ctxt, &prefix, &URI);
9474 else
9475 name = xmlParseStartTag(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009476 if (name == NULL) {
9477 spacePop(ctxt);
9478 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009479 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9480 ctxt->sax->endDocument(ctxt->userData);
9481 goto done;
9482 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009483#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009484 /*
9485 * [ VC: Root Element Type ]
9486 * The Name in the document type declaration must match
9487 * the element type of the root element.
9488 */
9489 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9490 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9491 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009492#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009493
9494 /*
9495 * Check for an Empty Element.
9496 */
9497 if ((RAW == '/') && (NXT(1) == '>')) {
9498 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009499
9500 if (ctxt->sax2) {
9501 if ((ctxt->sax != NULL) &&
9502 (ctxt->sax->endElementNs != NULL) &&
9503 (!ctxt->disableSAX))
9504 ctxt->sax->endElementNs(ctxt->userData, name,
9505 prefix, URI);
9506 } else {
9507 if ((ctxt->sax != NULL) &&
9508 (ctxt->sax->endElement != NULL) &&
9509 (!ctxt->disableSAX))
9510 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009511 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009512 spacePop(ctxt);
9513 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009514 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009515 } else {
9516 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009517 }
9518 break;
9519 }
9520 if (RAW == '>') {
9521 NEXT;
9522 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009523 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009524 "Couldn't find end of Start Tag %s\n",
9525 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009526 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009527 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009528 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009529 if (ctxt->sax2)
9530 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
9531 else
9532 namePush(ctxt, name);
9533
Daniel Veillarda880b122003-04-21 21:36:41 +00009534 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009535 break;
9536 }
9537 case XML_PARSER_CONTENT: {
9538 const xmlChar *test;
9539 unsigned int cons;
9540 if ((avail < 2) && (ctxt->inputNr == 1))
9541 goto done;
9542 cur = ctxt->input->cur[0];
9543 next = ctxt->input->cur[1];
9544
9545 test = CUR_PTR;
9546 cons = ctxt->input->consumed;
9547 if ((cur == '<') && (next == '/')) {
9548 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009549 break;
9550 } else if ((cur == '<') && (next == '?')) {
9551 if ((!terminate) &&
9552 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9553 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009554 xmlParsePI(ctxt);
9555 } else if ((cur == '<') && (next != '!')) {
9556 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009557 break;
9558 } else if ((cur == '<') && (next == '!') &&
9559 (ctxt->input->cur[2] == '-') &&
9560 (ctxt->input->cur[3] == '-')) {
9561 if ((!terminate) &&
9562 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9563 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009564 xmlParseComment(ctxt);
9565 ctxt->instate = XML_PARSER_CONTENT;
9566 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9567 (ctxt->input->cur[2] == '[') &&
9568 (ctxt->input->cur[3] == 'C') &&
9569 (ctxt->input->cur[4] == 'D') &&
9570 (ctxt->input->cur[5] == 'A') &&
9571 (ctxt->input->cur[6] == 'T') &&
9572 (ctxt->input->cur[7] == 'A') &&
9573 (ctxt->input->cur[8] == '[')) {
9574 SKIP(9);
9575 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009576 break;
9577 } else if ((cur == '<') && (next == '!') &&
9578 (avail < 9)) {
9579 goto done;
9580 } else if (cur == '&') {
9581 if ((!terminate) &&
9582 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9583 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009584 xmlParseReference(ctxt);
9585 } else {
9586 /* TODO Avoid the extra copy, handle directly !!! */
9587 /*
9588 * Goal of the following test is:
9589 * - minimize calls to the SAX 'character' callback
9590 * when they are mergeable
9591 * - handle an problem for isBlank when we only parse
9592 * a sequence of blank chars and the next one is
9593 * not available to check against '<' presence.
9594 * - tries to homogenize the differences in SAX
9595 * callbacks between the push and pull versions
9596 * of the parser.
9597 */
9598 if ((ctxt->inputNr == 1) &&
9599 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9600 if (!terminate) {
9601 if (ctxt->progressive) {
9602 if ((lastlt == NULL) ||
9603 (ctxt->input->cur > lastlt))
9604 goto done;
9605 } else if (xmlParseLookupSequence(ctxt,
9606 '<', 0, 0) < 0) {
9607 goto done;
9608 }
9609 }
9610 }
9611 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009612 xmlParseCharData(ctxt, 0);
9613 }
9614 /*
9615 * Pop-up of finished entities.
9616 */
9617 while ((RAW == 0) && (ctxt->inputNr > 1))
9618 xmlPopInput(ctxt);
9619 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009620 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9621 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009622 ctxt->instate = XML_PARSER_EOF;
9623 break;
9624 }
9625 break;
9626 }
9627 case XML_PARSER_END_TAG:
9628 if (avail < 2)
9629 goto done;
9630 if (!terminate) {
9631 if (ctxt->progressive) {
9632 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9633 goto done;
9634 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9635 goto done;
9636 }
9637 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009638 if (ctxt->sax2) {
9639 xmlParseEndTag2(ctxt,
9640 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9641 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
9642 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1]);
9643 nameNsPop(ctxt);
9644 } else
9645 xmlParseEndTag1(ctxt, 0);
9646 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009647 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009648 } else {
9649 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009650 }
9651 break;
9652 case XML_PARSER_CDATA_SECTION: {
9653 /*
9654 * The Push mode need to have the SAX callback for
9655 * cdataBlock merge back contiguous callbacks.
9656 */
9657 int base;
9658
9659 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9660 if (base < 0) {
9661 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
9662 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9663 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009664 ctxt->sax->cdataBlock(ctxt->userData,
9665 ctxt->input->cur,
9666 XML_PARSER_BIG_BUFFER_SIZE);
9667 else if (ctxt->sax->characters != NULL)
9668 ctxt->sax->characters(ctxt->userData,
9669 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00009670 XML_PARSER_BIG_BUFFER_SIZE);
9671 }
9672 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
9673 ctxt->checkIndex = 0;
9674 }
9675 goto done;
9676 } else {
9677 if ((ctxt->sax != NULL) && (base > 0) &&
9678 (!ctxt->disableSAX)) {
9679 if (ctxt->sax->cdataBlock != NULL)
9680 ctxt->sax->cdataBlock(ctxt->userData,
9681 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009682 else if (ctxt->sax->characters != NULL)
9683 ctxt->sax->characters(ctxt->userData,
9684 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009685 }
9686 SKIP(base + 3);
9687 ctxt->checkIndex = 0;
9688 ctxt->instate = XML_PARSER_CONTENT;
9689#ifdef DEBUG_PUSH
9690 xmlGenericError(xmlGenericErrorContext,
9691 "PP: entering CONTENT\n");
9692#endif
9693 }
9694 break;
9695 }
Owen Taylor3473f882001-02-23 17:55:21 +00009696 case XML_PARSER_MISC:
9697 SKIP_BLANKS;
9698 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009699 avail = ctxt->input->length -
9700 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009701 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009702 avail = ctxt->input->buf->buffer->use -
9703 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009704 if (avail < 2)
9705 goto done;
9706 cur = ctxt->input->cur[0];
9707 next = ctxt->input->cur[1];
9708 if ((cur == '<') && (next == '?')) {
9709 if ((!terminate) &&
9710 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9711 goto done;
9712#ifdef DEBUG_PUSH
9713 xmlGenericError(xmlGenericErrorContext,
9714 "PP: Parsing PI\n");
9715#endif
9716 xmlParsePI(ctxt);
9717 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009718 (ctxt->input->cur[2] == '-') &&
9719 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009720 if ((!terminate) &&
9721 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9722 goto done;
9723#ifdef DEBUG_PUSH
9724 xmlGenericError(xmlGenericErrorContext,
9725 "PP: Parsing Comment\n");
9726#endif
9727 xmlParseComment(ctxt);
9728 ctxt->instate = XML_PARSER_MISC;
9729 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009730 (ctxt->input->cur[2] == 'D') &&
9731 (ctxt->input->cur[3] == 'O') &&
9732 (ctxt->input->cur[4] == 'C') &&
9733 (ctxt->input->cur[5] == 'T') &&
9734 (ctxt->input->cur[6] == 'Y') &&
9735 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009736 (ctxt->input->cur[8] == 'E')) {
9737 if ((!terminate) &&
9738 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9739 goto done;
9740#ifdef DEBUG_PUSH
9741 xmlGenericError(xmlGenericErrorContext,
9742 "PP: Parsing internal subset\n");
9743#endif
9744 ctxt->inSubset = 1;
9745 xmlParseDocTypeDecl(ctxt);
9746 if (RAW == '[') {
9747 ctxt->instate = XML_PARSER_DTD;
9748#ifdef DEBUG_PUSH
9749 xmlGenericError(xmlGenericErrorContext,
9750 "PP: entering DTD\n");
9751#endif
9752 } else {
9753 /*
9754 * Create and update the external subset.
9755 */
9756 ctxt->inSubset = 2;
9757 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9758 (ctxt->sax->externalSubset != NULL))
9759 ctxt->sax->externalSubset(ctxt->userData,
9760 ctxt->intSubName, ctxt->extSubSystem,
9761 ctxt->extSubURI);
9762 ctxt->inSubset = 0;
9763 ctxt->instate = XML_PARSER_PROLOG;
9764#ifdef DEBUG_PUSH
9765 xmlGenericError(xmlGenericErrorContext,
9766 "PP: entering PROLOG\n");
9767#endif
9768 }
9769 } else if ((cur == '<') && (next == '!') &&
9770 (avail < 9)) {
9771 goto done;
9772 } else {
9773 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009774 ctxt->progressive = 1;
9775 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009776#ifdef DEBUG_PUSH
9777 xmlGenericError(xmlGenericErrorContext,
9778 "PP: entering START_TAG\n");
9779#endif
9780 }
9781 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009782 case XML_PARSER_PROLOG:
9783 SKIP_BLANKS;
9784 if (ctxt->input->buf == NULL)
9785 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9786 else
9787 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9788 if (avail < 2)
9789 goto done;
9790 cur = ctxt->input->cur[0];
9791 next = ctxt->input->cur[1];
9792 if ((cur == '<') && (next == '?')) {
9793 if ((!terminate) &&
9794 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9795 goto done;
9796#ifdef DEBUG_PUSH
9797 xmlGenericError(xmlGenericErrorContext,
9798 "PP: Parsing PI\n");
9799#endif
9800 xmlParsePI(ctxt);
9801 } else if ((cur == '<') && (next == '!') &&
9802 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9803 if ((!terminate) &&
9804 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9805 goto done;
9806#ifdef DEBUG_PUSH
9807 xmlGenericError(xmlGenericErrorContext,
9808 "PP: Parsing Comment\n");
9809#endif
9810 xmlParseComment(ctxt);
9811 ctxt->instate = XML_PARSER_PROLOG;
9812 } else if ((cur == '<') && (next == '!') &&
9813 (avail < 4)) {
9814 goto done;
9815 } else {
9816 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009817 ctxt->progressive = 1;
9818 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009819#ifdef DEBUG_PUSH
9820 xmlGenericError(xmlGenericErrorContext,
9821 "PP: entering START_TAG\n");
9822#endif
9823 }
9824 break;
9825 case XML_PARSER_EPILOG:
9826 SKIP_BLANKS;
9827 if (ctxt->input->buf == NULL)
9828 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9829 else
9830 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9831 if (avail < 2)
9832 goto done;
9833 cur = ctxt->input->cur[0];
9834 next = ctxt->input->cur[1];
9835 if ((cur == '<') && (next == '?')) {
9836 if ((!terminate) &&
9837 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9838 goto done;
9839#ifdef DEBUG_PUSH
9840 xmlGenericError(xmlGenericErrorContext,
9841 "PP: Parsing PI\n");
9842#endif
9843 xmlParsePI(ctxt);
9844 ctxt->instate = XML_PARSER_EPILOG;
9845 } else if ((cur == '<') && (next == '!') &&
9846 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9847 if ((!terminate) &&
9848 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9849 goto done;
9850#ifdef DEBUG_PUSH
9851 xmlGenericError(xmlGenericErrorContext,
9852 "PP: Parsing Comment\n");
9853#endif
9854 xmlParseComment(ctxt);
9855 ctxt->instate = XML_PARSER_EPILOG;
9856 } else if ((cur == '<') && (next == '!') &&
9857 (avail < 4)) {
9858 goto done;
9859 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009860 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009861 ctxt->instate = XML_PARSER_EOF;
9862#ifdef DEBUG_PUSH
9863 xmlGenericError(xmlGenericErrorContext,
9864 "PP: entering EOF\n");
9865#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009866 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009867 ctxt->sax->endDocument(ctxt->userData);
9868 goto done;
9869 }
9870 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009871 case XML_PARSER_DTD: {
9872 /*
9873 * Sorry but progressive parsing of the internal subset
9874 * is not expected to be supported. We first check that
9875 * the full content of the internal subset is available and
9876 * the parsing is launched only at that point.
9877 * Internal subset ends up with "']' S? '>'" in an unescaped
9878 * section and not in a ']]>' sequence which are conditional
9879 * sections (whoever argued to keep that crap in XML deserve
9880 * a place in hell !).
9881 */
9882 int base, i;
9883 xmlChar *buf;
9884 xmlChar quote = 0;
9885
9886 base = ctxt->input->cur - ctxt->input->base;
9887 if (base < 0) return(0);
9888 if (ctxt->checkIndex > base)
9889 base = ctxt->checkIndex;
9890 buf = ctxt->input->buf->buffer->content;
9891 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9892 base++) {
9893 if (quote != 0) {
9894 if (buf[base] == quote)
9895 quote = 0;
9896 continue;
9897 }
9898 if (buf[base] == '"') {
9899 quote = '"';
9900 continue;
9901 }
9902 if (buf[base] == '\'') {
9903 quote = '\'';
9904 continue;
9905 }
9906 if (buf[base] == ']') {
9907 if ((unsigned int) base +1 >=
9908 ctxt->input->buf->buffer->use)
9909 break;
9910 if (buf[base + 1] == ']') {
9911 /* conditional crap, skip both ']' ! */
9912 base++;
9913 continue;
9914 }
9915 for (i = 0;
9916 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9917 i++) {
9918 if (buf[base + i] == '>')
9919 goto found_end_int_subset;
9920 }
9921 break;
9922 }
9923 }
9924 /*
9925 * We didn't found the end of the Internal subset
9926 */
9927 if (quote == 0)
9928 ctxt->checkIndex = base;
9929#ifdef DEBUG_PUSH
9930 if (next == 0)
9931 xmlGenericError(xmlGenericErrorContext,
9932 "PP: lookup of int subset end filed\n");
9933#endif
9934 goto done;
9935
9936found_end_int_subset:
9937 xmlParseInternalSubset(ctxt);
9938 ctxt->inSubset = 2;
9939 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9940 (ctxt->sax->externalSubset != NULL))
9941 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9942 ctxt->extSubSystem, ctxt->extSubURI);
9943 ctxt->inSubset = 0;
9944 ctxt->instate = XML_PARSER_PROLOG;
9945 ctxt->checkIndex = 0;
9946#ifdef DEBUG_PUSH
9947 xmlGenericError(xmlGenericErrorContext,
9948 "PP: entering PROLOG\n");
9949#endif
9950 break;
9951 }
9952 case XML_PARSER_COMMENT:
9953 xmlGenericError(xmlGenericErrorContext,
9954 "PP: internal error, state == COMMENT\n");
9955 ctxt->instate = XML_PARSER_CONTENT;
9956#ifdef DEBUG_PUSH
9957 xmlGenericError(xmlGenericErrorContext,
9958 "PP: entering CONTENT\n");
9959#endif
9960 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009961 case XML_PARSER_IGNORE:
9962 xmlGenericError(xmlGenericErrorContext,
9963 "PP: internal error, state == IGNORE");
9964 ctxt->instate = XML_PARSER_DTD;
9965#ifdef DEBUG_PUSH
9966 xmlGenericError(xmlGenericErrorContext,
9967 "PP: entering DTD\n");
9968#endif
9969 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009970 case XML_PARSER_PI:
9971 xmlGenericError(xmlGenericErrorContext,
9972 "PP: internal error, state == PI\n");
9973 ctxt->instate = XML_PARSER_CONTENT;
9974#ifdef DEBUG_PUSH
9975 xmlGenericError(xmlGenericErrorContext,
9976 "PP: entering CONTENT\n");
9977#endif
9978 break;
9979 case XML_PARSER_ENTITY_DECL:
9980 xmlGenericError(xmlGenericErrorContext,
9981 "PP: internal error, state == ENTITY_DECL\n");
9982 ctxt->instate = XML_PARSER_DTD;
9983#ifdef DEBUG_PUSH
9984 xmlGenericError(xmlGenericErrorContext,
9985 "PP: entering DTD\n");
9986#endif
9987 break;
9988 case XML_PARSER_ENTITY_VALUE:
9989 xmlGenericError(xmlGenericErrorContext,
9990 "PP: internal error, state == ENTITY_VALUE\n");
9991 ctxt->instate = XML_PARSER_CONTENT;
9992#ifdef DEBUG_PUSH
9993 xmlGenericError(xmlGenericErrorContext,
9994 "PP: entering DTD\n");
9995#endif
9996 break;
9997 case XML_PARSER_ATTRIBUTE_VALUE:
9998 xmlGenericError(xmlGenericErrorContext,
9999 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10000 ctxt->instate = XML_PARSER_START_TAG;
10001#ifdef DEBUG_PUSH
10002 xmlGenericError(xmlGenericErrorContext,
10003 "PP: entering START_TAG\n");
10004#endif
10005 break;
10006 case XML_PARSER_SYSTEM_LITERAL:
10007 xmlGenericError(xmlGenericErrorContext,
10008 "PP: internal error, state == SYSTEM_LITERAL\n");
10009 ctxt->instate = XML_PARSER_START_TAG;
10010#ifdef DEBUG_PUSH
10011 xmlGenericError(xmlGenericErrorContext,
10012 "PP: entering START_TAG\n");
10013#endif
10014 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010015 case XML_PARSER_PUBLIC_LITERAL:
10016 xmlGenericError(xmlGenericErrorContext,
10017 "PP: internal error, state == PUBLIC_LITERAL\n");
10018 ctxt->instate = XML_PARSER_START_TAG;
10019#ifdef DEBUG_PUSH
10020 xmlGenericError(xmlGenericErrorContext,
10021 "PP: entering START_TAG\n");
10022#endif
10023 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010024 }
10025 }
10026done:
10027#ifdef DEBUG_PUSH
10028 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10029#endif
10030 return(ret);
10031}
10032
10033/**
Owen Taylor3473f882001-02-23 17:55:21 +000010034 * xmlParseChunk:
10035 * @ctxt: an XML parser context
10036 * @chunk: an char array
10037 * @size: the size in byte of the chunk
10038 * @terminate: last chunk indicator
10039 *
10040 * Parse a Chunk of memory
10041 *
10042 * Returns zero if no error, the xmlParserErrors otherwise.
10043 */
10044int
10045xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10046 int terminate) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010047 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10048 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010049 if (ctxt->instate == XML_PARSER_START)
10050 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010051 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10052 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10053 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10054 int cur = ctxt->input->cur - ctxt->input->base;
10055
10056 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10057 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10058 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010059 ctxt->input->end =
10060 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010061#ifdef DEBUG_PUSH
10062 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10063#endif
10064
Owen Taylor3473f882001-02-23 17:55:21 +000010065 } else if (ctxt->instate != XML_PARSER_EOF) {
10066 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10067 xmlParserInputBufferPtr in = ctxt->input->buf;
10068 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10069 (in->raw != NULL)) {
10070 int nbchars;
10071
10072 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10073 if (nbchars < 0) {
10074 xmlGenericError(xmlGenericErrorContext,
10075 "xmlParseChunk: encoder error\n");
10076 return(XML_ERR_INVALID_ENCODING);
10077 }
10078 }
10079 }
10080 }
10081 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010082 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10083 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010084 if (terminate) {
10085 /*
10086 * Check for termination
10087 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010088 int avail = 0;
10089 if (ctxt->input->buf == NULL)
10090 avail = ctxt->input->length -
10091 (ctxt->input->cur - ctxt->input->base);
10092 else
10093 avail = ctxt->input->buf->buffer->use -
10094 (ctxt->input->cur - ctxt->input->base);
10095
Owen Taylor3473f882001-02-23 17:55:21 +000010096 if ((ctxt->instate != XML_PARSER_EOF) &&
10097 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010098 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010099 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010100 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010101 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010102 }
Owen Taylor3473f882001-02-23 17:55:21 +000010103 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010104 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010105 ctxt->sax->endDocument(ctxt->userData);
10106 }
10107 ctxt->instate = XML_PARSER_EOF;
10108 }
10109 return((xmlParserErrors) ctxt->errNo);
10110}
10111
10112/************************************************************************
10113 * *
10114 * I/O front end functions to the parser *
10115 * *
10116 ************************************************************************/
10117
10118/**
10119 * xmlStopParser:
10120 * @ctxt: an XML parser context
10121 *
10122 * Blocks further parser processing
10123 */
10124void
10125xmlStopParser(xmlParserCtxtPtr ctxt) {
10126 ctxt->instate = XML_PARSER_EOF;
10127 if (ctxt->input != NULL)
10128 ctxt->input->cur = BAD_CAST"";
10129}
10130
10131/**
10132 * xmlCreatePushParserCtxt:
10133 * @sax: a SAX handler
10134 * @user_data: The user data returned on SAX callbacks
10135 * @chunk: a pointer to an array of chars
10136 * @size: number of chars in the array
10137 * @filename: an optional file name or URI
10138 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010139 * Create a parser context for using the XML parser in push mode.
10140 * If @buffer and @size are non-NULL, the data is used to detect
10141 * the encoding. The remaining characters will be parsed so they
10142 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010143 * To allow content encoding detection, @size should be >= 4
10144 * The value of @filename is used for fetching external entities
10145 * and error/warning reports.
10146 *
10147 * Returns the new parser context or NULL
10148 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010149
Owen Taylor3473f882001-02-23 17:55:21 +000010150xmlParserCtxtPtr
10151xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10152 const char *chunk, int size, const char *filename) {
10153 xmlParserCtxtPtr ctxt;
10154 xmlParserInputPtr inputStream;
10155 xmlParserInputBufferPtr buf;
10156 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10157
10158 /*
10159 * plug some encoding conversion routines
10160 */
10161 if ((chunk != NULL) && (size >= 4))
10162 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10163
10164 buf = xmlAllocParserInputBuffer(enc);
10165 if (buf == NULL) return(NULL);
10166
10167 ctxt = xmlNewParserCtxt();
10168 if (ctxt == NULL) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010169 xmlGenericError(xmlGenericErrorContext,
10170 "xml parser: out of memory\n");
10171 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010172 return(NULL);
10173 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010174 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10175 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010176 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010177 xmlFreeParserInputBuffer(buf);
10178 xmlFreeParserCtxt(ctxt);
10179 return(NULL);
10180 }
Owen Taylor3473f882001-02-23 17:55:21 +000010181 if (sax != NULL) {
Daniel Veillard092643b2003-09-25 14:29:29 +000010182 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000010183 xmlFree(ctxt->sax);
10184 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10185 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010186 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010187 xmlFreeParserInputBuffer(buf);
10188 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010189 return(NULL);
10190 }
10191 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10192 if (user_data != NULL)
10193 ctxt->userData = user_data;
10194 }
10195 if (filename == NULL) {
10196 ctxt->directory = NULL;
10197 } else {
10198 ctxt->directory = xmlParserGetDirectory(filename);
10199 }
10200
10201 inputStream = xmlNewInputStream(ctxt);
10202 if (inputStream == NULL) {
10203 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010204 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010205 return(NULL);
10206 }
10207
10208 if (filename == NULL)
10209 inputStream->filename = NULL;
10210 else
Daniel Veillardf4862f02002-09-10 11:13:43 +000010211 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010212 xmlCanonicPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +000010213 inputStream->buf = buf;
10214 inputStream->base = inputStream->buf->buffer->content;
10215 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010216 inputStream->end =
10217 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010218
10219 inputPush(ctxt, inputStream);
10220
10221 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10222 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010223 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10224 int cur = ctxt->input->cur - ctxt->input->base;
10225
Owen Taylor3473f882001-02-23 17:55:21 +000010226 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010227
10228 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10229 ctxt->input->cur = ctxt->input->base + cur;
10230 ctxt->input->end =
10231 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010232#ifdef DEBUG_PUSH
10233 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10234#endif
10235 }
10236
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010237 if (enc != XML_CHAR_ENCODING_NONE) {
10238 xmlSwitchEncoding(ctxt, enc);
10239 }
10240
Owen Taylor3473f882001-02-23 17:55:21 +000010241 return(ctxt);
10242}
10243
10244/**
10245 * xmlCreateIOParserCtxt:
10246 * @sax: a SAX handler
10247 * @user_data: The user data returned on SAX callbacks
10248 * @ioread: an I/O read function
10249 * @ioclose: an I/O close function
10250 * @ioctx: an I/O handler
10251 * @enc: the charset encoding if known
10252 *
10253 * Create a parser context for using the XML parser with an existing
10254 * I/O stream
10255 *
10256 * Returns the new parser context or NULL
10257 */
10258xmlParserCtxtPtr
10259xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10260 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10261 void *ioctx, xmlCharEncoding enc) {
10262 xmlParserCtxtPtr ctxt;
10263 xmlParserInputPtr inputStream;
10264 xmlParserInputBufferPtr buf;
10265
10266 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10267 if (buf == NULL) return(NULL);
10268
10269 ctxt = xmlNewParserCtxt();
10270 if (ctxt == NULL) {
10271 xmlFree(buf);
10272 return(NULL);
10273 }
10274 if (sax != NULL) {
Daniel Veillard092643b2003-09-25 14:29:29 +000010275 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000010276 xmlFree(ctxt->sax);
10277 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10278 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010279 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010280 xmlFree(ctxt);
10281 return(NULL);
10282 }
10283 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10284 if (user_data != NULL)
10285 ctxt->userData = user_data;
10286 }
10287
10288 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10289 if (inputStream == NULL) {
10290 xmlFreeParserCtxt(ctxt);
10291 return(NULL);
10292 }
10293 inputPush(ctxt, inputStream);
10294
10295 return(ctxt);
10296}
10297
Daniel Veillard4432df22003-09-28 18:58:27 +000010298#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010299/************************************************************************
10300 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010301 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010302 * *
10303 ************************************************************************/
10304
10305/**
10306 * xmlIOParseDTD:
10307 * @sax: the SAX handler block or NULL
10308 * @input: an Input Buffer
10309 * @enc: the charset encoding if known
10310 *
10311 * Load and parse a DTD
10312 *
10313 * Returns the resulting xmlDtdPtr or NULL in case of error.
10314 * @input will be freed at parsing end.
10315 */
10316
10317xmlDtdPtr
10318xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10319 xmlCharEncoding enc) {
10320 xmlDtdPtr ret = NULL;
10321 xmlParserCtxtPtr ctxt;
10322 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010323 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010324
10325 if (input == NULL)
10326 return(NULL);
10327
10328 ctxt = xmlNewParserCtxt();
10329 if (ctxt == NULL) {
10330 return(NULL);
10331 }
10332
10333 /*
10334 * Set-up the SAX context
10335 */
10336 if (sax != NULL) {
10337 if (ctxt->sax != NULL)
10338 xmlFree(ctxt->sax);
10339 ctxt->sax = sax;
10340 ctxt->userData = NULL;
10341 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010342 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010343
10344 /*
10345 * generate a parser input from the I/O handler
10346 */
10347
10348 pinput = xmlNewIOInputStream(ctxt, input, enc);
10349 if (pinput == NULL) {
10350 if (sax != NULL) ctxt->sax = NULL;
10351 xmlFreeParserCtxt(ctxt);
10352 return(NULL);
10353 }
10354
10355 /*
10356 * plug some encoding conversion routines here.
10357 */
10358 xmlPushInput(ctxt, pinput);
10359
10360 pinput->filename = NULL;
10361 pinput->line = 1;
10362 pinput->col = 1;
10363 pinput->base = ctxt->input->cur;
10364 pinput->cur = ctxt->input->cur;
10365 pinput->free = NULL;
10366
10367 /*
10368 * let's parse that entity knowing it's an external subset.
10369 */
10370 ctxt->inSubset = 2;
10371 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10372 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10373 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010374
10375 if (enc == XML_CHAR_ENCODING_NONE) {
10376 /*
10377 * Get the 4 first bytes and decode the charset
10378 * if enc != XML_CHAR_ENCODING_NONE
10379 * plug some encoding conversion routines.
10380 */
10381 start[0] = RAW;
10382 start[1] = NXT(1);
10383 start[2] = NXT(2);
10384 start[3] = NXT(3);
10385 enc = xmlDetectCharEncoding(start, 4);
10386 if (enc != XML_CHAR_ENCODING_NONE) {
10387 xmlSwitchEncoding(ctxt, enc);
10388 }
10389 }
10390
Owen Taylor3473f882001-02-23 17:55:21 +000010391 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10392
10393 if (ctxt->myDoc != NULL) {
10394 if (ctxt->wellFormed) {
10395 ret = ctxt->myDoc->extSubset;
10396 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010397 if (ret != NULL) {
10398 xmlNodePtr tmp;
10399
10400 ret->doc = NULL;
10401 tmp = ret->children;
10402 while (tmp != NULL) {
10403 tmp->doc = NULL;
10404 tmp = tmp->next;
10405 }
10406 }
Owen Taylor3473f882001-02-23 17:55:21 +000010407 } else {
10408 ret = NULL;
10409 }
10410 xmlFreeDoc(ctxt->myDoc);
10411 ctxt->myDoc = NULL;
10412 }
10413 if (sax != NULL) ctxt->sax = NULL;
10414 xmlFreeParserCtxt(ctxt);
10415
10416 return(ret);
10417}
10418
10419/**
10420 * xmlSAXParseDTD:
10421 * @sax: the SAX handler block
10422 * @ExternalID: a NAME* containing the External ID of the DTD
10423 * @SystemID: a NAME* containing the URL to the DTD
10424 *
10425 * Load and parse an external subset.
10426 *
10427 * Returns the resulting xmlDtdPtr or NULL in case of error.
10428 */
10429
10430xmlDtdPtr
10431xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10432 const xmlChar *SystemID) {
10433 xmlDtdPtr ret = NULL;
10434 xmlParserCtxtPtr ctxt;
10435 xmlParserInputPtr input = NULL;
10436 xmlCharEncoding enc;
10437
10438 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10439
10440 ctxt = xmlNewParserCtxt();
10441 if (ctxt == NULL) {
10442 return(NULL);
10443 }
10444
10445 /*
10446 * Set-up the SAX context
10447 */
10448 if (sax != NULL) {
10449 if (ctxt->sax != NULL)
10450 xmlFree(ctxt->sax);
10451 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010452 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010453 }
10454
10455 /*
10456 * Ask the Entity resolver to load the damn thing
10457 */
10458
10459 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillardc6abc3d2003-04-26 13:27:30 +000010460 input = ctxt->sax->resolveEntity(ctxt, ExternalID, SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010461 if (input == NULL) {
10462 if (sax != NULL) ctxt->sax = NULL;
10463 xmlFreeParserCtxt(ctxt);
10464 return(NULL);
10465 }
10466
10467 /*
10468 * plug some encoding conversion routines here.
10469 */
10470 xmlPushInput(ctxt, input);
10471 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10472 xmlSwitchEncoding(ctxt, enc);
10473
10474 if (input->filename == NULL)
Daniel Veillard85095e22003-04-23 13:56:44 +000010475 input->filename = (char *) xmlCanonicPath(SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010476 input->line = 1;
10477 input->col = 1;
10478 input->base = ctxt->input->cur;
10479 input->cur = ctxt->input->cur;
10480 input->free = NULL;
10481
10482 /*
10483 * let's parse that entity knowing it's an external subset.
10484 */
10485 ctxt->inSubset = 2;
10486 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10487 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10488 ExternalID, SystemID);
10489 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10490
10491 if (ctxt->myDoc != NULL) {
10492 if (ctxt->wellFormed) {
10493 ret = ctxt->myDoc->extSubset;
10494 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010495 if (ret != NULL) {
10496 xmlNodePtr tmp;
10497
10498 ret->doc = NULL;
10499 tmp = ret->children;
10500 while (tmp != NULL) {
10501 tmp->doc = NULL;
10502 tmp = tmp->next;
10503 }
10504 }
Owen Taylor3473f882001-02-23 17:55:21 +000010505 } else {
10506 ret = NULL;
10507 }
10508 xmlFreeDoc(ctxt->myDoc);
10509 ctxt->myDoc = NULL;
10510 }
10511 if (sax != NULL) ctxt->sax = NULL;
10512 xmlFreeParserCtxt(ctxt);
10513
10514 return(ret);
10515}
10516
Daniel Veillard4432df22003-09-28 18:58:27 +000010517
Owen Taylor3473f882001-02-23 17:55:21 +000010518/**
10519 * xmlParseDTD:
10520 * @ExternalID: a NAME* containing the External ID of the DTD
10521 * @SystemID: a NAME* containing the URL to the DTD
10522 *
10523 * Load and parse an external subset.
10524 *
10525 * Returns the resulting xmlDtdPtr or NULL in case of error.
10526 */
10527
10528xmlDtdPtr
10529xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10530 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10531}
Daniel Veillard4432df22003-09-28 18:58:27 +000010532#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010533
10534/************************************************************************
10535 * *
10536 * Front ends when parsing an Entity *
10537 * *
10538 ************************************************************************/
10539
10540/**
Owen Taylor3473f882001-02-23 17:55:21 +000010541 * xmlParseCtxtExternalEntity:
10542 * @ctx: the existing parsing context
10543 * @URL: the URL for the entity to load
10544 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010545 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010546 *
10547 * Parse an external general entity within an existing parsing context
10548 * An external general parsed entity is well-formed if it matches the
10549 * production labeled extParsedEnt.
10550 *
10551 * [78] extParsedEnt ::= TextDecl? content
10552 *
10553 * Returns 0 if the entity is well formed, -1 in case of args problem and
10554 * the parser error code otherwise
10555 */
10556
10557int
10558xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010559 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010560 xmlParserCtxtPtr ctxt;
10561 xmlDocPtr newDoc;
10562 xmlSAXHandlerPtr oldsax = NULL;
10563 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010564 xmlChar start[4];
10565 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010566
10567 if (ctx->depth > 40) {
10568 return(XML_ERR_ENTITY_LOOP);
10569 }
10570
Daniel Veillardcda96922001-08-21 10:56:31 +000010571 if (lst != NULL)
10572 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010573 if ((URL == NULL) && (ID == NULL))
10574 return(-1);
10575 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10576 return(-1);
10577
10578
10579 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10580 if (ctxt == NULL) return(-1);
10581 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010582 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010583 oldsax = ctxt->sax;
10584 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010585 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010586 newDoc = xmlNewDoc(BAD_CAST "1.0");
10587 if (newDoc == NULL) {
10588 xmlFreeParserCtxt(ctxt);
10589 return(-1);
10590 }
10591 if (ctx->myDoc != NULL) {
10592 newDoc->intSubset = ctx->myDoc->intSubset;
10593 newDoc->extSubset = ctx->myDoc->extSubset;
10594 }
10595 if (ctx->myDoc->URL != NULL) {
10596 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10597 }
10598 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10599 if (newDoc->children == NULL) {
10600 ctxt->sax = oldsax;
10601 xmlFreeParserCtxt(ctxt);
10602 newDoc->intSubset = NULL;
10603 newDoc->extSubset = NULL;
10604 xmlFreeDoc(newDoc);
10605 return(-1);
10606 }
10607 nodePush(ctxt, newDoc->children);
10608 if (ctx->myDoc == NULL) {
10609 ctxt->myDoc = newDoc;
10610 } else {
10611 ctxt->myDoc = ctx->myDoc;
10612 newDoc->children->doc = ctx->myDoc;
10613 }
10614
Daniel Veillard87a764e2001-06-20 17:41:10 +000010615 /*
10616 * Get the 4 first bytes and decode the charset
10617 * if enc != XML_CHAR_ENCODING_NONE
10618 * plug some encoding conversion routines.
10619 */
10620 GROW
10621 start[0] = RAW;
10622 start[1] = NXT(1);
10623 start[2] = NXT(2);
10624 start[3] = NXT(3);
10625 enc = xmlDetectCharEncoding(start, 4);
10626 if (enc != XML_CHAR_ENCODING_NONE) {
10627 xmlSwitchEncoding(ctxt, enc);
10628 }
10629
Owen Taylor3473f882001-02-23 17:55:21 +000010630 /*
10631 * Parse a possible text declaration first
10632 */
Owen Taylor3473f882001-02-23 17:55:21 +000010633 if ((RAW == '<') && (NXT(1) == '?') &&
10634 (NXT(2) == 'x') && (NXT(3) == 'm') &&
10635 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
10636 xmlParseTextDecl(ctxt);
10637 }
10638
10639 /*
10640 * Doing validity checking on chunk doesn't make sense
10641 */
10642 ctxt->instate = XML_PARSER_CONTENT;
10643 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010644 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010645 ctxt->loadsubset = ctx->loadsubset;
10646 ctxt->depth = ctx->depth + 1;
10647 ctxt->replaceEntities = ctx->replaceEntities;
10648 if (ctxt->validate) {
10649 ctxt->vctxt.error = ctx->vctxt.error;
10650 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010651 } else {
10652 ctxt->vctxt.error = NULL;
10653 ctxt->vctxt.warning = NULL;
10654 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010655 ctxt->vctxt.nodeTab = NULL;
10656 ctxt->vctxt.nodeNr = 0;
10657 ctxt->vctxt.nodeMax = 0;
10658 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010659
10660 xmlParseContent(ctxt);
10661
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010662 ctx->validate = ctxt->validate;
10663 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010664 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010665 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010666 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010667 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010668 }
10669 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010670 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010671 }
10672
10673 if (!ctxt->wellFormed) {
10674 if (ctxt->errNo == 0)
10675 ret = 1;
10676 else
10677 ret = ctxt->errNo;
10678 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010679 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010680 xmlNodePtr cur;
10681
10682 /*
10683 * Return the newly created nodeset after unlinking it from
10684 * they pseudo parent.
10685 */
10686 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010687 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010688 while (cur != NULL) {
10689 cur->parent = NULL;
10690 cur = cur->next;
10691 }
10692 newDoc->children->children = NULL;
10693 }
10694 ret = 0;
10695 }
10696 ctxt->sax = oldsax;
10697 xmlFreeParserCtxt(ctxt);
10698 newDoc->intSubset = NULL;
10699 newDoc->extSubset = NULL;
10700 xmlFreeDoc(newDoc);
10701
10702 return(ret);
10703}
10704
10705/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010706 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010707 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010708 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010709 * @sax: the SAX handler bloc (possibly NULL)
10710 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10711 * @depth: Used for loop detection, use 0
10712 * @URL: the URL for the entity to load
10713 * @ID: the System ID for the entity to load
10714 * @list: the return value for the set of parsed nodes
10715 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010716 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010717 *
10718 * Returns 0 if the entity is well formed, -1 in case of args problem and
10719 * the parser error code otherwise
10720 */
10721
Daniel Veillard7d515752003-09-26 19:12:37 +000010722static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010723xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10724 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010725 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010726 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010727 xmlParserCtxtPtr ctxt;
10728 xmlDocPtr newDoc;
10729 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000010730 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010731 xmlChar start[4];
10732 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010733
10734 if (depth > 40) {
10735 return(XML_ERR_ENTITY_LOOP);
10736 }
10737
10738
10739
10740 if (list != NULL)
10741 *list = NULL;
10742 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000010743 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010744 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000010745 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010746
10747
10748 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000010749 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000010750 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010751 if (oldctxt != NULL) {
10752 ctxt->_private = oldctxt->_private;
10753 ctxt->loadsubset = oldctxt->loadsubset;
10754 ctxt->validate = oldctxt->validate;
10755 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010756 ctxt->record_info = oldctxt->record_info;
10757 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10758 ctxt->node_seq.length = oldctxt->node_seq.length;
10759 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010760 } else {
10761 /*
10762 * Doing validity checking on chunk without context
10763 * doesn't make sense
10764 */
10765 ctxt->_private = NULL;
10766 ctxt->validate = 0;
10767 ctxt->external = 2;
10768 ctxt->loadsubset = 0;
10769 }
Owen Taylor3473f882001-02-23 17:55:21 +000010770 if (sax != NULL) {
10771 oldsax = ctxt->sax;
10772 ctxt->sax = sax;
10773 if (user_data != NULL)
10774 ctxt->userData = user_data;
10775 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010776 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010777 newDoc = xmlNewDoc(BAD_CAST "1.0");
10778 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010779 ctxt->node_seq.maximum = 0;
10780 ctxt->node_seq.length = 0;
10781 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010782 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000010783 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010784 }
10785 if (doc != NULL) {
10786 newDoc->intSubset = doc->intSubset;
10787 newDoc->extSubset = doc->extSubset;
10788 }
10789 if (doc->URL != NULL) {
10790 newDoc->URL = xmlStrdup(doc->URL);
10791 }
10792 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10793 if (newDoc->children == NULL) {
10794 if (sax != NULL)
10795 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010796 ctxt->node_seq.maximum = 0;
10797 ctxt->node_seq.length = 0;
10798 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010799 xmlFreeParserCtxt(ctxt);
10800 newDoc->intSubset = NULL;
10801 newDoc->extSubset = NULL;
10802 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000010803 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010804 }
10805 nodePush(ctxt, newDoc->children);
10806 if (doc == NULL) {
10807 ctxt->myDoc = newDoc;
10808 } else {
10809 ctxt->myDoc = doc;
10810 newDoc->children->doc = doc;
10811 }
10812
Daniel Veillard87a764e2001-06-20 17:41:10 +000010813 /*
10814 * Get the 4 first bytes and decode the charset
10815 * if enc != XML_CHAR_ENCODING_NONE
10816 * plug some encoding conversion routines.
10817 */
10818 GROW;
10819 start[0] = RAW;
10820 start[1] = NXT(1);
10821 start[2] = NXT(2);
10822 start[3] = NXT(3);
10823 enc = xmlDetectCharEncoding(start, 4);
10824 if (enc != XML_CHAR_ENCODING_NONE) {
10825 xmlSwitchEncoding(ctxt, enc);
10826 }
10827
Owen Taylor3473f882001-02-23 17:55:21 +000010828 /*
10829 * Parse a possible text declaration first
10830 */
Owen Taylor3473f882001-02-23 17:55:21 +000010831 if ((RAW == '<') && (NXT(1) == '?') &&
10832 (NXT(2) == 'x') && (NXT(3) == 'm') &&
10833 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
10834 xmlParseTextDecl(ctxt);
10835 }
10836
Owen Taylor3473f882001-02-23 17:55:21 +000010837 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000010838 ctxt->depth = depth;
10839
10840 xmlParseContent(ctxt);
10841
Daniel Veillard561b7f82002-03-20 21:55:57 +000010842 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010843 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000010844 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010845 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010846 }
10847 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010848 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010849 }
10850
10851 if (!ctxt->wellFormed) {
10852 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010853 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000010854 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010855 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000010856 } else {
10857 if (list != NULL) {
10858 xmlNodePtr cur;
10859
10860 /*
10861 * Return the newly created nodeset after unlinking it from
10862 * they pseudo parent.
10863 */
10864 cur = newDoc->children->children;
10865 *list = cur;
10866 while (cur != NULL) {
10867 cur->parent = NULL;
10868 cur = cur->next;
10869 }
10870 newDoc->children->children = NULL;
10871 }
Daniel Veillard7d515752003-09-26 19:12:37 +000010872 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000010873 }
10874 if (sax != NULL)
10875 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010876 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10877 oldctxt->node_seq.length = ctxt->node_seq.length;
10878 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010879 ctxt->node_seq.maximum = 0;
10880 ctxt->node_seq.length = 0;
10881 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010882 xmlFreeParserCtxt(ctxt);
10883 newDoc->intSubset = NULL;
10884 newDoc->extSubset = NULL;
10885 xmlFreeDoc(newDoc);
10886
10887 return(ret);
10888}
10889
10890/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010891 * xmlParseExternalEntity:
10892 * @doc: the document the chunk pertains to
10893 * @sax: the SAX handler bloc (possibly NULL)
10894 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10895 * @depth: Used for loop detection, use 0
10896 * @URL: the URL for the entity to load
10897 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010898 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010899 *
10900 * Parse an external general entity
10901 * An external general parsed entity is well-formed if it matches the
10902 * production labeled extParsedEnt.
10903 *
10904 * [78] extParsedEnt ::= TextDecl? content
10905 *
10906 * Returns 0 if the entity is well formed, -1 in case of args problem and
10907 * the parser error code otherwise
10908 */
10909
10910int
10911xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010912 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010913 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010914 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010915}
10916
10917/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010918 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010919 * @doc: the document the chunk pertains to
10920 * @sax: the SAX handler bloc (possibly NULL)
10921 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10922 * @depth: Used for loop detection, use 0
10923 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010924 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010925 *
10926 * Parse a well-balanced chunk of an XML document
10927 * called by the parser
10928 * The allowed sequence for the Well Balanced Chunk is the one defined by
10929 * the content production in the XML grammar:
10930 *
10931 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10932 *
10933 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10934 * the parser error code otherwise
10935 */
10936
10937int
10938xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010939 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010940 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10941 depth, string, lst, 0 );
10942}
10943
10944/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000010945 * xmlParseBalancedChunkMemoryInternal:
10946 * @oldctxt: the existing parsing context
10947 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10948 * @user_data: the user data field for the parser context
10949 * @lst: the return value for the set of parsed nodes
10950 *
10951 *
10952 * Parse a well-balanced chunk of an XML document
10953 * called by the parser
10954 * The allowed sequence for the Well Balanced Chunk is the one defined by
10955 * the content production in the XML grammar:
10956 *
10957 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10958 *
Daniel Veillard7d515752003-09-26 19:12:37 +000010959 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
10960 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000010961 *
10962 * In case recover is set to 1, the nodelist will not be empty even if
10963 * the parsed chunk is not well balanced.
10964 */
Daniel Veillard7d515752003-09-26 19:12:37 +000010965static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000010966xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
10967 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
10968 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010969 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010970 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010971 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010972 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000010973 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010974
10975 if (oldctxt->depth > 40) {
10976 return(XML_ERR_ENTITY_LOOP);
10977 }
10978
10979
10980 if (lst != NULL)
10981 *lst = NULL;
10982 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000010983 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010984
10985 size = xmlStrlen(string);
10986
10987 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000010988 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010989 if (user_data != NULL)
10990 ctxt->userData = user_data;
10991 else
10992 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010993 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10994 ctxt->dict = oldctxt->dict;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010995
10996 oldsax = ctxt->sax;
10997 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010998 xmlDetectSAX2(ctxt);
10999
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011000 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011001 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011002 newDoc = xmlNewDoc(BAD_CAST "1.0");
11003 if (newDoc == NULL) {
11004 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011005 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011006 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011007 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011008 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000011009 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011010 } else {
11011 ctxt->myDoc = oldctxt->myDoc;
11012 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011013 }
Daniel Veillard9bc53102002-11-25 13:20:04 +000011014 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +000011015 BAD_CAST "pseudoroot", NULL);
11016 if (ctxt->myDoc->children == NULL) {
11017 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011018 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011019 xmlFreeParserCtxt(ctxt);
11020 if (newDoc != NULL)
11021 xmlFreeDoc(newDoc);
William M. Brack7b9154b2003-09-27 19:23:50 +000011022 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011023 }
11024 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011025 ctxt->instate = XML_PARSER_CONTENT;
11026 ctxt->depth = oldctxt->depth + 1;
11027
Daniel Veillard328f48c2002-11-15 15:24:34 +000011028 ctxt->validate = 0;
11029 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011030 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11031 /*
11032 * ID/IDREF registration will be done in xmlValidateElement below
11033 */
11034 ctxt->loadsubset |= XML_SKIP_IDS;
11035 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011036 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011037
Daniel Veillard68e9e742002-11-16 15:35:11 +000011038 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011039 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011040 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011041 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011042 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011043 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011044 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011045 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011046 }
11047
11048 if (!ctxt->wellFormed) {
11049 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011050 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011051 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011052 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011053 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011054 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011055 }
11056
William M. Brack7b9154b2003-09-27 19:23:50 +000011057 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011058 xmlNodePtr cur;
11059
11060 /*
11061 * Return the newly created nodeset after unlinking it from
11062 * they pseudo parent.
11063 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011064 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011065 *lst = cur;
11066 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011067#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8d589042003-02-04 15:07:21 +000011068 if (oldctxt->validate && oldctxt->wellFormed &&
11069 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
11070 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11071 oldctxt->myDoc, cur);
11072 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011073#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011074 cur->parent = NULL;
11075 cur = cur->next;
11076 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011077 ctxt->myDoc->children->children = NULL;
11078 }
11079 if (ctxt->myDoc != NULL) {
11080 xmlFreeNode(ctxt->myDoc->children);
11081 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011082 }
11083
11084 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011085 ctxt->dict = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011086 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011087 if (newDoc != NULL)
11088 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011089
11090 return(ret);
11091}
11092
11093/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011094 * xmlParseBalancedChunkMemoryRecover:
11095 * @doc: the document the chunk pertains to
11096 * @sax: the SAX handler bloc (possibly NULL)
11097 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11098 * @depth: Used for loop detection, use 0
11099 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11100 * @lst: the return value for the set of parsed nodes
11101 * @recover: return nodes even if the data is broken (use 0)
11102 *
11103 *
11104 * Parse a well-balanced chunk of an XML document
11105 * called by the parser
11106 * The allowed sequence for the Well Balanced Chunk is the one defined by
11107 * the content production in the XML grammar:
11108 *
11109 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11110 *
11111 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11112 * the parser error code otherwise
11113 *
11114 * In case recover is set to 1, the nodelist will not be empty even if
11115 * the parsed chunk is not well balanced.
11116 */
11117int
11118xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11119 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11120 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011121 xmlParserCtxtPtr ctxt;
11122 xmlDocPtr newDoc;
11123 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000011124 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000011125 int size;
11126 int ret = 0;
11127
11128 if (depth > 40) {
11129 return(XML_ERR_ENTITY_LOOP);
11130 }
11131
11132
Daniel Veillardcda96922001-08-21 10:56:31 +000011133 if (lst != NULL)
11134 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011135 if (string == NULL)
11136 return(-1);
11137
11138 size = xmlStrlen(string);
11139
11140 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11141 if (ctxt == NULL) return(-1);
11142 ctxt->userData = ctxt;
11143 if (sax != NULL) {
11144 oldsax = ctxt->sax;
11145 ctxt->sax = sax;
11146 if (user_data != NULL)
11147 ctxt->userData = user_data;
11148 }
11149 newDoc = xmlNewDoc(BAD_CAST "1.0");
11150 if (newDoc == NULL) {
11151 xmlFreeParserCtxt(ctxt);
11152 return(-1);
11153 }
11154 if (doc != NULL) {
11155 newDoc->intSubset = doc->intSubset;
11156 newDoc->extSubset = doc->extSubset;
11157 }
11158 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11159 if (newDoc->children == NULL) {
11160 if (sax != NULL)
11161 ctxt->sax = oldsax;
11162 xmlFreeParserCtxt(ctxt);
11163 newDoc->intSubset = NULL;
11164 newDoc->extSubset = NULL;
11165 xmlFreeDoc(newDoc);
11166 return(-1);
11167 }
11168 nodePush(ctxt, newDoc->children);
11169 if (doc == NULL) {
11170 ctxt->myDoc = newDoc;
11171 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011172 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011173 newDoc->children->doc = doc;
11174 }
11175 ctxt->instate = XML_PARSER_CONTENT;
11176 ctxt->depth = depth;
11177
11178 /*
11179 * Doing validity checking on chunk doesn't make sense
11180 */
11181 ctxt->validate = 0;
11182 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011183 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011184
Daniel Veillardb39bc392002-10-26 19:29:51 +000011185 if ( doc != NULL ){
11186 content = doc->children;
11187 doc->children = NULL;
11188 xmlParseContent(ctxt);
11189 doc->children = content;
11190 }
11191 else {
11192 xmlParseContent(ctxt);
11193 }
Owen Taylor3473f882001-02-23 17:55:21 +000011194 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011195 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011196 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011197 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011198 }
11199 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011200 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011201 }
11202
11203 if (!ctxt->wellFormed) {
11204 if (ctxt->errNo == 0)
11205 ret = 1;
11206 else
11207 ret = ctxt->errNo;
11208 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011209 ret = 0;
11210 }
11211
11212 if (lst != NULL && (ret == 0 || recover == 1)) {
11213 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011214
11215 /*
11216 * Return the newly created nodeset after unlinking it from
11217 * they pseudo parent.
11218 */
11219 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011220 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011221 while (cur != NULL) {
11222 cur->parent = NULL;
11223 cur = cur->next;
11224 }
11225 newDoc->children->children = NULL;
11226 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011227
Owen Taylor3473f882001-02-23 17:55:21 +000011228 if (sax != NULL)
11229 ctxt->sax = oldsax;
11230 xmlFreeParserCtxt(ctxt);
11231 newDoc->intSubset = NULL;
11232 newDoc->extSubset = NULL;
11233 xmlFreeDoc(newDoc);
11234
11235 return(ret);
11236}
11237
11238/**
11239 * xmlSAXParseEntity:
11240 * @sax: the SAX handler block
11241 * @filename: the filename
11242 *
11243 * parse an XML external entity out of context and build a tree.
11244 * It use the given SAX function block to handle the parsing callback.
11245 * If sax is NULL, fallback to the default DOM tree building routines.
11246 *
11247 * [78] extParsedEnt ::= TextDecl? content
11248 *
11249 * This correspond to a "Well Balanced" chunk
11250 *
11251 * Returns the resulting document tree
11252 */
11253
11254xmlDocPtr
11255xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11256 xmlDocPtr ret;
11257 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011258
11259 ctxt = xmlCreateFileParserCtxt(filename);
11260 if (ctxt == NULL) {
11261 return(NULL);
11262 }
11263 if (sax != NULL) {
11264 if (ctxt->sax != NULL)
11265 xmlFree(ctxt->sax);
11266 ctxt->sax = sax;
11267 ctxt->userData = NULL;
11268 }
11269
Owen Taylor3473f882001-02-23 17:55:21 +000011270 xmlParseExtParsedEnt(ctxt);
11271
11272 if (ctxt->wellFormed)
11273 ret = ctxt->myDoc;
11274 else {
11275 ret = NULL;
11276 xmlFreeDoc(ctxt->myDoc);
11277 ctxt->myDoc = NULL;
11278 }
11279 if (sax != NULL)
11280 ctxt->sax = NULL;
11281 xmlFreeParserCtxt(ctxt);
11282
11283 return(ret);
11284}
11285
11286/**
11287 * xmlParseEntity:
11288 * @filename: the filename
11289 *
11290 * parse an XML external entity out of context and build a tree.
11291 *
11292 * [78] extParsedEnt ::= TextDecl? content
11293 *
11294 * This correspond to a "Well Balanced" chunk
11295 *
11296 * Returns the resulting document tree
11297 */
11298
11299xmlDocPtr
11300xmlParseEntity(const char *filename) {
11301 return(xmlSAXParseEntity(NULL, filename));
11302}
11303
11304/**
11305 * xmlCreateEntityParserCtxt:
11306 * @URL: the entity URL
11307 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011308 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000011309 *
11310 * Create a parser context for an external entity
11311 * Automatic support for ZLIB/Compress compressed document is provided
11312 * by default if found at compile-time.
11313 *
11314 * Returns the new parser context or NULL
11315 */
11316xmlParserCtxtPtr
11317xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
11318 const xmlChar *base) {
11319 xmlParserCtxtPtr ctxt;
11320 xmlParserInputPtr inputStream;
11321 char *directory = NULL;
11322 xmlChar *uri;
11323
11324 ctxt = xmlNewParserCtxt();
11325 if (ctxt == NULL) {
11326 return(NULL);
11327 }
11328
11329 uri = xmlBuildURI(URL, base);
11330
11331 if (uri == NULL) {
11332 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11333 if (inputStream == NULL) {
11334 xmlFreeParserCtxt(ctxt);
11335 return(NULL);
11336 }
11337
11338 inputPush(ctxt, inputStream);
11339
11340 if ((ctxt->directory == NULL) && (directory == NULL))
11341 directory = xmlParserGetDirectory((char *)URL);
11342 if ((ctxt->directory == NULL) && (directory != NULL))
11343 ctxt->directory = directory;
11344 } else {
11345 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
11346 if (inputStream == NULL) {
11347 xmlFree(uri);
11348 xmlFreeParserCtxt(ctxt);
11349 return(NULL);
11350 }
11351
11352 inputPush(ctxt, inputStream);
11353
11354 if ((ctxt->directory == NULL) && (directory == NULL))
11355 directory = xmlParserGetDirectory((char *)uri);
11356 if ((ctxt->directory == NULL) && (directory != NULL))
11357 ctxt->directory = directory;
11358 xmlFree(uri);
11359 }
Owen Taylor3473f882001-02-23 17:55:21 +000011360 return(ctxt);
11361}
11362
11363/************************************************************************
11364 * *
11365 * Front ends when parsing from a file *
11366 * *
11367 ************************************************************************/
11368
11369/**
11370 * xmlCreateFileParserCtxt:
11371 * @filename: the filename
11372 *
11373 * Create a parser context for a file content.
11374 * Automatic support for ZLIB/Compress compressed document is provided
11375 * by default if found at compile-time.
11376 *
11377 * Returns the new parser context or NULL
11378 */
11379xmlParserCtxtPtr
11380xmlCreateFileParserCtxt(const char *filename)
11381{
11382 xmlParserCtxtPtr ctxt;
11383 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000011384 char *directory = NULL;
11385
Owen Taylor3473f882001-02-23 17:55:21 +000011386 ctxt = xmlNewParserCtxt();
11387 if (ctxt == NULL) {
11388 if (xmlDefaultSAXHandler.error != NULL) {
11389 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
11390 }
11391 return(NULL);
11392 }
11393
Igor Zlatkovicce076162003-02-23 13:39:39 +000011394
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000011395 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011396 if (inputStream == NULL) {
11397 xmlFreeParserCtxt(ctxt);
11398 return(NULL);
11399 }
11400
Owen Taylor3473f882001-02-23 17:55:21 +000011401 inputPush(ctxt, inputStream);
11402 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011403 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011404 if ((ctxt->directory == NULL) && (directory != NULL))
11405 ctxt->directory = directory;
11406
11407 return(ctxt);
11408}
11409
11410/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011411 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000011412 * @sax: the SAX handler block
11413 * @filename: the filename
11414 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11415 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000011416 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000011417 *
11418 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11419 * compressed document is provided by default if found at compile-time.
11420 * It use the given SAX function block to handle the parsing callback.
11421 * If sax is NULL, fallback to the default DOM tree building routines.
11422 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011423 * User data (void *) is stored within the parser context in the
11424 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011425 *
Owen Taylor3473f882001-02-23 17:55:21 +000011426 * Returns the resulting document tree
11427 */
11428
11429xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011430xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11431 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011432 xmlDocPtr ret;
11433 xmlParserCtxtPtr ctxt;
11434 char *directory = NULL;
11435
Daniel Veillard635ef722001-10-29 11:48:19 +000011436 xmlInitParser();
11437
Owen Taylor3473f882001-02-23 17:55:21 +000011438 ctxt = xmlCreateFileParserCtxt(filename);
11439 if (ctxt == NULL) {
11440 return(NULL);
11441 }
11442 if (sax != NULL) {
11443 if (ctxt->sax != NULL)
11444 xmlFree(ctxt->sax);
11445 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011446 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011447 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011448 if (data!=NULL) {
11449 ctxt->_private=data;
11450 }
Owen Taylor3473f882001-02-23 17:55:21 +000011451
11452 if ((ctxt->directory == NULL) && (directory == NULL))
11453 directory = xmlParserGetDirectory(filename);
11454 if ((ctxt->directory == NULL) && (directory != NULL))
11455 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11456
Daniel Veillarddad3f682002-11-17 16:47:27 +000011457 ctxt->recovery = recovery;
11458
Owen Taylor3473f882001-02-23 17:55:21 +000011459 xmlParseDocument(ctxt);
11460
William M. Brackc07329e2003-09-08 01:57:30 +000011461 if ((ctxt->wellFormed) || recovery) {
11462 ret = ctxt->myDoc;
11463 if (ctxt->input->buf->compressed > 0)
11464 ret->compression = 9;
11465 else
11466 ret->compression = ctxt->input->buf->compressed;
11467 }
Owen Taylor3473f882001-02-23 17:55:21 +000011468 else {
11469 ret = NULL;
11470 xmlFreeDoc(ctxt->myDoc);
11471 ctxt->myDoc = NULL;
11472 }
11473 if (sax != NULL)
11474 ctxt->sax = NULL;
11475 xmlFreeParserCtxt(ctxt);
11476
11477 return(ret);
11478}
11479
11480/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011481 * xmlSAXParseFile:
11482 * @sax: the SAX handler block
11483 * @filename: the filename
11484 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11485 * documents
11486 *
11487 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11488 * compressed document is provided by default if found at compile-time.
11489 * It use the given SAX function block to handle the parsing callback.
11490 * If sax is NULL, fallback to the default DOM tree building routines.
11491 *
11492 * Returns the resulting document tree
11493 */
11494
11495xmlDocPtr
11496xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11497 int recovery) {
11498 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11499}
11500
11501/**
Owen Taylor3473f882001-02-23 17:55:21 +000011502 * xmlRecoverDoc:
11503 * @cur: a pointer to an array of xmlChar
11504 *
11505 * parse an XML in-memory document and build a tree.
11506 * In the case the document is not Well Formed, a tree is built anyway
11507 *
11508 * Returns the resulting document tree
11509 */
11510
11511xmlDocPtr
11512xmlRecoverDoc(xmlChar *cur) {
11513 return(xmlSAXParseDoc(NULL, cur, 1));
11514}
11515
11516/**
11517 * xmlParseFile:
11518 * @filename: the filename
11519 *
11520 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11521 * compressed document is provided by default if found at compile-time.
11522 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000011523 * Returns the resulting document tree if the file was wellformed,
11524 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000011525 */
11526
11527xmlDocPtr
11528xmlParseFile(const char *filename) {
11529 return(xmlSAXParseFile(NULL, filename, 0));
11530}
11531
11532/**
11533 * xmlRecoverFile:
11534 * @filename: the filename
11535 *
11536 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11537 * compressed document is provided by default if found at compile-time.
11538 * In the case the document is not Well Formed, a tree is built anyway
11539 *
11540 * Returns the resulting document tree
11541 */
11542
11543xmlDocPtr
11544xmlRecoverFile(const char *filename) {
11545 return(xmlSAXParseFile(NULL, filename, 1));
11546}
11547
11548
11549/**
11550 * xmlSetupParserForBuffer:
11551 * @ctxt: an XML parser context
11552 * @buffer: a xmlChar * buffer
11553 * @filename: a file name
11554 *
11555 * Setup the parser context to parse a new buffer; Clears any prior
11556 * contents from the parser context. The buffer parameter must not be
11557 * NULL, but the filename parameter can be
11558 */
11559void
11560xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
11561 const char* filename)
11562{
11563 xmlParserInputPtr input;
11564
11565 input = xmlNewInputStream(ctxt);
11566 if (input == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +000011567 xmlGenericError(xmlGenericErrorContext,
11568 "malloc");
Owen Taylor3473f882001-02-23 17:55:21 +000011569 xmlFree(ctxt);
11570 return;
11571 }
11572
11573 xmlClearParserCtxt(ctxt);
11574 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000011575 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011576 input->base = buffer;
11577 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011578 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000011579 inputPush(ctxt, input);
11580}
11581
11582/**
11583 * xmlSAXUserParseFile:
11584 * @sax: a SAX handler
11585 * @user_data: The user data returned on SAX callbacks
11586 * @filename: a file name
11587 *
11588 * parse an XML file and call the given SAX handler routines.
11589 * Automatic support for ZLIB/Compress compressed document is provided
11590 *
11591 * Returns 0 in case of success or a error number otherwise
11592 */
11593int
11594xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11595 const char *filename) {
11596 int ret = 0;
11597 xmlParserCtxtPtr ctxt;
11598
11599 ctxt = xmlCreateFileParserCtxt(filename);
11600 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000011601 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000011602 xmlFree(ctxt->sax);
11603 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011604 xmlDetectSAX2(ctxt);
11605
Owen Taylor3473f882001-02-23 17:55:21 +000011606 if (user_data != NULL)
11607 ctxt->userData = user_data;
11608
11609 xmlParseDocument(ctxt);
11610
11611 if (ctxt->wellFormed)
11612 ret = 0;
11613 else {
11614 if (ctxt->errNo != 0)
11615 ret = ctxt->errNo;
11616 else
11617 ret = -1;
11618 }
11619 if (sax != NULL)
11620 ctxt->sax = NULL;
11621 xmlFreeParserCtxt(ctxt);
11622
11623 return ret;
11624}
11625
11626/************************************************************************
11627 * *
11628 * Front ends when parsing from memory *
11629 * *
11630 ************************************************************************/
11631
11632/**
11633 * xmlCreateMemoryParserCtxt:
11634 * @buffer: a pointer to a char array
11635 * @size: the size of the array
11636 *
11637 * Create a parser context for an XML in-memory document.
11638 *
11639 * Returns the new parser context or NULL
11640 */
11641xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011642xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011643 xmlParserCtxtPtr ctxt;
11644 xmlParserInputPtr input;
11645 xmlParserInputBufferPtr buf;
11646
11647 if (buffer == NULL)
11648 return(NULL);
11649 if (size <= 0)
11650 return(NULL);
11651
11652 ctxt = xmlNewParserCtxt();
11653 if (ctxt == NULL)
11654 return(NULL);
11655
Daniel Veillard53350552003-09-18 13:35:51 +000011656 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000011657 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011658 if (buf == NULL) {
11659 xmlFreeParserCtxt(ctxt);
11660 return(NULL);
11661 }
Owen Taylor3473f882001-02-23 17:55:21 +000011662
11663 input = xmlNewInputStream(ctxt);
11664 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011665 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011666 xmlFreeParserCtxt(ctxt);
11667 return(NULL);
11668 }
11669
11670 input->filename = NULL;
11671 input->buf = buf;
11672 input->base = input->buf->buffer->content;
11673 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011674 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011675
11676 inputPush(ctxt, input);
11677 return(ctxt);
11678}
11679
11680/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011681 * xmlSAXParseMemoryWithData:
11682 * @sax: the SAX handler block
11683 * @buffer: an pointer to a char array
11684 * @size: the size of the array
11685 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11686 * documents
11687 * @data: the userdata
11688 *
11689 * parse an XML in-memory block and use the given SAX function block
11690 * to handle the parsing callback. If sax is NULL, fallback to the default
11691 * DOM tree building routines.
11692 *
11693 * User data (void *) is stored within the parser context in the
11694 * context's _private member, so it is available nearly everywhere in libxml
11695 *
11696 * Returns the resulting document tree
11697 */
11698
11699xmlDocPtr
11700xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
11701 int size, int recovery, void *data) {
11702 xmlDocPtr ret;
11703 xmlParserCtxtPtr ctxt;
11704
11705 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11706 if (ctxt == NULL) return(NULL);
11707 if (sax != NULL) {
11708 if (ctxt->sax != NULL)
11709 xmlFree(ctxt->sax);
11710 ctxt->sax = sax;
11711 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011712 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011713 if (data!=NULL) {
11714 ctxt->_private=data;
11715 }
11716
Daniel Veillardadba5f12003-04-04 16:09:01 +000011717 ctxt->recovery = recovery;
11718
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011719 xmlParseDocument(ctxt);
11720
11721 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11722 else {
11723 ret = NULL;
11724 xmlFreeDoc(ctxt->myDoc);
11725 ctxt->myDoc = NULL;
11726 }
11727 if (sax != NULL)
11728 ctxt->sax = NULL;
11729 xmlFreeParserCtxt(ctxt);
11730
11731 return(ret);
11732}
11733
11734/**
Owen Taylor3473f882001-02-23 17:55:21 +000011735 * xmlSAXParseMemory:
11736 * @sax: the SAX handler block
11737 * @buffer: an pointer to a char array
11738 * @size: the size of the array
11739 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
11740 * documents
11741 *
11742 * parse an XML in-memory block and use the given SAX function block
11743 * to handle the parsing callback. If sax is NULL, fallback to the default
11744 * DOM tree building routines.
11745 *
11746 * Returns the resulting document tree
11747 */
11748xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000011749xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
11750 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011751 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011752}
11753
11754/**
11755 * xmlParseMemory:
11756 * @buffer: an pointer to a char array
11757 * @size: the size of the array
11758 *
11759 * parse an XML in-memory block and build a tree.
11760 *
11761 * Returns the resulting document tree
11762 */
11763
Daniel Veillard50822cb2001-07-26 20:05:51 +000011764xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011765 return(xmlSAXParseMemory(NULL, buffer, size, 0));
11766}
11767
11768/**
11769 * xmlRecoverMemory:
11770 * @buffer: an pointer to a char array
11771 * @size: the size of the array
11772 *
11773 * parse an XML in-memory block and build a tree.
11774 * In the case the document is not Well Formed, a tree is built anyway
11775 *
11776 * Returns the resulting document tree
11777 */
11778
Daniel Veillard50822cb2001-07-26 20:05:51 +000011779xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011780 return(xmlSAXParseMemory(NULL, buffer, size, 1));
11781}
11782
11783/**
11784 * xmlSAXUserParseMemory:
11785 * @sax: a SAX handler
11786 * @user_data: The user data returned on SAX callbacks
11787 * @buffer: an in-memory XML document input
11788 * @size: the length of the XML document in bytes
11789 *
11790 * A better SAX parsing routine.
11791 * parse an XML in-memory buffer and call the given SAX handler routines.
11792 *
11793 * Returns 0 in case of success or a error number otherwise
11794 */
11795int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011796 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011797 int ret = 0;
11798 xmlParserCtxtPtr ctxt;
11799 xmlSAXHandlerPtr oldsax = NULL;
11800
Daniel Veillard9e923512002-08-14 08:48:52 +000011801 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000011802 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11803 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000011804 oldsax = ctxt->sax;
11805 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011806 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000011807 if (user_data != NULL)
11808 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000011809
11810 xmlParseDocument(ctxt);
11811
11812 if (ctxt->wellFormed)
11813 ret = 0;
11814 else {
11815 if (ctxt->errNo != 0)
11816 ret = ctxt->errNo;
11817 else
11818 ret = -1;
11819 }
Daniel Veillard9e923512002-08-14 08:48:52 +000011820 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000011821 xmlFreeParserCtxt(ctxt);
11822
11823 return ret;
11824}
11825
11826/**
11827 * xmlCreateDocParserCtxt:
11828 * @cur: a pointer to an array of xmlChar
11829 *
11830 * Creates a parser context for an XML in-memory document.
11831 *
11832 * Returns the new parser context or NULL
11833 */
11834xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011835xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000011836 int len;
11837
11838 if (cur == NULL)
11839 return(NULL);
11840 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011841 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000011842}
11843
11844/**
11845 * xmlSAXParseDoc:
11846 * @sax: the SAX handler block
11847 * @cur: a pointer to an array of xmlChar
11848 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11849 * documents
11850 *
11851 * parse an XML in-memory document and build a tree.
11852 * It use the given SAX function block to handle the parsing callback.
11853 * If sax is NULL, fallback to the default DOM tree building routines.
11854 *
11855 * Returns the resulting document tree
11856 */
11857
11858xmlDocPtr
11859xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
11860 xmlDocPtr ret;
11861 xmlParserCtxtPtr ctxt;
11862
11863 if (cur == NULL) return(NULL);
11864
11865
11866 ctxt = xmlCreateDocParserCtxt(cur);
11867 if (ctxt == NULL) return(NULL);
11868 if (sax != NULL) {
11869 ctxt->sax = sax;
11870 ctxt->userData = NULL;
11871 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011872 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011873
11874 xmlParseDocument(ctxt);
11875 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11876 else {
11877 ret = NULL;
11878 xmlFreeDoc(ctxt->myDoc);
11879 ctxt->myDoc = NULL;
11880 }
11881 if (sax != NULL)
11882 ctxt->sax = NULL;
11883 xmlFreeParserCtxt(ctxt);
11884
11885 return(ret);
11886}
11887
11888/**
11889 * xmlParseDoc:
11890 * @cur: a pointer to an array of xmlChar
11891 *
11892 * parse an XML in-memory document and build a tree.
11893 *
11894 * Returns the resulting document tree
11895 */
11896
11897xmlDocPtr
11898xmlParseDoc(xmlChar *cur) {
11899 return(xmlSAXParseDoc(NULL, cur, 0));
11900}
11901
Daniel Veillard8107a222002-01-13 14:10:10 +000011902/************************************************************************
11903 * *
11904 * Specific function to keep track of entities references *
11905 * and used by the XSLT debugger *
11906 * *
11907 ************************************************************************/
11908
11909static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
11910
11911/**
11912 * xmlAddEntityReference:
11913 * @ent : A valid entity
11914 * @firstNode : A valid first node for children of entity
11915 * @lastNode : A valid last node of children entity
11916 *
11917 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
11918 */
11919static void
11920xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
11921 xmlNodePtr lastNode)
11922{
11923 if (xmlEntityRefFunc != NULL) {
11924 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
11925 }
11926}
11927
11928
11929/**
11930 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000011931 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000011932 *
11933 * Set the function to call call back when a xml reference has been made
11934 */
11935void
11936xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
11937{
11938 xmlEntityRefFunc = func;
11939}
Owen Taylor3473f882001-02-23 17:55:21 +000011940
11941/************************************************************************
11942 * *
11943 * Miscellaneous *
11944 * *
11945 ************************************************************************/
11946
11947#ifdef LIBXML_XPATH_ENABLED
11948#include <libxml/xpath.h>
11949#endif
11950
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011951extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000011952static int xmlParserInitialized = 0;
11953
11954/**
11955 * xmlInitParser:
11956 *
11957 * Initialization function for the XML parser.
11958 * This is not reentrant. Call once before processing in case of
11959 * use in multithreaded programs.
11960 */
11961
11962void
11963xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000011964 if (xmlParserInitialized != 0)
11965 return;
Owen Taylor3473f882001-02-23 17:55:21 +000011966
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011967 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
11968 (xmlGenericError == NULL))
11969 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011970 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000011971 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000011972 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000011973 xmlInitCharEncodingHandlers();
11974 xmlInitializePredefinedEntities();
11975 xmlDefaultSAXHandlerInit();
11976 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000011977#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011978 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000011979#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011980#ifdef LIBXML_HTML_ENABLED
11981 htmlInitAutoClose();
11982 htmlDefaultSAXHandlerInit();
11983#endif
11984#ifdef LIBXML_XPATH_ENABLED
11985 xmlXPathInit();
11986#endif
11987 xmlParserInitialized = 1;
11988}
11989
11990/**
11991 * xmlCleanupParser:
11992 *
11993 * Cleanup function for the XML parser. It tries to reclaim all
11994 * parsing related global memory allocated for the parser processing.
11995 * It doesn't deallocate any document related memory. Calling this
11996 * function should not prevent reusing the parser.
Daniel Veillard7424eb62003-01-24 14:14:52 +000011997 * One should call xmlCleanupParser() only when the process has
11998 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000011999 */
12000
12001void
12002xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012003 if (!xmlParserInitialized)
12004 return;
12005
Owen Taylor3473f882001-02-23 17:55:21 +000012006 xmlCleanupCharEncodingHandlers();
12007 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012008#ifdef LIBXML_CATALOG_ENABLED
12009 xmlCatalogCleanup();
12010#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000012011 xmlCleanupThreads();
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012012 xmlCleanupGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000012013 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012014}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012015
12016/************************************************************************
12017 * *
12018 * New set (2.6.0) of simpler and more flexible APIs *
12019 * *
12020 ************************************************************************/
12021
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is used. @str is evaluated more than once, so it must not have
 * side effects.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement: "DICT_FREE(x);" is safe inside an unbraced if/else.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
12033
12034/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012035 * xmlCtxtReset:
12036 * @ctxt: an XML parser context
12037 *
12038 * Reset a parser context
12039 */
12040void
12041xmlCtxtReset(xmlParserCtxtPtr ctxt)
12042{
12043 xmlParserInputPtr input;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012044 xmlDictPtr dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012045
12046 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12047 xmlFreeInputStream(input);
12048 }
12049 ctxt->inputNr = 0;
12050 ctxt->input = NULL;
12051
12052 ctxt->spaceNr = 0;
12053 ctxt->spaceTab[0] = -1;
12054 ctxt->space = &ctxt->spaceTab[0];
12055
12056
12057 ctxt->nodeNr = 0;
12058 ctxt->node = NULL;
12059
12060 ctxt->nameNr = 0;
12061 ctxt->name = NULL;
12062
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012063 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012064 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012065 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012066 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012067 DICT_FREE(ctxt->directory);
12068 ctxt->directory = NULL;
12069 DICT_FREE(ctxt->extSubURI);
12070 ctxt->extSubURI = NULL;
12071 DICT_FREE(ctxt->extSubSystem);
12072 ctxt->extSubSystem = NULL;
12073 if (ctxt->myDoc != NULL)
12074 xmlFreeDoc(ctxt->myDoc);
12075 ctxt->myDoc = NULL;
12076
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012077 ctxt->standalone = -1;
12078 ctxt->hasExternalSubset = 0;
12079 ctxt->hasPErefs = 0;
12080 ctxt->html = 0;
12081 ctxt->external = 0;
12082 ctxt->instate = XML_PARSER_START;
12083 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012084
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012085 ctxt->wellFormed = 1;
12086 ctxt->nsWellFormed = 1;
12087 ctxt->valid = 1;
12088 ctxt->vctxt.userData = ctxt;
12089 ctxt->vctxt.error = xmlParserValidityError;
12090 ctxt->vctxt.warning = xmlParserValidityWarning;
12091 ctxt->record_info = 0;
12092 ctxt->nbChars = 0;
12093 ctxt->checkIndex = 0;
12094 ctxt->inSubset = 0;
12095 ctxt->errNo = XML_ERR_OK;
12096 ctxt->depth = 0;
12097 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12098 ctxt->catalogs = NULL;
12099 xmlInitNodeInfoSeq(&ctxt->node_seq);
12100
12101 if (ctxt->attsDefault != NULL) {
12102 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12103 ctxt->attsDefault = NULL;
12104 }
12105 if (ctxt->attsSpecial != NULL) {
12106 xmlHashFree(ctxt->attsSpecial, NULL);
12107 ctxt->attsSpecial = NULL;
12108 }
12109
Daniel Veillard4432df22003-09-28 18:58:27 +000012110#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012111 if (ctxt->catalogs != NULL)
12112 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012113#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012114}
12115
12116/**
12117 * xmlCtxtUseOptions:
12118 * @ctxt: an XML parser context
12119 * @options: a combination of xmlParserOption(s)
12120 *
12121 * Applies the options to the parser context
12122 *
12123 * Returns 0 in case of success, the set of unknown or unimplemented options
12124 * in case of error.
12125 */
12126int
12127xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
12128{
12129 if (options & XML_PARSE_RECOVER) {
12130 ctxt->recovery = 1;
12131 options -= XML_PARSE_RECOVER;
12132 } else
12133 ctxt->recovery = 0;
12134 if (options & XML_PARSE_DTDLOAD) {
12135 ctxt->loadsubset = XML_DETECT_IDS;
12136 options -= XML_PARSE_DTDLOAD;
12137 } else
12138 ctxt->loadsubset = 0;
12139 if (options & XML_PARSE_DTDATTR) {
12140 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
12141 options -= XML_PARSE_DTDATTR;
12142 }
12143 if (options & XML_PARSE_NOENT) {
12144 ctxt->replaceEntities = 1;
12145 /* ctxt->loadsubset |= XML_DETECT_IDS; */
12146 options -= XML_PARSE_NOENT;
12147 } else
12148 ctxt->replaceEntities = 0;
12149 if (options & XML_PARSE_NOWARNING) {
12150 ctxt->sax->warning = NULL;
12151 options -= XML_PARSE_NOWARNING;
12152 }
12153 if (options & XML_PARSE_NOERROR) {
12154 ctxt->sax->error = NULL;
12155 ctxt->sax->fatalError = NULL;
12156 options -= XML_PARSE_NOERROR;
12157 }
12158 if (options & XML_PARSE_PEDANTIC) {
12159 ctxt->pedantic = 1;
12160 options -= XML_PARSE_PEDANTIC;
12161 } else
12162 ctxt->pedantic = 0;
12163 if (options & XML_PARSE_NOBLANKS) {
12164 ctxt->keepBlanks = 0;
12165 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
12166 options -= XML_PARSE_NOBLANKS;
12167 } else
12168 ctxt->keepBlanks = 1;
12169 if (options & XML_PARSE_DTDVALID) {
12170 ctxt->validate = 1;
12171 if (options & XML_PARSE_NOWARNING)
12172 ctxt->vctxt.warning = NULL;
12173 if (options & XML_PARSE_NOERROR)
12174 ctxt->vctxt.error = NULL;
12175 options -= XML_PARSE_DTDVALID;
12176 } else
12177 ctxt->validate = 0;
12178 if (options & XML_PARSE_SAX1) {
12179 ctxt->sax->startElement = xmlSAX2StartElement;
12180 ctxt->sax->endElement = xmlSAX2EndElement;
12181 ctxt->sax->startElementNs = NULL;
12182 ctxt->sax->endElementNs = NULL;
12183 ctxt->sax->initialized = 1;
12184 options -= XML_PARSE_SAX1;
12185 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012186 if (options & XML_PARSE_NODICT) {
12187 ctxt->dictNames = 0;
12188 options -= XML_PARSE_NODICT;
12189 } else {
12190 ctxt->dictNames = 1;
12191 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000012192 if (options & XML_PARSE_NOCDATA) {
12193 ctxt->sax->cdataBlock = NULL;
12194 options -= XML_PARSE_NOCDATA;
12195 }
12196 if (options & XML_PARSE_NSCLEAN) {
12197 ctxt->options |= XML_PARSE_NSCLEAN;
12198 options -= XML_PARSE_NSCLEAN;
12199 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012200 return (options);
12201}
12202
12203/**
12204 * xmlDoRead:
12205 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000012206 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012207 * @encoding: the document encoding, or NULL
12208 * @options: a combination of xmlParserOption(s)
12209 * @reuse: keep the context for reuse
12210 *
12211 * Common front-end for the xmlRead functions
12212 *
12213 * Returns the resulting document tree or NULL
12214 */
12215static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012216xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
12217 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012218{
12219 xmlDocPtr ret;
12220
12221 xmlCtxtUseOptions(ctxt, options);
12222 if (encoding != NULL) {
12223 xmlCharEncodingHandlerPtr hdlr;
12224
12225 hdlr = xmlFindCharEncodingHandler(encoding);
12226 if (hdlr != NULL)
12227 xmlSwitchToEncoding(ctxt, hdlr);
12228 }
Daniel Veillard60942de2003-09-25 21:05:58 +000012229 if ((URL != NULL) && (ctxt->input != NULL) &&
12230 (ctxt->input->filename == NULL))
12231 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012232 xmlParseDocument(ctxt);
12233 if ((ctxt->wellFormed) || ctxt->recovery)
12234 ret = ctxt->myDoc;
12235 else {
12236 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012237 if (ctxt->myDoc != NULL) {
Daniel Veillard9d8c1df2003-09-26 23:27:25 +000012238 if ((ctxt->dictNames) &&
12239 (ctxt->myDoc->dict == ctxt->dict))
12240 xmlDictReference(ctxt->dict);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012241 xmlFreeDoc(ctxt->myDoc);
12242 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012243 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012244 ctxt->myDoc = NULL;
12245 if (!reuse) {
12246 if ((ctxt->dictNames) &&
12247 (ret != NULL) &&
12248 (ret->dict == ctxt->dict))
12249 ctxt->dict = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012250 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012251 } else {
12252 /* Must duplicate the reference to the dictionary */
12253 if ((ctxt->dictNames) &&
12254 (ret != NULL) &&
12255 (ret->dict == ctxt->dict))
12256 xmlDictReference(ctxt->dict);
12257 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012258
12259 return (ret);
12260}
12261
12262/**
12263 * xmlReadDoc:
12264 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012265 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012266 * @encoding: the document encoding, or NULL
12267 * @options: a combination of xmlParserOption(s)
12268 *
12269 * parse an XML in-memory document and build a tree.
12270 *
12271 * Returns the resulting document tree
12272 */
12273xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012274xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012275{
12276 xmlParserCtxtPtr ctxt;
12277
12278 if (cur == NULL)
12279 return (NULL);
12280
12281 ctxt = xmlCreateDocParserCtxt(cur);
12282 if (ctxt == NULL)
12283 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012284 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012285}
12286
12287/**
12288 * xmlReadFile:
12289 * @filename: a file or URL
12290 * @encoding: the document encoding, or NULL
12291 * @options: a combination of xmlParserOption(s)
12292 *
12293 * parse an XML file from the filesystem or the network.
12294 *
12295 * Returns the resulting document tree
12296 */
12297xmlDocPtr
12298xmlReadFile(const char *filename, const char *encoding, int options)
12299{
12300 xmlParserCtxtPtr ctxt;
12301
12302 ctxt = xmlCreateFileParserCtxt(filename);
12303 if (ctxt == NULL)
12304 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012305 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012306}
12307
12308/**
12309 * xmlReadMemory:
12310 * @buffer: a pointer to a char array
12311 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012312 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012313 * @encoding: the document encoding, or NULL
12314 * @options: a combination of xmlParserOption(s)
12315 *
12316 * parse an XML in-memory document and build a tree.
12317 *
12318 * Returns the resulting document tree
12319 */
12320xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012321xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012322{
12323 xmlParserCtxtPtr ctxt;
12324
12325 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12326 if (ctxt == NULL)
12327 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012328 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012329}
12330
12331/**
12332 * xmlReadFd:
12333 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012334 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012335 * @encoding: the document encoding, or NULL
12336 * @options: a combination of xmlParserOption(s)
12337 *
12338 * parse an XML from a file descriptor and build a tree.
12339 *
12340 * Returns the resulting document tree
12341 */
12342xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012343xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012344{
12345 xmlParserCtxtPtr ctxt;
12346 xmlParserInputBufferPtr input;
12347 xmlParserInputPtr stream;
12348
12349 if (fd < 0)
12350 return (NULL);
12351
12352 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12353 if (input == NULL)
12354 return (NULL);
12355 ctxt = xmlNewParserCtxt();
12356 if (ctxt == NULL) {
12357 xmlFreeParserInputBuffer(input);
12358 return (NULL);
12359 }
12360 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12361 if (stream == NULL) {
12362 xmlFreeParserInputBuffer(input);
12363 xmlFreeParserCtxt(ctxt);
12364 return (NULL);
12365 }
12366 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012367 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012368}
12369
12370/**
12371 * xmlReadIO:
12372 * @ioread: an I/O read function
12373 * @ioclose: an I/O close function
12374 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012375 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012376 * @encoding: the document encoding, or NULL
12377 * @options: a combination of xmlParserOption(s)
12378 *
12379 * parse an XML document from I/O functions and source and build a tree.
12380 *
12381 * Returns the resulting document tree
12382 */
12383xmlDocPtr
12384xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000012385 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012386{
12387 xmlParserCtxtPtr ctxt;
12388 xmlParserInputBufferPtr input;
12389 xmlParserInputPtr stream;
12390
12391 if (ioread == NULL)
12392 return (NULL);
12393
12394 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12395 XML_CHAR_ENCODING_NONE);
12396 if (input == NULL)
12397 return (NULL);
12398 ctxt = xmlNewParserCtxt();
12399 if (ctxt == NULL) {
12400 xmlFreeParserInputBuffer(input);
12401 return (NULL);
12402 }
12403 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12404 if (stream == NULL) {
12405 xmlFreeParserInputBuffer(input);
12406 xmlFreeParserCtxt(ctxt);
12407 return (NULL);
12408 }
12409 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012410 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012411}
12412
12413/**
12414 * xmlCtxtReadDoc:
12415 * @ctxt: an XML parser context
12416 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012417 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012418 * @encoding: the document encoding, or NULL
12419 * @options: a combination of xmlParserOption(s)
12420 *
12421 * parse an XML in-memory document and build a tree.
12422 * This reuses the existing @ctxt parser context
12423 *
12424 * Returns the resulting document tree
12425 */
12426xmlDocPtr
12427xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000012428 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012429{
12430 xmlParserInputPtr stream;
12431
12432 if (cur == NULL)
12433 return (NULL);
12434 if (ctxt == NULL)
12435 return (NULL);
12436
12437 xmlCtxtReset(ctxt);
12438
12439 stream = xmlNewStringInputStream(ctxt, cur);
12440 if (stream == NULL) {
12441 return (NULL);
12442 }
12443 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012444 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012445}
12446
12447/**
12448 * xmlCtxtReadFile:
12449 * @ctxt: an XML parser context
12450 * @filename: a file or URL
12451 * @encoding: the document encoding, or NULL
12452 * @options: a combination of xmlParserOption(s)
12453 *
12454 * parse an XML file from the filesystem or the network.
12455 * This reuses the existing @ctxt parser context
12456 *
12457 * Returns the resulting document tree
12458 */
12459xmlDocPtr
12460xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
12461 const char *encoding, int options)
12462{
12463 xmlParserInputPtr stream;
12464
12465 if (filename == NULL)
12466 return (NULL);
12467 if (ctxt == NULL)
12468 return (NULL);
12469
12470 xmlCtxtReset(ctxt);
12471
12472 stream = xmlNewInputFromFile(ctxt, filename);
12473 if (stream == NULL) {
12474 return (NULL);
12475 }
12476 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012477 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012478}
12479
12480/**
12481 * xmlCtxtReadMemory:
12482 * @ctxt: an XML parser context
12483 * @buffer: a pointer to a char array
12484 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012485 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012486 * @encoding: the document encoding, or NULL
12487 * @options: a combination of xmlParserOption(s)
12488 *
12489 * parse an XML in-memory document and build a tree.
12490 * This reuses the existing @ctxt parser context
12491 *
12492 * Returns the resulting document tree
12493 */
12494xmlDocPtr
12495xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000012496 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012497{
12498 xmlParserInputBufferPtr input;
12499 xmlParserInputPtr stream;
12500
12501 if (ctxt == NULL)
12502 return (NULL);
12503 if (buffer == NULL)
12504 return (NULL);
12505
12506 xmlCtxtReset(ctxt);
12507
12508 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12509 if (input == NULL) {
12510 return(NULL);
12511 }
12512
12513 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12514 if (stream == NULL) {
12515 xmlFreeParserInputBuffer(input);
12516 return(NULL);
12517 }
12518
12519 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012520 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012521}
12522
12523/**
12524 * xmlCtxtReadFd:
12525 * @ctxt: an XML parser context
12526 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012527 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012528 * @encoding: the document encoding, or NULL
12529 * @options: a combination of xmlParserOption(s)
12530 *
12531 * parse an XML from a file descriptor and build a tree.
12532 * This reuses the existing @ctxt parser context
12533 *
12534 * Returns the resulting document tree
12535 */
12536xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012537xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
12538 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012539{
12540 xmlParserInputBufferPtr input;
12541 xmlParserInputPtr stream;
12542
12543 if (fd < 0)
12544 return (NULL);
12545 if (ctxt == NULL)
12546 return (NULL);
12547
12548 xmlCtxtReset(ctxt);
12549
12550
12551 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12552 if (input == NULL)
12553 return (NULL);
12554 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12555 if (stream == NULL) {
12556 xmlFreeParserInputBuffer(input);
12557 return (NULL);
12558 }
12559 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012560 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012561}
12562
12563/**
12564 * xmlCtxtReadIO:
12565 * @ctxt: an XML parser context
12566 * @ioread: an I/O read function
12567 * @ioclose: an I/O close function
12568 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012569 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012570 * @encoding: the document encoding, or NULL
12571 * @options: a combination of xmlParserOption(s)
12572 *
12573 * parse an XML document from I/O functions and source and build a tree.
12574 * This reuses the existing @ctxt parser context
12575 *
12576 * Returns the resulting document tree
12577 */
12578xmlDocPtr
12579xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
12580 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000012581 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012582 const char *encoding, int options)
12583{
12584 xmlParserInputBufferPtr input;
12585 xmlParserInputPtr stream;
12586
12587 if (ioread == NULL)
12588 return (NULL);
12589 if (ctxt == NULL)
12590 return (NULL);
12591
12592 xmlCtxtReset(ctxt);
12593
12594 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12595 XML_CHAR_ENCODING_NONE);
12596 if (input == NULL)
12597 return (NULL);
12598 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12599 if (stream == NULL) {
12600 xmlFreeParserInputBuffer(input);
12601 return (NULL);
12602 }
12603 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012604 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012605}