blob: eb3a8a0ebd94f15dec4c38dbf05018145a377aaa [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
Daniel Veillard3b2e4e12003-02-03 08:52:58 +000079/**
80 * MAX_DEPTH:
81 *
82 * arbitrary depth limit for the XML documents that we allow to
83 * process. This is not a limitation of the parser but a safety
84 * boundary feature.
85 */
86#define MAX_DEPTH 1024
Owen Taylor3473f882001-02-23 17:55:21 +000087
Daniel Veillard0fb18932003-09-07 09:14:37 +000088#define SAX2 1
89
Daniel Veillard21a0f912001-02-25 19:54:14 +000090#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000091#define XML_PARSER_BUFFER_SIZE 100
92
Daniel Veillard5997aca2002-03-18 18:36:20 +000093#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
94
Owen Taylor3473f882001-02-23 17:55:21 +000095/*
Owen Taylor3473f882001-02-23 17:55:21 +000096 * List of XML prefixed PI allowed by W3C specs
97 */
98
Daniel Veillardb44025c2001-10-11 22:55:55 +000099static const char *xmlW3CPIs[] = {
Owen Taylor3473f882001-02-23 17:55:21 +0000100 "xml-stylesheet",
101 NULL
102};
103
104/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000105xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
106 const xmlChar **str);
107
Daniel Veillard257d9102001-05-08 10:41:44 +0000108static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000109xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
110 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000111 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000112 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000113
Daniel Veillard8107a222002-01-13 14:10:10 +0000114static void
115xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
116 xmlNodePtr lastNode);
117
Daniel Veillard328f48c2002-11-15 15:24:34 +0000118static int
119xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
120 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000121
122/************************************************************************
123 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000124 * Some factorized error routines *
125 * *
126 ************************************************************************/
127
128/**
129 * xmlErrMemory:
130 * @ctxt: an XML parser context
131 * @extra: extra informations
132 *
133 * Handle a redefinition of attribute error
134 */
135static void
136xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
137{
138 if (ctxt != NULL) {
139 ctxt->errNo = XML_ERR_NO_MEMORY;
140 ctxt->instate = XML_PARSER_EOF;
141 ctxt->disableSAX = 1;
142 }
143 if ((ctxt != NULL) && (ctxt->sax != NULL)
144 && (ctxt->sax->error != NULL)) {
145 if (extra)
146 ctxt->sax->error(ctxt->userData,
147 "Memory allocation failed : %s\n", extra);
148 else
149 ctxt->sax->error(ctxt->userData,
150 "Memory allocation failed !\n");
151 } else {
152 if (extra)
153 xmlGenericError(xmlGenericErrorContext,
154 "Memory allocation failed : %s\n", extra);
155 else
156 xmlGenericError(xmlGenericErrorContext,
157 "Memory allocation failed !\n");
158 }
159}
160
161/**
162 * xmlErrAttributeDup:
163 * @ctxt: an XML parser context
164 * @prefix: the attribute prefix
165 * @localname: the attribute localname
166 *
167 * Handle a redefinition of attribute error
168 */
169static void
170xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
171 const xmlChar * localname)
172{
173 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
174 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
175 if (prefix == NULL)
176 ctxt->sax->error(ctxt->userData,
177 "Attribute %s redefined\n", localname);
178 else
179 ctxt->sax->error(ctxt->userData,
180 "Attribute %s:%s redefined\n", prefix,
181 localname);
182 }
183 ctxt->wellFormed = 0;
184 if (ctxt->recovery == 0)
185 ctxt->disableSAX = 1;
186}
187
188/**
189 * xmlFatalErr:
190 * @ctxt: an XML parser context
191 * @error: the error number
192 * @extra: extra information string
193 *
194 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
195 */
196static void
197xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char * info)
198{
199 const char *errmsg;
200
201 if (ctxt == NULL) {
202 xmlGenericError(xmlGenericErrorContext,
203 "xmlFatalErr: no context !\n");
204 return;
205 }
206 if ((ctxt->sax == NULL) || (ctxt->sax->error == NULL))
207 return;
208 switch (error) {
209 case XML_ERR_INVALID_HEX_CHARREF:
210 errmsg = "CharRef: invalid hexadecimal value\n";
211 break;
212 case XML_ERR_INVALID_DEC_CHARREF:
213 errmsg = "CharRef: invalid decimal value\n";
214 break;
215 case XML_ERR_INVALID_CHARREF:
216 errmsg = "CharRef: invalid value\n";
217 break;
218 case XML_ERR_INTERNAL_ERROR:
219 errmsg = "internal error";
220 break;
221 case XML_ERR_PEREF_AT_EOF:
222 errmsg = "PEReference at end of document\n";
223 break;
224 case XML_ERR_PEREF_IN_PROLOG:
225 errmsg = "PEReference in prolog\n";
226 break;
227 case XML_ERR_PEREF_IN_EPILOG:
228 errmsg = "PEReference in epilog\n";
229 break;
230 case XML_ERR_PEREF_NO_NAME:
231 errmsg = "PEReference: no name\n";
232 break;
233 case XML_ERR_PEREF_SEMICOL_MISSING:
234 errmsg = "PEReference: expecting ';'\n";
235 break;
236 case XML_ERR_ENTITY_LOOP:
237 errmsg = "Detected an entity reference loop\n";
238 break;
239 case XML_ERR_ENTITY_NOT_STARTED:
240 errmsg = "EntityValue: \" or ' expected\n";
241 break;
242 case XML_ERR_ENTITY_PE_INTERNAL:
243 errmsg = "PEReferences forbidden in internal subset\n";
244 break;
245 case XML_ERR_ENTITY_NOT_FINISHED:
246 errmsg = "EntityValue: \" or ' expected\n";
247 break;
248 case XML_ERR_ATTRIBUTE_NOT_STARTED:
249 errmsg = "AttValue: \" or ' expected\n";
250 break;
251 case XML_ERR_LT_IN_ATTRIBUTE:
252 errmsg = "Unescaped '<' not allowed in attributes values\n";
253 break;
254 case XML_ERR_LITERAL_NOT_STARTED:
255 errmsg = "SystemLiteral \" or ' expected\n";
256 break;
257 case XML_ERR_LITERAL_NOT_FINISHED:
258 errmsg = "Unfinished System or Public ID \" or ' expected\n";
259 break;
260 case XML_ERR_MISPLACED_CDATA_END:
261 errmsg = "Sequence ']]>' not allowed in content\n";
262 break;
263 case XML_ERR_URI_REQUIRED:
264 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
265 break;
266 case XML_ERR_PUBID_REQUIRED:
267 errmsg = "PUBLIC, the Public Identifier is missing\n";
268 break;
269 case XML_ERR_HYPHEN_IN_COMMENT:
270 errmsg = "Comment must not contain '--' (double-hyphen)\n";
271 break;
272 case XML_ERR_PI_NOT_STARTED:
273 errmsg = "xmlParsePI : no target name\n";
274 break;
275 case XML_ERR_RESERVED_XML_NAME:
276 errmsg = "Invalid PI name\n";
277 break;
278 case XML_ERR_NOTATION_NOT_STARTED:
279 errmsg = "NOTATION: Name expected here\n";
280 break;
281 case XML_ERR_NOTATION_NOT_FINISHED:
282 errmsg = "'>' required to close NOTATION declaration\n";
283 break;
284 case XML_ERR_VALUE_REQUIRED:
285 errmsg = "Entity value required\n";
286 break;
287 case XML_ERR_URI_FRAGMENT:
288 errmsg = "Fragment not allowed";
289 break;
290 case XML_ERR_ATTLIST_NOT_STARTED:
291 errmsg = "'(' required to start ATTLIST enumeration\n";
292 break;
293 case XML_ERR_NMTOKEN_REQUIRED:
294 errmsg = "NmToken expected in ATTLIST enumeration\n";
295 break;
296 case XML_ERR_ATTLIST_NOT_FINISHED:
297 errmsg = "')' required to finish ATTLIST enumeration\n";
298 break;
299 case XML_ERR_MIXED_NOT_STARTED:
300 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
301 break;
302 case XML_ERR_PCDATA_REQUIRED:
303 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
304 break;
305 case XML_ERR_ELEMCONTENT_NOT_STARTED:
306 errmsg = "ContentDecl : Name or '(' expected\n";
307 break;
308 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
309 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
310 break;
311 case XML_ERR_PEREF_IN_INT_SUBSET:
312 errmsg = "PEReference: forbidden within markup decl in internal subset\n";
313 break;
314 case XML_ERR_GT_REQUIRED:
315 errmsg = "expected '>'\n";
316 break;
317 case XML_ERR_CONDSEC_INVALID:
318 errmsg = "XML conditional section '[' expected\n";
319 break;
320 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
321 errmsg = "Content error in the external subset\n";
322 break;
323 case XML_ERR_CONDSEC_INVALID_KEYWORD:
324 errmsg = "conditional section INCLUDE or IGNORE keyword expected\n";
325 break;
326 case XML_ERR_CONDSEC_NOT_FINISHED:
327 errmsg = "XML conditional section not closed\n";
328 break;
329 case XML_ERR_XMLDECL_NOT_STARTED:
330 errmsg = "Text declaration '<?xml' required\n";
331 break;
332 case XML_ERR_XMLDECL_NOT_FINISHED:
333 errmsg = "parsing XML declaration: '?>' expected\n";
334 break;
335 case XML_ERR_EXT_ENTITY_STANDALONE:
336 errmsg = "external parsed entities cannot be standalone\n";
337 break;
338 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
339 errmsg = "EntityRef: expecting ';'\n";
340 break;
341 case XML_ERR_DOCTYPE_NOT_FINISHED:
342 errmsg = "DOCTYPE improperly terminated\n";
343 break;
344 case XML_ERR_LTSLASH_REQUIRED:
345 errmsg = "EndTag: '</' not found\n";
346 break;
347 case XML_ERR_EQUAL_REQUIRED:
348 errmsg = "expected '='\n";
349 break;
350 case XML_ERR_STRING_NOT_CLOSED:
351 errmsg = "String not closed expecting \" or '\n";
352 break;
353 case XML_ERR_STRING_NOT_STARTED:
354 errmsg = "String not started expecting ' or \"\n";
355 break;
356 case XML_ERR_ENCODING_NAME:
357 errmsg = "Invalid XML encoding name\n";
358 break;
359 case XML_ERR_STANDALONE_VALUE:
360 errmsg = "standalone accepts only 'yes' or 'no'\n";
361 break;
362 case XML_ERR_DOCUMENT_EMPTY:
363 errmsg = "Document is empty\n";
364 break;
365 case XML_ERR_DOCUMENT_END:
366 errmsg = "Extra content at the end of the document\n";
367 break;
368 case XML_ERR_NOT_WELL_BALANCED:
369 errmsg = "chunk is not well balanced\n";
370 break;
371 case XML_ERR_EXTRA_CONTENT:
372 errmsg = "extra content at the end of well balanced chunk\n";
373 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000374 case XML_ERR_VERSION_MISSING:
375 errmsg = "Malformed declaration expecting version\n";
376 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000377#if 0
378 case :
379 errmsg = "\n";
380 break;
381#endif
382 default:
383 errmsg = "Unregistered error message\n";
384 }
385 ctxt->errNo = error;
386 if (info == NULL) {
387 ctxt->sax->error(ctxt->userData, errmsg);
388 } else {
389 ctxt->sax->error(ctxt->userData, "%s: %s", errmsg, info);
390 }
391 ctxt->wellFormed = 0;
392 if (ctxt->recovery == 0)
393 ctxt->disableSAX = 1;
394}
395
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000396/**
397 * xmlFatalErrMsg:
398 * @ctxt: an XML parser context
399 * @error: the error number
400 * @msg: the error message
401 *
402 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
403 */
404static void
405xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *msg)
406{
407 if (ctxt == NULL) {
408 xmlGenericError(xmlGenericErrorContext,
409 "xmlFatalErr: no context !\n");
410 return;
411 }
412 ctxt->errNo = error;
413 if ((ctxt->sax == NULL) || (ctxt->sax->error == NULL))
414 return;
415 ctxt->sax->error(ctxt->userData, msg);
416 ctxt->wellFormed = 0;
417 if (ctxt->recovery == 0)
418 ctxt->disableSAX = 1;
419}
420
421/**
422 * xmlFatalErrMsgInt:
423 * @ctxt: an XML parser context
424 * @error: the error number
425 * @msg: the error message
426 * @val: an integer value
427 *
428 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
429 */
430static void
431xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
432 const char *msg, int val)
433{
434 if (ctxt == NULL) {
435 xmlGenericError(xmlGenericErrorContext,
436 "xmlFatalErr: no context !\n");
437 return;
438 }
439 ctxt->errNo = error;
440 if ((ctxt->sax == NULL) || (ctxt->sax->error == NULL))
441 return;
442 ctxt->sax->error(ctxt->userData, msg, val);
443 ctxt->wellFormed = 0;
444 if (ctxt->recovery == 0)
445 ctxt->disableSAX = 1;
446}
447
448/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000449 * xmlFatalErrMsgStr:
450 * @ctxt: an XML parser context
451 * @error: the error number
452 * @msg: the error message
453 * @val: a string value
454 *
455 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
456 */
457static void
458xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
459 const char *msg, const xmlChar *val)
460{
461 if (ctxt == NULL) {
462 xmlGenericError(xmlGenericErrorContext,
463 "xmlFatalErr: no context !\n");
464 return;
465 }
466 ctxt->errNo = error;
467 if ((ctxt->sax == NULL) || (ctxt->sax->error == NULL))
468 return;
469 ctxt->sax->error(ctxt->userData, msg, val);
470 ctxt->wellFormed = 0;
471 if (ctxt->recovery == 0)
472 ctxt->disableSAX = 1;
473}
474
475/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000476 * xmlNsErr:
477 * @ctxt: an XML parser context
478 * @error: the error number
479 * @msg: the message
480 * @info1: extra information string
481 * @info2: extra information string
482 *
483 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
484 */
485static void
486xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
487 const char *msg,
488 const xmlChar *info1, const xmlChar *info2, const xmlChar *info3)
489{
490 if (ctxt == NULL)
491 return;
492 if ((ctxt->sax == NULL) || (ctxt->sax->error == NULL))
493 return;
494
495 ctxt->errNo = error;
496 if (info1 == NULL) {
497 ctxt->sax->error(ctxt->userData, msg);
498 } else if (info2 == NULL) {
499 ctxt->sax->error(ctxt->userData, msg, info1);
500 } else if (info3 == NULL) {
501 ctxt->sax->error(ctxt->userData, msg, info1, info2);
502 } else {
503 ctxt->sax->error(ctxt->userData, msg, info1, info2, info3);
504 }
505 ctxt->nsWellFormed = 0;
506}
507
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000508/************************************************************************
509 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000510 * SAX2 defaulted attributes handling *
511 * *
512 ************************************************************************/
513
514/**
515 * xmlDetectSAX2:
516 * @ctxt: an XML parser context
517 *
518 * Do the SAX2 detection and specific intialization
519 */
520static void
521xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
522 if (ctxt == NULL) return;
523 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
524 ((ctxt->sax->startElementNs != NULL) ||
525 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
526
527 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
528 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
529 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
530}
531
#ifdef SAX2
/*
 * Defaulted attributes recorded for one element. The values array is
 * used as a variable length tail: xmlAddDefAttrs() allocates the record
 * with room for maxAttrs attributes, each one using 4 consecutive slots
 * (local name, prefix, value, end of the value).
 */
typedef struct _xmlDefAttrs xmlDefAttrs, *xmlDefAttrsPtr;
struct _xmlDefAttrs {
    int nbAttrs;                /* number of defaulted attributes stored */
    int maxAttrs;               /* number of attributes the array can hold */
    const xmlChar *values[4];   /* array of localname/prefix/values */
};
#endif
541
542/**
543 * xmlAddDefAttrs:
544 * @ctxt: an XML parser context
545 * @fullname: the element fullname
546 * @fullattr: the attribute fullname
547 * @value: the attribute value
548 *
549 * Add a defaulted attribute for an element
550 */
551static void
552xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
553 const xmlChar *fullname,
554 const xmlChar *fullattr,
555 const xmlChar *value) {
556 xmlDefAttrsPtr defaults;
557 int len;
558 const xmlChar *name;
559 const xmlChar *prefix;
560
561 if (ctxt->attsDefault == NULL) {
562 ctxt->attsDefault = xmlHashCreate(10);
563 if (ctxt->attsDefault == NULL)
564 goto mem_error;
565 }
566
567 /*
568 * plit the element name into prefix:localname , the string found
569 * are within the DTD and hen not associated to namespace names.
570 */
571 name = xmlSplitQName3(fullname, &len);
572 if (name == NULL) {
573 name = xmlDictLookup(ctxt->dict, fullname, -1);
574 prefix = NULL;
575 } else {
576 name = xmlDictLookup(ctxt->dict, name, -1);
577 prefix = xmlDictLookup(ctxt->dict, fullname, len);
578 }
579
580 /*
581 * make sure there is some storage
582 */
583 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
584 if (defaults == NULL) {
585 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
586 12 * sizeof(const xmlChar *));
587 if (defaults == NULL)
588 goto mem_error;
589 defaults->maxAttrs = 4;
590 defaults->nbAttrs = 0;
591 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
592 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
593 defaults = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
594 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
595 if (defaults == NULL)
596 goto mem_error;
597 defaults->maxAttrs *= 2;
598 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
599 }
600
601 /*
602 * plit the element name into prefix:localname , the string found
603 * are within the DTD and hen not associated to namespace names.
604 */
605 name = xmlSplitQName3(fullattr, &len);
606 if (name == NULL) {
607 name = xmlDictLookup(ctxt->dict, fullattr, -1);
608 prefix = NULL;
609 } else {
610 name = xmlDictLookup(ctxt->dict, name, -1);
611 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
612 }
613
614 defaults->values[4 * defaults->nbAttrs] = name;
615 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
616 /* intern the string and precompute the end */
617 len = xmlStrlen(value);
618 value = xmlDictLookup(ctxt->dict, value, len);
619 defaults->values[4 * defaults->nbAttrs + 2] = value;
620 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
621 defaults->nbAttrs++;
622
623 return;
624
625mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000626 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000627 return;
628}
629
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000630/**
631 * xmlAddSpecialAttr:
632 * @ctxt: an XML parser context
633 * @fullname: the element fullname
634 * @fullattr: the attribute fullname
635 * @type: the attribute type
636 *
637 * Register that this attribute is not CDATA
638 */
639static void
640xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
641 const xmlChar *fullname,
642 const xmlChar *fullattr,
643 int type)
644{
645 if (ctxt->attsSpecial == NULL) {
646 ctxt->attsSpecial = xmlHashCreate(10);
647 if (ctxt->attsSpecial == NULL)
648 goto mem_error;
649 }
650
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000651 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
652 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000653 return;
654
655mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000656 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000657 return;
658}
659
Owen Taylor3473f882001-02-23 17:55:21 +0000660/************************************************************************
661 * *
662 * Parser stacks related functions and macros *
663 * *
664 ************************************************************************/
665
666xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
667 const xmlChar ** str);
668
Daniel Veillard0fb18932003-09-07 09:14:37 +0000669#ifdef SAX2
670/**
671 * nsPush:
672 * @ctxt: an XML parser context
673 * @prefix: the namespace prefix or NULL
674 * @URL: the namespace name
675 *
676 * Pushes a new parser namespace on top of the ns stack
677 *
678 * Returns -1 in case of error, the index in the stack otherwise
679 */
680static int
681nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
682{
683 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
684 ctxt->nsMax = 10;
685 ctxt->nsNr = 0;
686 ctxt->nsTab = (const xmlChar **)
687 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
688 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000689 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000690 ctxt->nsMax = 0;
691 return (-1);
692 }
693 } else if (ctxt->nsNr >= ctxt->nsMax) {
694 ctxt->nsMax *= 2;
695 ctxt->nsTab = (const xmlChar **)
696 xmlRealloc(ctxt->nsTab,
697 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
698 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000699 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000700 ctxt->nsMax /= 2;
701 return (-1);
702 }
703 }
704 ctxt->nsTab[ctxt->nsNr++] = prefix;
705 ctxt->nsTab[ctxt->nsNr++] = URL;
706 return (ctxt->nsNr);
707}
708/**
709 * nsPop:
710 * @ctxt: an XML parser context
711 * @nr: the number to pop
712 *
713 * Pops the top @nr parser prefix/namespace from the ns stack
714 *
715 * Returns the number of namespaces removed
716 */
717static int
718nsPop(xmlParserCtxtPtr ctxt, int nr)
719{
720 int i;
721
722 if (ctxt->nsTab == NULL) return(0);
723 if (ctxt->nsNr < nr) {
724 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
725 nr = ctxt->nsNr;
726 }
727 if (ctxt->nsNr <= 0)
728 return (0);
729
730 for (i = 0;i < nr;i++) {
731 ctxt->nsNr--;
732 ctxt->nsTab[ctxt->nsNr] = NULL;
733 }
734 return(nr);
735}
736#endif
737
738static int
739xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
740 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000741 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000742 int maxatts;
743
744 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000745 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000746 atts = (const xmlChar **)
747 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000748 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000749 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000750 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
751 if (attallocs == NULL) goto mem_error;
752 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000753 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000754 } else if (nr + 5 > ctxt->maxatts) {
755 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000756 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
757 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000758 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000759 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000760 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
761 (maxatts / 5) * sizeof(int));
762 if (attallocs == NULL) goto mem_error;
763 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000764 ctxt->maxatts = maxatts;
765 }
766 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000767mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000768 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000769 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000770}
771
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000772/**
773 * inputPush:
774 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000775 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000776 *
777 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000778 *
779 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000780 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000781extern int
782inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
783{
784 if (ctxt->inputNr >= ctxt->inputMax) {
785 ctxt->inputMax *= 2;
786 ctxt->inputTab =
787 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
788 ctxt->inputMax *
789 sizeof(ctxt->inputTab[0]));
790 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000791 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000792 return (0);
793 }
794 }
795 ctxt->inputTab[ctxt->inputNr] = value;
796 ctxt->input = value;
797 return (ctxt->inputNr++);
798}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000799/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000800 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000801 * @ctxt: an XML parser context
802 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000803 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000804 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000805 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000806 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000807extern xmlParserInputPtr
808inputPop(xmlParserCtxtPtr ctxt)
809{
810 xmlParserInputPtr ret;
811
812 if (ctxt->inputNr <= 0)
813 return (0);
814 ctxt->inputNr--;
815 if (ctxt->inputNr > 0)
816 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
817 else
818 ctxt->input = NULL;
819 ret = ctxt->inputTab[ctxt->inputNr];
820 ctxt->inputTab[ctxt->inputNr] = 0;
821 return (ret);
822}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000823/**
824 * nodePush:
825 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000826 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000827 *
828 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000829 *
830 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000831 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000832extern int
833nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
834{
835 if (ctxt->nodeNr >= ctxt->nodeMax) {
836 ctxt->nodeMax *= 2;
837 ctxt->nodeTab =
838 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
839 ctxt->nodeMax *
840 sizeof(ctxt->nodeTab[0]));
841 if (ctxt->nodeTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000842 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000843 return (0);
844 }
845 }
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000846#ifdef MAX_DEPTH
847 if (ctxt->nodeNr > MAX_DEPTH) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000848 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000849 "Excessive depth in document: change MAX_DEPTH = %d\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000850 MAX_DEPTH);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000851 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000852 return(0);
853 }
854#endif
Daniel Veillard1c732d22002-11-30 11:22:59 +0000855 ctxt->nodeTab[ctxt->nodeNr] = value;
856 ctxt->node = value;
857 return (ctxt->nodeNr++);
858}
859/**
860 * nodePop:
861 * @ctxt: an XML parser context
862 *
863 * Pops the top element node from the node stack
864 *
865 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +0000866 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000867extern xmlNodePtr
868nodePop(xmlParserCtxtPtr ctxt)
869{
870 xmlNodePtr ret;
871
872 if (ctxt->nodeNr <= 0)
873 return (0);
874 ctxt->nodeNr--;
875 if (ctxt->nodeNr > 0)
876 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
877 else
878 ctxt->node = NULL;
879 ret = ctxt->nodeTab[ctxt->nodeNr];
880 ctxt->nodeTab[ctxt->nodeNr] = 0;
881 return (ret);
882}
883/**
Daniel Veillarde57ec792003-09-10 10:50:59 +0000884 * nameNsPush:
885 * @ctxt: an XML parser context
886 * @value: the element name
887 * @prefix: the element prefix
888 * @URI: the element namespace name
889 *
890 * Pushes a new element name/prefix/URL on top of the name stack
891 *
892 * Returns -1 in case of error, the index in the stack otherwise
893 */
894static int
895nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
896 const xmlChar *prefix, const xmlChar *URI, int nsNr)
897{
898 if (ctxt->nameNr >= ctxt->nameMax) {
899 const xmlChar * *tmp;
900 void **tmp2;
901 ctxt->nameMax *= 2;
902 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
903 ctxt->nameMax *
904 sizeof(ctxt->nameTab[0]));
905 if (tmp == NULL) {
906 ctxt->nameMax /= 2;
907 goto mem_error;
908 }
909 ctxt->nameTab = tmp;
910 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
911 ctxt->nameMax * 3 *
912 sizeof(ctxt->pushTab[0]));
913 if (tmp2 == NULL) {
914 ctxt->nameMax /= 2;
915 goto mem_error;
916 }
917 ctxt->pushTab = tmp2;
918 }
919 ctxt->nameTab[ctxt->nameNr] = value;
920 ctxt->name = value;
921 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
922 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000923 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000924 return (ctxt->nameNr++);
925mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000926 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000927 return (-1);
928}
929/**
930 * nameNsPop:
931 * @ctxt: an XML parser context
932 *
933 * Pops the top element/prefix/URI name from the name stack
934 *
935 * Returns the name just removed
936 */
937static const xmlChar *
938nameNsPop(xmlParserCtxtPtr ctxt)
939{
940 const xmlChar *ret;
941
942 if (ctxt->nameNr <= 0)
943 return (0);
944 ctxt->nameNr--;
945 if (ctxt->nameNr > 0)
946 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
947 else
948 ctxt->name = NULL;
949 ret = ctxt->nameTab[ctxt->nameNr];
950 ctxt->nameTab[ctxt->nameNr] = NULL;
951 return (ret);
952}
953
954/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000955 * namePush:
956 * @ctxt: an XML parser context
957 * @value: the element name
958 *
959 * Pushes a new element name on top of the name stack
960 *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000961 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +0000962 */
963extern int
Daniel Veillard2fdbd322003-08-18 12:15:38 +0000964namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +0000965{
966 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000967 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +0000968 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000969 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +0000970 ctxt->nameMax *
971 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000972 if (tmp == NULL) {
973 ctxt->nameMax /= 2;
974 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +0000975 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000976 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +0000977 }
978 ctxt->nameTab[ctxt->nameNr] = value;
979 ctxt->name = value;
980 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000981mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000982 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000983 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000984}
985/**
986 * namePop:
987 * @ctxt: an XML parser context
988 *
989 * Pops the top element name from the name stack
990 *
991 * Returns the name just removed
992 */
Daniel Veillard2fdbd322003-08-18 12:15:38 +0000993extern const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000994namePop(xmlParserCtxtPtr ctxt)
995{
Daniel Veillard2fdbd322003-08-18 12:15:38 +0000996 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +0000997
998 if (ctxt->nameNr <= 0)
999 return (0);
1000 ctxt->nameNr--;
1001 if (ctxt->nameNr > 0)
1002 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1003 else
1004 ctxt->name = NULL;
1005 ret = ctxt->nameTab[ctxt->nameNr];
1006 ctxt->nameTab[ctxt->nameNr] = 0;
1007 return (ret);
1008}
Owen Taylor3473f882001-02-23 17:55:21 +00001009
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001010static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001011 if (ctxt->spaceNr >= ctxt->spaceMax) {
1012 ctxt->spaceMax *= 2;
1013 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1014 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1015 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001016 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001017 return(0);
1018 }
1019 }
1020 ctxt->spaceTab[ctxt->spaceNr] = val;
1021 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1022 return(ctxt->spaceNr++);
1023}
1024
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001025static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001026 int ret;
1027 if (ctxt->spaceNr <= 0) return(0);
1028 ctxt->spaceNr--;
1029 if (ctxt->spaceNr > 0)
1030 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1031 else
1032 ctxt->space = NULL;
1033 ret = ctxt->spaceTab[ctxt->spaceNr];
1034 ctxt->spaceTab[ctxt->spaceNr] = -1;
1035 return(ret);
1036}
1037
1038/*
1039 * Macros for accessing the content. Those should be used only by the parser,
1040 * and not exported.
1041 *
1042 * Dirty macros, i.e. one often need to make assumption on the context to
1043 * use them
1044 *
1045 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1046 * To be used with extreme caution since operations consuming
1047 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
1052 * RAW same as CUR but in the input buffer, bypass any token
1053 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare on ASCII based substrings.
1056 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001057 * strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
 *           defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001060 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1061 *
1062 * NEXT Skip to the next character, this does the proper decoding
1063 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001064 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001065 * CUR_CHAR(l) returns the current unicode character (int), set l
1066 * to the number of xmlChars used for the encoding [0-5].
1067 * CUR_SCHAR same but operate on a string instead of the context
1068 * COPY_BUF copy the current unicode char to the target buffer, increment
1069 * the index
1070 * GROW, SHRINK handling of input buffers
1071 */
1072
/* RAW, CUR: the xmlChar at the current position of the current input. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
/* NXT(val): the xmlChar located val positions after the current one. */
#define NXT(val) ctxt->input->cur[(val)]
/* CUR_PTR: the current position pointer of the current input. */
#define CUR_PTR ctxt->input->cur

/*
 * SKIP(val): advance the current input by val xmlChars, adding val to
 * the nbChars and column counters as well.  Afterwards a '%' at the new
 * position is handed to xmlParserHandlePEReference(), and when the new
 * position holds 0 and the input cannot be grown the input is popped.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1085
Daniel Veillarda880b122003-04-21 21:36:41 +00001086#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001087 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1088 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001089 xmlSHRINK (ctxt);
1090
1091static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1092 xmlParserInputShrink(ctxt->input);
1093 if ((*ctxt->input->cur == 0) &&
1094 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1095 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001096 }
Owen Taylor3473f882001-02-23 17:55:21 +00001097
Daniel Veillarda880b122003-04-21 21:36:41 +00001098#define GROW if ((ctxt->progressive == 0) && \
1099 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001100 xmlGROW (ctxt);
1101
1102static void xmlGROW (xmlParserCtxtPtr ctxt) {
1103 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1104 if ((*ctxt->input->cur == 0) &&
1105 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1106 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001107}
Owen Taylor3473f882001-02-23 17:55:21 +00001108
/* SKIP_BLANKS: skip blanks through xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: move to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one xmlChar, bumping the column and nbChars
 * counters; the input is grown when the new position holds 0.
 * Expands to a brace block, not a do/while statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): advance by l xmlChars.  The line/column counters are
 * updated from the xmlChar at the current position before moving
 * (a '\n' starts a new line, anything else counts as one column);
 * a '%' at the new position is handed to xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++;						\
	ctxt->input->col = 1;						\
    } else {								\
	ctxt->input->col++;						\
    }									\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)
1128
/* CUR_CHAR(l): current character of the input; l receives its length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
/* CUR_SCHAR(s, l): same, but reading from the string s. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append the character v to buffer b at index i and
 * advance i.  When l is 1 a single xmlChar is stored, otherwise the
 * character is written by xmlCopyCharMultiByte().
 * Expands to a bare if/else without a trailing ';'.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +00001135
1136/**
1137 * xmlSkipBlankChars:
1138 * @ctxt: the XML parser context
1139 *
1140 * skip all blanks character found at that point in the input streams.
1141 * It pops up finished entities in the process if allowable at that point.
1142 *
1143 * Returns the number of space chars skipped
1144 */
1145
1146int
1147xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001148 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001149
1150 /*
1151 * It's Okay to use CUR/NEXT here since all the blanks are on
1152 * the ASCII range.
1153 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001154 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1155 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001156 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001157 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001158 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001159 cur = ctxt->input->cur;
1160 while (IS_BLANK(*cur)) {
1161 if (*cur == '\n') {
1162 ctxt->input->line++; ctxt->input->col = 1;
1163 }
1164 cur++;
1165 res++;
1166 if (*cur == 0) {
1167 ctxt->input->cur = cur;
1168 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1169 cur = ctxt->input->cur;
1170 }
1171 }
1172 ctxt->input->cur = cur;
1173 } else {
1174 int cur;
1175 do {
1176 cur = CUR;
1177 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
1178 NEXT;
1179 cur = CUR;
1180 res++;
1181 }
1182 while ((cur == 0) && (ctxt->inputNr > 1) &&
1183 (ctxt->instate != XML_PARSER_COMMENT)) {
1184 xmlPopInput(ctxt);
1185 cur = CUR;
1186 }
1187 /*
1188 * Need to handle support of entities branching here
1189 */
1190 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1191 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1192 }
Owen Taylor3473f882001-02-23 17:55:21 +00001193 return(res);
1194}
1195
1196/************************************************************************
1197 * *
1198 * Commodity functions to handle entities *
1199 * *
1200 ************************************************************************/
1201
1202/**
1203 * xmlPopInput:
1204 * @ctxt: an XML parser context
1205 *
1206 * xmlPopInput: the current input pointed by ctxt->input came to an end
1207 * pop it and return the next char.
1208 *
1209 * Returns the current xmlChar in the parser context
1210 */
1211xmlChar
1212xmlPopInput(xmlParserCtxtPtr ctxt) {
1213 if (ctxt->inputNr == 1) return(0); /* End of main Input */
1214 if (xmlParserDebugEntities)
1215 xmlGenericError(xmlGenericErrorContext,
1216 "Popping input %d\n", ctxt->inputNr);
1217 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001218 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001219 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1220 return(xmlPopInput(ctxt));
1221 return(CUR);
1222}
1223
1224/**
1225 * xmlPushInput:
1226 * @ctxt: an XML parser context
1227 * @input: an XML parser input fragment (entity, XML fragment ...).
1228 *
1229 * xmlPushInput: switch to a new input stream which is stacked on top
1230 * of the previous one(s).
1231 */
1232void
1233xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1234 if (input == NULL) return;
1235
1236 if (xmlParserDebugEntities) {
1237 if ((ctxt->input != NULL) && (ctxt->input->filename))
1238 xmlGenericError(xmlGenericErrorContext,
1239 "%s(%d): ", ctxt->input->filename,
1240 ctxt->input->line);
1241 xmlGenericError(xmlGenericErrorContext,
1242 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1243 }
1244 inputPush(ctxt, input);
1245 GROW;
1246}
1247
1248/**
1249 * xmlParseCharRef:
1250 * @ctxt: an XML parser context
1251 *
1252 * parse Reference declarations
1253 *
1254 * [66] CharRef ::= '&#' [0-9]+ ';' |
1255 * '&#x' [0-9a-fA-F]+ ';'
1256 *
1257 * [ WFC: Legal Character ]
1258 * Characters referred to using character references must match the
1259 * production for Char.
1260 *
1261 * Returns the value parsed (as an int), 0 in case of error
1262 */
1263int
1264xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001265 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001266 int count = 0;
1267
Owen Taylor3473f882001-02-23 17:55:21 +00001268 /*
1269 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1270 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001271 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001272 (NXT(2) == 'x')) {
1273 SKIP(3);
1274 GROW;
1275 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001276 if (count++ > 20) {
1277 count = 0;
1278 GROW;
1279 }
1280 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001281 val = val * 16 + (CUR - '0');
1282 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1283 val = val * 16 + (CUR - 'a') + 10;
1284 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1285 val = val * 16 + (CUR - 'A') + 10;
1286 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001287 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001288 val = 0;
1289 break;
1290 }
1291 NEXT;
1292 count++;
1293 }
1294 if (RAW == ';') {
1295 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001296 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001297 ctxt->nbChars ++;
1298 ctxt->input->cur++;
1299 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001300 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001301 SKIP(2);
1302 GROW;
1303 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001304 if (count++ > 20) {
1305 count = 0;
1306 GROW;
1307 }
1308 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001309 val = val * 10 + (CUR - '0');
1310 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001311 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001312 val = 0;
1313 break;
1314 }
1315 NEXT;
1316 count++;
1317 }
1318 if (RAW == ';') {
1319 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001320 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001321 ctxt->nbChars ++;
1322 ctxt->input->cur++;
1323 }
1324 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001325 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001326 }
1327
1328 /*
1329 * [ WFC: Legal Character ]
1330 * Characters referred to using character references must match the
1331 * production for Char.
1332 */
1333 if (IS_CHAR(val)) {
1334 return(val);
1335 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001336 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1337 "xmlParseCharRef: invalid xmlChar value %d\n",
1338 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001339 }
1340 return(0);
1341}
1342
1343/**
1344 * xmlParseStringCharRef:
1345 * @ctxt: an XML parser context
1346 * @str: a pointer to an index in the string
1347 *
1348 * parse Reference declarations, variant parsing from a string rather
1349 * than an an input flow.
1350 *
1351 * [66] CharRef ::= '&#' [0-9]+ ';' |
1352 * '&#x' [0-9a-fA-F]+ ';'
1353 *
1354 * [ WFC: Legal Character ]
1355 * Characters referred to using character references must match the
1356 * production for Char.
1357 *
1358 * Returns the value parsed (as an int), 0 in case of error, str will be
1359 * updated to the current value of the index
1360 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001361static int
Owen Taylor3473f882001-02-23 17:55:21 +00001362xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1363 const xmlChar *ptr;
1364 xmlChar cur;
1365 int val = 0;
1366
1367 if ((str == NULL) || (*str == NULL)) return(0);
1368 ptr = *str;
1369 cur = *ptr;
1370 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1371 ptr += 3;
1372 cur = *ptr;
1373 while (cur != ';') { /* Non input consuming loop */
1374 if ((cur >= '0') && (cur <= '9'))
1375 val = val * 16 + (cur - '0');
1376 else if ((cur >= 'a') && (cur <= 'f'))
1377 val = val * 16 + (cur - 'a') + 10;
1378 else if ((cur >= 'A') && (cur <= 'F'))
1379 val = val * 16 + (cur - 'A') + 10;
1380 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001381 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001382 val = 0;
1383 break;
1384 }
1385 ptr++;
1386 cur = *ptr;
1387 }
1388 if (cur == ';')
1389 ptr++;
1390 } else if ((cur == '&') && (ptr[1] == '#')){
1391 ptr += 2;
1392 cur = *ptr;
1393 while (cur != ';') { /* Non input consuming loops */
1394 if ((cur >= '0') && (cur <= '9'))
1395 val = val * 10 + (cur - '0');
1396 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001397 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001398 val = 0;
1399 break;
1400 }
1401 ptr++;
1402 cur = *ptr;
1403 }
1404 if (cur == ';')
1405 ptr++;
1406 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001407 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001408 return(0);
1409 }
1410 *str = ptr;
1411
1412 /*
1413 * [ WFC: Legal Character ]
1414 * Characters referred to using character references must match the
1415 * production for Char.
1416 */
1417 if (IS_CHAR(val)) {
1418 return(val);
1419 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001420 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1421 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1422 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001423 }
1424 return(0);
1425}
1426
1427/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001428 * xmlNewBlanksWrapperInputStream:
1429 * @ctxt: an XML parser context
1430 * @entity: an Entity pointer
1431 *
1432 * Create a new input stream for wrapping
1433 * blanks around a PEReference
1434 *
1435 * Returns the new input stream or NULL
1436 */
1437
1438static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1439
Daniel Veillardf4862f02002-09-10 11:13:43 +00001440static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001441xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1442 xmlParserInputPtr input;
1443 xmlChar *buffer;
1444 size_t length;
1445 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001446 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1447 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001448 return(NULL);
1449 }
1450 if (xmlParserDebugEntities)
1451 xmlGenericError(xmlGenericErrorContext,
1452 "new blanks wrapper for entity: %s\n", entity->name);
1453 input = xmlNewInputStream(ctxt);
1454 if (input == NULL) {
1455 return(NULL);
1456 }
1457 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001458 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001459 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001460 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001461 return(NULL);
1462 }
1463 buffer [0] = ' ';
1464 buffer [1] = '%';
1465 buffer [length-3] = ';';
1466 buffer [length-2] = ' ';
1467 buffer [length-1] = 0;
1468 memcpy(buffer + 2, entity->name, length - 5);
1469 input->free = deallocblankswrapper;
1470 input->base = buffer;
1471 input->cur = buffer;
1472 input->length = length;
1473 input->end = &buffer[length];
1474 return(input);
1475}
1476
1477/**
Owen Taylor3473f882001-02-23 17:55:21 +00001478 * xmlParserHandlePEReference:
1479 * @ctxt: the parser context
1480 *
1481 * [69] PEReference ::= '%' Name ';'
1482 *
1483 * [ WFC: No Recursion ]
1484 * A parsed entity must not contain a recursive
1485 * reference to itself, either directly or indirectly.
1486 *
1487 * [ WFC: Entity Declared ]
1488 * In a document without any DTD, a document with only an internal DTD
1489 * subset which contains no parameter entity references, or a document
1490 * with "standalone='yes'", ... ... The declaration of a parameter
1491 * entity must precede any reference to it...
1492 *
1493 * [ VC: Entity Declared ]
1494 * In a document with an external subset or external parameter entities
1495 * with "standalone='no'", ... ... The declaration of a parameter entity
1496 * must precede any reference to it...
1497 *
1498 * [ WFC: In DTD ]
1499 * Parameter-entity references may only appear in the DTD.
1500 * NOTE: misleading but this is handled.
1501 *
1502 * A PEReference may have been detected in the current input stream
1503 * the handling is done accordingly to
1504 * http://www.w3.org/TR/REC-xml#entproc
1505 * i.e.
1506 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001507 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001508 */
1509void
1510xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001511 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001512 xmlEntityPtr entity = NULL;
1513 xmlParserInputPtr input;
1514
Owen Taylor3473f882001-02-23 17:55:21 +00001515 if (RAW != '%') return;
1516 switch(ctxt->instate) {
1517 case XML_PARSER_CDATA_SECTION:
1518 return;
1519 case XML_PARSER_COMMENT:
1520 return;
1521 case XML_PARSER_START_TAG:
1522 return;
1523 case XML_PARSER_END_TAG:
1524 return;
1525 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001526 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001527 return;
1528 case XML_PARSER_PROLOG:
1529 case XML_PARSER_START:
1530 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001531 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001532 return;
1533 case XML_PARSER_ENTITY_DECL:
1534 case XML_PARSER_CONTENT:
1535 case XML_PARSER_ATTRIBUTE_VALUE:
1536 case XML_PARSER_PI:
1537 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001538 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001539 /* we just ignore it there */
1540 return;
1541 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001542 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001543 return;
1544 case XML_PARSER_ENTITY_VALUE:
1545 /*
1546 * NOTE: in the case of entity values, we don't do the
1547 * substitution here since we need the literal
1548 * entity value to be able to save the internal
1549 * subset of the document.
1550 * This will be handled by xmlStringDecodeEntities
1551 */
1552 return;
1553 case XML_PARSER_DTD:
1554 /*
1555 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1556 * In the internal DTD subset, parameter-entity references
1557 * can occur only where markup declarations can occur, not
1558 * within markup declarations.
1559 * In that case this is handled in xmlParseMarkupDecl
1560 */
1561 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1562 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +00001563 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
1564 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001565 break;
1566 case XML_PARSER_IGNORE:
1567 return;
1568 }
1569
1570 NEXT;
1571 name = xmlParseName(ctxt);
1572 if (xmlParserDebugEntities)
1573 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001574 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001575 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001576 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001577 } else {
1578 if (RAW == ';') {
1579 NEXT;
1580 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1581 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1582 if (entity == NULL) {
1583
1584 /*
1585 * [ WFC: Entity Declared ]
1586 * In a document without any DTD, a document with only an
1587 * internal DTD subset which contains no parameter entity
1588 * references, or a document with "standalone='yes'", ...
1589 * ... The declaration of a parameter entity must precede
1590 * any reference to it...
1591 */
1592 if ((ctxt->standalone == 1) ||
1593 ((ctxt->hasExternalSubset == 0) &&
1594 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001595 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00001596 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001597 } else {
1598 /*
1599 * [ VC: Entity Declared ]
1600 * In a document with an external subset or external
1601 * parameter entities with "standalone='no'", ...
1602 * ... The declaration of a parameter entity must precede
1603 * any reference to it...
1604 */
1605 if ((!ctxt->disableSAX) &&
1606 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1607 ctxt->vctxt.error(ctxt->vctxt.userData,
1608 "PEReference: %%%s; not found\n", name);
1609 } else if ((!ctxt->disableSAX) &&
1610 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1611 ctxt->sax->warning(ctxt->userData,
1612 "PEReference: %%%s; not found\n", name);
1613 ctxt->valid = 0;
1614 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00001615 } else if (ctxt->input->free != deallocblankswrapper) {
1616 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
1617 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001618 } else {
1619 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1620 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001621 xmlChar start[4];
1622 xmlCharEncoding enc;
1623
Owen Taylor3473f882001-02-23 17:55:21 +00001624 /*
1625 * handle the extra spaces added before and after
1626 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001627 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00001628 */
1629 input = xmlNewEntityInputStream(ctxt, entity);
1630 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00001631
1632 /*
1633 * Get the 4 first bytes and decode the charset
1634 * if enc != XML_CHAR_ENCODING_NONE
1635 * plug some encoding conversion routines.
1636 */
1637 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +00001638 if (entity->length >= 4) {
1639 start[0] = RAW;
1640 start[1] = NXT(1);
1641 start[2] = NXT(2);
1642 start[3] = NXT(3);
1643 enc = xmlDetectCharEncoding(start, 4);
1644 if (enc != XML_CHAR_ENCODING_NONE) {
1645 xmlSwitchEncoding(ctxt, enc);
1646 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001647 }
1648
Owen Taylor3473f882001-02-23 17:55:21 +00001649 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
1650 (RAW == '<') && (NXT(1) == '?') &&
1651 (NXT(2) == 'x') && (NXT(3) == 'm') &&
1652 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1653 xmlParseTextDecl(ctxt);
1654 }
Owen Taylor3473f882001-02-23 17:55:21 +00001655 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001656 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
1657 "PEReference: %s is not a parameter entity\n",
1658 name);
Owen Taylor3473f882001-02-23 17:55:21 +00001659 }
1660 }
1661 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001662 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001663 }
Owen Taylor3473f882001-02-23 17:55:21 +00001664 }
1665}
1666
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates buffer accordingly.  The result
 * of xmlRealloc() goes through a temporary so that, on failure, buffer
 * still points to the old (valid) allocation when control jumps to the
 * mem_error label of the enclosing function, which can then release it.
 * The enclosing function must provide that label and a buffer##_size
 * variable.
 */
#define growBuffer(buffer) {						\
    xmlChar *grown_buf;							\
    buffer##_size *= 2;							\
    grown_buf = (xmlChar *)						\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (grown_buf == NULL) goto mem_error;				\
    buffer = grown_buf;							\
}
1676
1677/**
1678 * xmlStringDecodeEntities:
1679 * @ctxt: the parser context
1680 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001681 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001682 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1683 * @end: an end marker xmlChar, 0 if none
1684 * @end2: an end marker xmlChar, 0 if none
1685 * @end3: an end marker xmlChar, 0 if none
1686 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001687 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001688 *
1689 * [67] Reference ::= EntityRef | CharRef
1690 *
1691 * [69] PEReference ::= '%' Name ';'
1692 *
1693 * Returns A newly allocated string with the substitution done. The caller
1694 * must deallocate it !
1695 */
1696xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001697xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1698 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001699 xmlChar *buffer = NULL;
1700 int buffer_size = 0;
1701
1702 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001703 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001704 xmlEntityPtr ent;
1705 int c,l;
1706 int nbchars = 0;
1707
Daniel Veillarde57ec792003-09-10 10:50:59 +00001708 if ((str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001709 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001710 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001711
1712 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001713 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001714 return(NULL);
1715 }
1716
1717 /*
1718 * allocate a translation buffer.
1719 */
1720 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001721 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001722 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001723
1724 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001725 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001726 * we are operating on already parsed values.
1727 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001728 if (str < last)
1729 c = CUR_SCHAR(str, l);
1730 else
1731 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001732 while ((c != 0) && (c != end) && /* non input consuming loop */
1733 (c != end2) && (c != end3)) {
1734
1735 if (c == 0) break;
1736 if ((c == '&') && (str[1] == '#')) {
1737 int val = xmlParseStringCharRef(ctxt, &str);
1738 if (val != 0) {
1739 COPY_BUF(0,buffer,nbchars,val);
1740 }
1741 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1742 if (xmlParserDebugEntities)
1743 xmlGenericError(xmlGenericErrorContext,
1744 "String decoding Entity Reference: %.30s\n",
1745 str);
1746 ent = xmlParseStringEntityRef(ctxt, &str);
1747 if ((ent != NULL) &&
1748 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1749 if (ent->content != NULL) {
1750 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1751 } else {
1752 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1753 ctxt->sax->error(ctxt->userData,
1754 "internal error entity has no content\n");
1755 }
1756 } else if ((ent != NULL) && (ent->content != NULL)) {
1757 xmlChar *rep;
1758
1759 ctxt->depth++;
1760 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1761 0, 0, 0);
1762 ctxt->depth--;
1763 if (rep != NULL) {
1764 current = rep;
1765 while (*current != 0) { /* non input consuming loop */
1766 buffer[nbchars++] = *current++;
1767 if (nbchars >
1768 buffer_size - XML_PARSER_BUFFER_SIZE) {
1769 growBuffer(buffer);
1770 }
1771 }
1772 xmlFree(rep);
1773 }
1774 } else if (ent != NULL) {
1775 int i = xmlStrlen(ent->name);
1776 const xmlChar *cur = ent->name;
1777
1778 buffer[nbchars++] = '&';
1779 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1780 growBuffer(buffer);
1781 }
1782 for (;i > 0;i--)
1783 buffer[nbchars++] = *cur++;
1784 buffer[nbchars++] = ';';
1785 }
1786 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1787 if (xmlParserDebugEntities)
1788 xmlGenericError(xmlGenericErrorContext,
1789 "String decoding PE Reference: %.30s\n", str);
1790 ent = xmlParseStringPEReference(ctxt, &str);
1791 if (ent != NULL) {
1792 xmlChar *rep;
1793
1794 ctxt->depth++;
1795 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1796 0, 0, 0);
1797 ctxt->depth--;
1798 if (rep != NULL) {
1799 current = rep;
1800 while (*current != 0) { /* non input consuming loop */
1801 buffer[nbchars++] = *current++;
1802 if (nbchars >
1803 buffer_size - XML_PARSER_BUFFER_SIZE) {
1804 growBuffer(buffer);
1805 }
1806 }
1807 xmlFree(rep);
1808 }
1809 }
1810 } else {
1811 COPY_BUF(l,buffer,nbchars,c);
1812 str += l;
1813 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1814 growBuffer(buffer);
1815 }
1816 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001817 if (str < last)
1818 c = CUR_SCHAR(str, l);
1819 else
1820 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001821 }
1822 buffer[nbchars++] = 0;
1823 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001824
1825mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001826 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001827 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001828}
1829
Daniel Veillarde57ec792003-09-10 10:50:59 +00001830/**
1831 * xmlStringDecodeEntities:
1832 * @ctxt: the parser context
1833 * @str: the input string
1834 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1835 * @end: an end marker xmlChar, 0 if none
1836 * @end2: an end marker xmlChar, 0 if none
1837 * @end3: an end marker xmlChar, 0 if none
1838 *
1839 * Takes a entity string content and process to do the adequate substitutions.
1840 *
1841 * [67] Reference ::= EntityRef | CharRef
1842 *
1843 * [69] PEReference ::= '%' Name ';'
1844 *
1845 * Returns A newly allocated string with the substitution done. The caller
1846 * must deallocate it !
1847 */
1848xmlChar *
1849xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1850 xmlChar end, xmlChar end2, xmlChar end3) {
1851 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
1852 end, end2, end3));
1853}
Owen Taylor3473f882001-02-23 17:55:21 +00001854
1855/************************************************************************
1856 * *
1857 * Commodity functions to handle xmlChars *
1858 * *
1859 ************************************************************************/
1860
1861/**
1862 * xmlStrndup:
1863 * @cur: the input xmlChar *
1864 * @len: the len of @cur
1865 *
1866 * a strndup for array of xmlChar's
1867 *
1868 * Returns a new xmlChar * or NULL
1869 */
1870xmlChar *
1871xmlStrndup(const xmlChar *cur, int len) {
1872 xmlChar *ret;
1873
1874 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001875 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001876 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001877 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001878 return(NULL);
1879 }
1880 memcpy(ret, cur, len * sizeof(xmlChar));
1881 ret[len] = 0;
1882 return(ret);
1883}
1884
1885/**
1886 * xmlStrdup:
1887 * @cur: the input xmlChar *
1888 *
1889 * a strdup for array of xmlChar's. Since they are supposed to be
1890 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1891 * a termination mark of '0'.
1892 *
1893 * Returns a new xmlChar * or NULL
1894 */
1895xmlChar *
1896xmlStrdup(const xmlChar *cur) {
1897 const xmlChar *p = cur;
1898
1899 if (cur == NULL) return(NULL);
1900 while (*p != 0) p++; /* non input consuming */
1901 return(xmlStrndup(cur, p - cur));
1902}
1903
1904/**
1905 * xmlCharStrndup:
1906 * @cur: the input char *
1907 * @len: the len of @cur
1908 *
1909 * a strndup for char's to xmlChar's
1910 *
1911 * Returns a new xmlChar * or NULL
1912 */
1913
1914xmlChar *
1915xmlCharStrndup(const char *cur, int len) {
1916 int i;
1917 xmlChar *ret;
1918
1919 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001920 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001921 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001922 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001923 return(NULL);
1924 }
1925 for (i = 0;i < len;i++)
1926 ret[i] = (xmlChar) cur[i];
1927 ret[len] = 0;
1928 return(ret);
1929}
1930
1931/**
1932 * xmlCharStrdup:
1933 * @cur: the input char *
Owen Taylor3473f882001-02-23 17:55:21 +00001934 *
1935 * a strdup for char's to xmlChar's
1936 *
1937 * Returns a new xmlChar * or NULL
1938 */
1939
1940xmlChar *
1941xmlCharStrdup(const char *cur) {
1942 const char *p = cur;
1943
1944 if (cur == NULL) return(NULL);
1945 while (*p != '\0') p++; /* non input consuming */
1946 return(xmlCharStrndup(cur, p - cur));
1947}
1948
1949/**
1950 * xmlStrcmp:
1951 * @str1: the first xmlChar *
1952 * @str2: the second xmlChar *
1953 *
1954 * a strcmp for xmlChar's
1955 *
1956 * Returns the integer result of the comparison
1957 */
1958
1959int
1960xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1961 register int tmp;
1962
1963 if (str1 == str2) return(0);
1964 if (str1 == NULL) return(-1);
1965 if (str2 == NULL) return(1);
1966 do {
1967 tmp = *str1++ - *str2;
1968 if (tmp != 0) return(tmp);
1969 } while (*str2++ != 0);
1970 return 0;
1971}
1972
1973/**
1974 * xmlStrEqual:
1975 * @str1: the first xmlChar *
1976 * @str2: the second xmlChar *
1977 *
1978 * Check if both string are equal of have same content
1979 * Should be a bit more readable and faster than xmlStrEqual()
1980 *
1981 * Returns 1 if they are equal, 0 if they are different
1982 */
1983
1984int
1985xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1986 if (str1 == str2) return(1);
1987 if (str1 == NULL) return(0);
1988 if (str2 == NULL) return(0);
1989 do {
1990 if (*str1++ != *str2) return(0);
1991 } while (*str2++);
1992 return(1);
1993}
1994
1995/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001996 * xmlStrQEqual:
1997 * @pref: the prefix of the QName
1998 * @name: the localname of the QName
1999 * @str: the second xmlChar *
2000 *
2001 * Check if a QName is Equal to a given string
2002 *
2003 * Returns 1 if they are equal, 0 if they are different
2004 */
2005
2006int
2007xmlStrQEqual(const xmlChar *pref, const xmlChar *name, const xmlChar *str) {
2008 if (pref == NULL) return(xmlStrEqual(name, str));
2009 if (name == NULL) return(0);
2010 if (str == NULL) return(0);
2011
2012 do {
2013 if (*pref++ != *str) return(0);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002014 } while ((*str++) && (*pref));
Daniel Veillarde57ec792003-09-10 10:50:59 +00002015 if (*str++ != ':') return(0);
2016 do {
2017 if (*name++ != *str) return(0);
2018 } while (*str++);
2019 return(1);
2020}
2021
2022/**
Owen Taylor3473f882001-02-23 17:55:21 +00002023 * xmlStrncmp:
2024 * @str1: the first xmlChar *
2025 * @str2: the second xmlChar *
2026 * @len: the max comparison length
2027 *
2028 * a strncmp for xmlChar's
2029 *
2030 * Returns the integer result of the comparison
2031 */
2032
2033int
2034xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
2035 register int tmp;
2036
2037 if (len <= 0) return(0);
2038 if (str1 == str2) return(0);
2039 if (str1 == NULL) return(-1);
2040 if (str2 == NULL) return(1);
2041 do {
2042 tmp = *str1++ - *str2;
2043 if (tmp != 0 || --len == 0) return(tmp);
2044 } while (*str2++ != 0);
2045 return 0;
2046}
2047
Daniel Veillardb44025c2001-10-11 22:55:55 +00002048static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00002049 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
2050 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
2051 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
2052 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
2053 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
2054 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
2055 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
2056 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
2057 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
2058 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
2059 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
2060 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
2061 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
2062 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
2063 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
2064 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
2065 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
2066 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
2067 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
2068 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
2069 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
2070 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
2071 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
2072 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
2073 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
2074 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
2075 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
2076 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
2077 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
2078 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
2079 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
2080 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
2081};
2082
2083/**
2084 * xmlStrcasecmp:
2085 * @str1: the first xmlChar *
2086 * @str2: the second xmlChar *
2087 *
2088 * a strcasecmp for xmlChar's
2089 *
2090 * Returns the integer result of the comparison
2091 */
2092
2093int
2094xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
2095 register int tmp;
2096
2097 if (str1 == str2) return(0);
2098 if (str1 == NULL) return(-1);
2099 if (str2 == NULL) return(1);
2100 do {
2101 tmp = casemap[*str1++] - casemap[*str2];
2102 if (tmp != 0) return(tmp);
2103 } while (*str2++ != 0);
2104 return 0;
2105}
2106
2107/**
2108 * xmlStrncasecmp:
2109 * @str1: the first xmlChar *
2110 * @str2: the second xmlChar *
2111 * @len: the max comparison length
2112 *
2113 * a strncasecmp for xmlChar's
2114 *
2115 * Returns the integer result of the comparison
2116 */
2117
2118int
2119xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
2120 register int tmp;
2121
2122 if (len <= 0) return(0);
2123 if (str1 == str2) return(0);
2124 if (str1 == NULL) return(-1);
2125 if (str2 == NULL) return(1);
2126 do {
2127 tmp = casemap[*str1++] - casemap[*str2];
2128 if (tmp != 0 || --len == 0) return(tmp);
2129 } while (*str2++ != 0);
2130 return 0;
2131}
2132
2133/**
2134 * xmlStrchr:
2135 * @str: the xmlChar * array
2136 * @val: the xmlChar to search
2137 *
2138 * a strchr for xmlChar's
2139 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002140 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002141 */
2142
2143const xmlChar *
2144xmlStrchr(const xmlChar *str, xmlChar val) {
2145 if (str == NULL) return(NULL);
2146 while (*str != 0) { /* non input consuming */
2147 if (*str == val) return((xmlChar *) str);
2148 str++;
2149 }
2150 return(NULL);
2151}
2152
2153/**
2154 * xmlStrstr:
2155 * @str: the xmlChar * array (haystack)
2156 * @val: the xmlChar to search (needle)
2157 *
2158 * a strstr for xmlChar's
2159 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002160 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002161 */
2162
2163const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00002164xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00002165 int n;
2166
2167 if (str == NULL) return(NULL);
2168 if (val == NULL) return(NULL);
2169 n = xmlStrlen(val);
2170
2171 if (n == 0) return(str);
2172 while (*str != 0) { /* non input consuming */
2173 if (*str == *val) {
2174 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
2175 }
2176 str++;
2177 }
2178 return(NULL);
2179}
2180
2181/**
2182 * xmlStrcasestr:
2183 * @str: the xmlChar * array (haystack)
2184 * @val: the xmlChar to search (needle)
2185 *
2186 * a case-ignoring strstr for xmlChar's
2187 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002188 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002189 */
2190
2191const xmlChar *
2192xmlStrcasestr(const xmlChar *str, xmlChar *val) {
2193 int n;
2194
2195 if (str == NULL) return(NULL);
2196 if (val == NULL) return(NULL);
2197 n = xmlStrlen(val);
2198
2199 if (n == 0) return(str);
2200 while (*str != 0) { /* non input consuming */
2201 if (casemap[*str] == casemap[*val])
2202 if (!xmlStrncasecmp(str, val, n)) return(str);
2203 str++;
2204 }
2205 return(NULL);
2206}
2207
2208/**
2209 * xmlStrsub:
2210 * @str: the xmlChar * array (haystack)
2211 * @start: the index of the first char (zero based)
2212 * @len: the length of the substring
2213 *
2214 * Extract a substring of a given string
2215 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002216 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002217 */
2218
2219xmlChar *
2220xmlStrsub(const xmlChar *str, int start, int len) {
2221 int i;
2222
2223 if (str == NULL) return(NULL);
2224 if (start < 0) return(NULL);
2225 if (len < 0) return(NULL);
2226
2227 for (i = 0;i < start;i++) {
2228 if (*str == 0) return(NULL);
2229 str++;
2230 }
2231 if (*str == 0) return(NULL);
2232 return(xmlStrndup(str, len));
2233}
2234
2235/**
2236 * xmlStrlen:
2237 * @str: the xmlChar * array
2238 *
2239 * length of a xmlChar's string
2240 *
2241 * Returns the number of xmlChar contained in the ARRAY.
2242 */
2243
2244int
2245xmlStrlen(const xmlChar *str) {
2246 int len = 0;
2247
2248 if (str == NULL) return(0);
2249 while (*str != 0) { /* non input consuming */
2250 str++;
2251 len++;
2252 }
2253 return(len);
2254}
2255
2256/**
2257 * xmlStrncat:
2258 * @cur: the original xmlChar * array
2259 * @add: the xmlChar * array added
2260 * @len: the length of @add
2261 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002262 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00002263 * first bytes of @add.
2264 *
2265 * Returns a new xmlChar *, the original @cur is reallocated if needed
2266 * and should not be freed
2267 */
2268
2269xmlChar *
2270xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
2271 int size;
2272 xmlChar *ret;
2273
2274 if ((add == NULL) || (len == 0))
2275 return(cur);
2276 if (cur == NULL)
2277 return(xmlStrndup(add, len));
2278
2279 size = xmlStrlen(cur);
2280 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
2281 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002282 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002283 return(cur);
2284 }
2285 memcpy(&ret[size], add, len * sizeof(xmlChar));
2286 ret[size + len] = 0;
2287 return(ret);
2288}
2289
2290/**
2291 * xmlStrcat:
2292 * @cur: the original xmlChar * array
2293 * @add: the xmlChar * array added
2294 *
2295 * a strcat for array of xmlChar's. Since they are supposed to be
2296 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2297 * a termination mark of '0'.
2298 *
2299 * Returns a new xmlChar * containing the concatenated string.
2300 */
2301xmlChar *
2302xmlStrcat(xmlChar *cur, const xmlChar *add) {
2303 const xmlChar *p = add;
2304
2305 if (add == NULL) return(cur);
2306 if (cur == NULL)
2307 return(xmlStrdup(add));
2308
2309 while (*p != 0) p++; /* non input consuming */
2310 return(xmlStrncat(cur, add, p - add));
2311}
2312
2313/************************************************************************
2314 * *
2315 * Commodity functions, cleanup needed ? *
2316 * *
2317 ************************************************************************/
2318
2319/**
2320 * areBlanks:
2321 * @ctxt: an XML parser context
2322 * @str: a xmlChar *
2323 * @len: the size of @str
2324 *
2325 * Is this a sequence of blank chars that one can ignore ?
2326 *
2327 * Returns 1 if ignorable 0 otherwise.
2328 */
2329
2330static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
2331 int i, ret;
2332 xmlNodePtr lastChild;
2333
Daniel Veillard05c13a22001-09-09 08:38:09 +00002334 /*
2335 * Don't spend time trying to differentiate them, the same callback is
2336 * used !
2337 */
2338 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002339 return(0);
2340
Owen Taylor3473f882001-02-23 17:55:21 +00002341 /*
2342 * Check for xml:space value.
2343 */
2344 if (*(ctxt->space) == 1)
2345 return(0);
2346
2347 /*
2348 * Check that the string is made of blanks
2349 */
2350 for (i = 0;i < len;i++)
2351 if (!(IS_BLANK(str[i]))) return(0);
2352
2353 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002354 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002355 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002356 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002357 if (ctxt->myDoc != NULL) {
2358 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2359 if (ret == 0) return(1);
2360 if (ret == 1) return(0);
2361 }
2362
2363 /*
2364 * Otherwise, heuristic :-\
2365 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002366 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002367 if ((ctxt->node->children == NULL) &&
2368 (RAW == '<') && (NXT(1) == '/')) return(0);
2369
2370 lastChild = xmlGetLastChild(ctxt->node);
2371 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002372 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2373 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002374 } else if (xmlNodeIsText(lastChild))
2375 return(0);
2376 else if ((ctxt->node->children != NULL) &&
2377 (xmlNodeIsText(ctxt->node->children)))
2378 return(0);
2379 return(1);
2380}
2381
Owen Taylor3473f882001-02-23 17:55:21 +00002382/************************************************************************
2383 * *
2384 * Extra stuff for namespace support *
2385 * Relates to http://www.w3.org/TR/WD-xml-names *
2386 * *
2387 ************************************************************************/
2388
2389/**
2390 * xmlSplitQName:
2391 * @ctxt: an XML parser context
2392 * @name: an XML parser context
2393 * @prefix: a xmlChar **
2394 *
2395 * parse an UTF8 encoded XML qualified name string
2396 *
2397 * [NS 5] QName ::= (Prefix ':')? LocalPart
2398 *
2399 * [NS 6] Prefix ::= NCName
2400 *
2401 * [NS 7] LocalPart ::= NCName
2402 *
2403 * Returns the local part, and prefix is updated
2404 * to get the Prefix if any.
2405 */
2406
2407xmlChar *
2408xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2409 xmlChar buf[XML_MAX_NAMELEN + 5];
2410 xmlChar *buffer = NULL;
2411 int len = 0;
2412 int max = XML_MAX_NAMELEN;
2413 xmlChar *ret = NULL;
2414 const xmlChar *cur = name;
2415 int c;
2416
2417 *prefix = NULL;
2418
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002419 if (cur == NULL) return(NULL);
2420
Owen Taylor3473f882001-02-23 17:55:21 +00002421#ifndef XML_XML_NAMESPACE
2422 /* xml: prefix is not really a namespace */
2423 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2424 (cur[2] == 'l') && (cur[3] == ':'))
2425 return(xmlStrdup(name));
2426#endif
2427
Daniel Veillard597bc482003-07-24 16:08:28 +00002428 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002429 if (cur[0] == ':')
2430 return(xmlStrdup(name));
2431
2432 c = *cur++;
2433 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2434 buf[len++] = c;
2435 c = *cur++;
2436 }
2437 if (len >= max) {
2438 /*
2439 * Okay someone managed to make a huge name, so he's ready to pay
2440 * for the processing speed.
2441 */
2442 max = len * 2;
2443
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002444 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002445 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002446 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002447 return(NULL);
2448 }
2449 memcpy(buffer, buf, len);
2450 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2451 if (len + 10 > max) {
2452 max *= 2;
2453 buffer = (xmlChar *) xmlRealloc(buffer,
2454 max * sizeof(xmlChar));
2455 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002456 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002457 return(NULL);
2458 }
2459 }
2460 buffer[len++] = c;
2461 c = *cur++;
2462 }
2463 buffer[len] = 0;
2464 }
2465
Daniel Veillard597bc482003-07-24 16:08:28 +00002466 /* nasty but well=formed
2467 if ((c == ':') && (*cur == 0)) {
2468 return(xmlStrdup(name));
2469 } */
2470
Owen Taylor3473f882001-02-23 17:55:21 +00002471 if (buffer == NULL)
2472 ret = xmlStrndup(buf, len);
2473 else {
2474 ret = buffer;
2475 buffer = NULL;
2476 max = XML_MAX_NAMELEN;
2477 }
2478
2479
2480 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002481 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002482 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002483 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002484 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002485 }
Owen Taylor3473f882001-02-23 17:55:21 +00002486 len = 0;
2487
Daniel Veillardbb284f42002-10-16 18:02:47 +00002488 /*
2489 * Check that the first character is proper to start
2490 * a new name
2491 */
2492 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2493 ((c >= 0x41) && (c <= 0x5A)) ||
2494 (c == '_') || (c == ':'))) {
2495 int l;
2496 int first = CUR_SCHAR(cur, l);
2497
2498 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002499 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002500 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002501 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002502 }
2503 }
2504 cur++;
2505
Owen Taylor3473f882001-02-23 17:55:21 +00002506 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2507 buf[len++] = c;
2508 c = *cur++;
2509 }
2510 if (len >= max) {
2511 /*
2512 * Okay someone managed to make a huge name, so he's ready to pay
2513 * for the processing speed.
2514 */
2515 max = len * 2;
2516
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002517 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002518 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002519 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002520 return(NULL);
2521 }
2522 memcpy(buffer, buf, len);
2523 while (c != 0) { /* tested bigname2.xml */
2524 if (len + 10 > max) {
2525 max *= 2;
2526 buffer = (xmlChar *) xmlRealloc(buffer,
2527 max * sizeof(xmlChar));
2528 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002529 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002530 return(NULL);
2531 }
2532 }
2533 buffer[len++] = c;
2534 c = *cur++;
2535 }
2536 buffer[len] = 0;
2537 }
2538
2539 if (buffer == NULL)
2540 ret = xmlStrndup(buf, len);
2541 else {
2542 ret = buffer;
2543 }
2544 }
2545
2546 return(ret);
2547}
2548
2549/************************************************************************
2550 * *
2551 * The parser itself *
2552 * Relates to http://www.w3.org/TR/REC-xml *
2553 * *
2554 ************************************************************************/
2555
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002556static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002557static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002558 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002559
Owen Taylor3473f882001-02-23 17:55:21 +00002560/**
2561 * xmlParseName:
2562 * @ctxt: an XML parser context
2563 *
2564 * parse an XML name.
2565 *
2566 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2567 * CombiningChar | Extender
2568 *
2569 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2570 *
2571 * [6] Names ::= Name (S Name)*
2572 *
2573 * Returns the Name parsed or NULL
2574 */
2575
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002576const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002577xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002578 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002579 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002580 int count = 0;
2581
2582 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002583
2584 /*
2585 * Accelerator for simple ASCII names
2586 */
2587 in = ctxt->input->cur;
2588 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2589 ((*in >= 0x41) && (*in <= 0x5A)) ||
2590 (*in == '_') || (*in == ':')) {
2591 in++;
2592 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2593 ((*in >= 0x41) && (*in <= 0x5A)) ||
2594 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002595 (*in == '_') || (*in == '-') ||
2596 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002597 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002598 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002599 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002600 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002601 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002602 ctxt->nbChars += count;
2603 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002604 if (ret == NULL)
2605 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002606 return(ret);
2607 }
2608 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002609 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002610}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002611
Daniel Veillard46de64e2002-05-29 08:21:33 +00002612/**
2613 * xmlParseNameAndCompare:
2614 * @ctxt: an XML parser context
2615 *
2616 * parse an XML name and compares for match
2617 * (specialized for endtag parsing)
2618 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002619 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2620 * and the name for mismatch
2621 */
2622
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002623static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002624xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
2625 const xmlChar *cmp = other;
2626 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002627 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002628
2629 GROW;
2630
2631 in = ctxt->input->cur;
2632 while (*in != 0 && *in == *cmp) {
2633 ++in;
2634 ++cmp;
2635 }
2636 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
2637 /* success */
2638 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002639 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002640 }
2641 /* failure (or end of input buffer), check with full function */
2642 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002643 /* strings coming from the dictionnary direct compare possible */
2644 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002645 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002646 }
2647 return ret;
2648}
2649
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002650static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002651xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002652 int len = 0, l;
2653 int c;
2654 int count = 0;
2655
2656 /*
2657 * Handler for more complex cases
2658 */
2659 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002660 c = CUR_CHAR(l);
2661 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2662 (!IS_LETTER(c) && (c != '_') &&
2663 (c != ':'))) {
2664 return(NULL);
2665 }
2666
2667 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
2668 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2669 (c == '.') || (c == '-') ||
2670 (c == '_') || (c == ':') ||
2671 (IS_COMBINING(c)) ||
2672 (IS_EXTENDER(c)))) {
2673 if (count++ > 100) {
2674 count = 0;
2675 GROW;
2676 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002677 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002678 NEXTL(l);
2679 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002680 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002681 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002682}
2683
2684/**
2685 * xmlParseStringName:
2686 * @ctxt: an XML parser context
2687 * @str: a pointer to the string pointer (IN/OUT)
2688 *
2689 * parse an XML name.
2690 *
2691 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2692 * CombiningChar | Extender
2693 *
2694 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2695 *
2696 * [6] Names ::= Name (S Name)*
2697 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002698 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002699 * is updated to the current location in the string.
2700 */
2701
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002702static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002703xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2704 xmlChar buf[XML_MAX_NAMELEN + 5];
2705 const xmlChar *cur = *str;
2706 int len = 0, l;
2707 int c;
2708
2709 c = CUR_SCHAR(cur, l);
2710 if (!IS_LETTER(c) && (c != '_') &&
2711 (c != ':')) {
2712 return(NULL);
2713 }
2714
2715 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2716 (c == '.') || (c == '-') ||
2717 (c == '_') || (c == ':') ||
2718 (IS_COMBINING(c)) ||
2719 (IS_EXTENDER(c))) {
2720 COPY_BUF(l,buf,len,c);
2721 cur += l;
2722 c = CUR_SCHAR(cur, l);
2723 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2724 /*
2725 * Okay someone managed to make a huge name, so he's ready to pay
2726 * for the processing speed.
2727 */
2728 xmlChar *buffer;
2729 int max = len * 2;
2730
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002731 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002732 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002733 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002734 return(NULL);
2735 }
2736 memcpy(buffer, buf, len);
2737 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2738 (c == '.') || (c == '-') ||
2739 (c == '_') || (c == ':') ||
2740 (IS_COMBINING(c)) ||
2741 (IS_EXTENDER(c))) {
2742 if (len + 10 > max) {
2743 max *= 2;
2744 buffer = (xmlChar *) xmlRealloc(buffer,
2745 max * sizeof(xmlChar));
2746 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002747 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002748 return(NULL);
2749 }
2750 }
2751 COPY_BUF(l,buffer,len,c);
2752 cur += l;
2753 c = CUR_SCHAR(cur, l);
2754 }
2755 buffer[len] = 0;
2756 *str = cur;
2757 return(buffer);
2758 }
2759 }
2760 *str = cur;
2761 return(xmlStrndup(buf, len));
2762}
2763
2764/**
2765 * xmlParseNmtoken:
2766 * @ctxt: an XML parser context
2767 *
2768 * parse an XML Nmtoken.
2769 *
2770 * [7] Nmtoken ::= (NameChar)+
2771 *
2772 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2773 *
2774 * Returns the Nmtoken parsed or NULL
2775 */
2776
2777xmlChar *
2778xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2779 xmlChar buf[XML_MAX_NAMELEN + 5];
2780 int len = 0, l;
2781 int c;
2782 int count = 0;
2783
2784 GROW;
2785 c = CUR_CHAR(l);
2786
2787 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2788 (c == '.') || (c == '-') ||
2789 (c == '_') || (c == ':') ||
2790 (IS_COMBINING(c)) ||
2791 (IS_EXTENDER(c))) {
2792 if (count++ > 100) {
2793 count = 0;
2794 GROW;
2795 }
2796 COPY_BUF(l,buf,len,c);
2797 NEXTL(l);
2798 c = CUR_CHAR(l);
2799 if (len >= XML_MAX_NAMELEN) {
2800 /*
2801 * Okay someone managed to make a huge token, so he's ready to pay
2802 * for the processing speed.
2803 */
2804 xmlChar *buffer;
2805 int max = len * 2;
2806
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002807 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002808 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002809 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002810 return(NULL);
2811 }
2812 memcpy(buffer, buf, len);
2813 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2814 (c == '.') || (c == '-') ||
2815 (c == '_') || (c == ':') ||
2816 (IS_COMBINING(c)) ||
2817 (IS_EXTENDER(c))) {
2818 if (count++ > 100) {
2819 count = 0;
2820 GROW;
2821 }
2822 if (len + 10 > max) {
2823 max *= 2;
2824 buffer = (xmlChar *) xmlRealloc(buffer,
2825 max * sizeof(xmlChar));
2826 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002827 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002828 return(NULL);
2829 }
2830 }
2831 COPY_BUF(l,buffer,len,c);
2832 NEXTL(l);
2833 c = CUR_CHAR(l);
2834 }
2835 buffer[len] = 0;
2836 return(buffer);
2837 }
2838 }
2839 if (len == 0)
2840 return(NULL);
2841 return(xmlStrndup(buf, len));
2842}
2843
2844/**
2845 * xmlParseEntityValue:
2846 * @ctxt: an XML parser context
2847 * @orig: if non-NULL store a copy of the original entity value
2848 *
2849 * parse a value for ENTITY declarations
2850 *
2851 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2852 * "'" ([^%&'] | PEReference | Reference)* "'"
2853 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002854 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002855 */
2856
2857xmlChar *
2858xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2859 xmlChar *buf = NULL;
2860 int len = 0;
2861 int size = XML_PARSER_BUFFER_SIZE;
2862 int c, l;
2863 xmlChar stop;
2864 xmlChar *ret = NULL;
2865 const xmlChar *cur = NULL;
2866 xmlParserInputPtr input;
2867
2868 if (RAW == '"') stop = '"';
2869 else if (RAW == '\'') stop = '\'';
2870 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002871 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002872 return(NULL);
2873 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002874 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002875 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002876 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002877 return(NULL);
2878 }
2879
2880 /*
2881 * The content of the entity definition is copied in a buffer.
2882 */
2883
2884 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2885 input = ctxt->input;
2886 GROW;
2887 NEXT;
2888 c = CUR_CHAR(l);
2889 /*
2890 * NOTE: 4.4.5 Included in Literal
2891 * When a parameter entity reference appears in a literal entity
2892 * value, ... a single or double quote character in the replacement
2893 * text is always treated as a normal data character and will not
2894 * terminate the literal.
2895 * In practice it means we stop the loop only when back at parsing
2896 * the initial entity and the quote is found
2897 */
2898 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2899 (ctxt->input != input))) {
2900 if (len + 5 >= size) {
2901 size *= 2;
2902 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2903 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002904 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002905 return(NULL);
2906 }
2907 }
2908 COPY_BUF(l,buf,len,c);
2909 NEXTL(l);
2910 /*
2911 * Pop-up of finished entities.
2912 */
2913 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2914 xmlPopInput(ctxt);
2915
2916 GROW;
2917 c = CUR_CHAR(l);
2918 if (c == 0) {
2919 GROW;
2920 c = CUR_CHAR(l);
2921 }
2922 }
2923 buf[len] = 0;
2924
2925 /*
2926 * Raise problem w.r.t. '&' and '%' being used in non-entities
2927 * reference constructs. Note Charref will be handled in
2928 * xmlStringDecodeEntities()
2929 */
2930 cur = buf;
2931 while (*cur != 0) { /* non input consuming */
2932 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2933 xmlChar *name;
2934 xmlChar tmp = *cur;
2935
2936 cur++;
2937 name = xmlParseStringName(ctxt, &cur);
2938 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002939 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002940 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002941 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002942 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002943 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2944 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002945 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002946 }
2947 if (name != NULL)
2948 xmlFree(name);
2949 }
2950 cur++;
2951 }
2952
2953 /*
2954 * Then PEReference entities are substituted.
2955 */
2956 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002957 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002958 xmlFree(buf);
2959 } else {
2960 NEXT;
2961 /*
2962 * NOTE: 4.4.7 Bypassed
2963 * When a general entity reference appears in the EntityValue in
2964 * an entity declaration, it is bypassed and left as is.
2965 * so XML_SUBSTITUTE_REF is not set here.
2966 */
2967 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2968 0, 0, 0);
2969 if (orig != NULL)
2970 *orig = buf;
2971 else
2972 xmlFree(buf);
2973 }
2974
2975 return(ret);
2976}
2977
2978/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00002979 * xmlParseAttValueComplex:
2980 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00002981 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002982 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00002983 *
2984 * parse a value for an attribute, this is the fallback function
2985 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002986 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00002987 *
2988 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2989 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00002990static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002991xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00002992 xmlChar limit = 0;
2993 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002994 int len = 0;
2995 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002996 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002997 xmlChar *current = NULL;
2998 xmlEntityPtr ent;
2999
Owen Taylor3473f882001-02-23 17:55:21 +00003000 if (NXT(0) == '"') {
3001 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3002 limit = '"';
3003 NEXT;
3004 } else if (NXT(0) == '\'') {
3005 limit = '\'';
3006 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3007 NEXT;
3008 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003009 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003010 return(NULL);
3011 }
3012
3013 /*
3014 * allocate a translation buffer.
3015 */
3016 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003017 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003018 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003019
3020 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003021 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003022 */
3023 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003024 while ((NXT(0) != limit) && /* checked */
3025 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003026 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003027 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003028 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003029 if (NXT(1) == '#') {
3030 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003031
Owen Taylor3473f882001-02-23 17:55:21 +00003032 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003033 if (ctxt->replaceEntities) {
3034 if (len > buf_size - 10) {
3035 growBuffer(buf);
3036 }
3037 buf[len++] = '&';
3038 } else {
3039 /*
3040 * The reparsing will be done in xmlStringGetNodeList()
3041 * called by the attribute() function in SAX.c
3042 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003043 if (len > buf_size - 10) {
3044 growBuffer(buf);
3045 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003046 buf[len++] = '&';
3047 buf[len++] = '#';
3048 buf[len++] = '3';
3049 buf[len++] = '8';
3050 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003051 }
3052 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003053 if (len > buf_size - 10) {
3054 growBuffer(buf);
3055 }
Owen Taylor3473f882001-02-23 17:55:21 +00003056 len += xmlCopyChar(0, &buf[len], val);
3057 }
3058 } else {
3059 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003060 if ((ent != NULL) &&
3061 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3062 if (len > buf_size - 10) {
3063 growBuffer(buf);
3064 }
3065 if ((ctxt->replaceEntities == 0) &&
3066 (ent->content[0] == '&')) {
3067 buf[len++] = '&';
3068 buf[len++] = '#';
3069 buf[len++] = '3';
3070 buf[len++] = '8';
3071 buf[len++] = ';';
3072 } else {
3073 buf[len++] = ent->content[0];
3074 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003075 } else if ((ent != NULL) &&
3076 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003077 xmlChar *rep;
3078
3079 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3080 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003081 XML_SUBSTITUTE_REF,
3082 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003083 if (rep != NULL) {
3084 current = rep;
3085 while (*current != 0) { /* non input consuming */
3086 buf[len++] = *current++;
3087 if (len > buf_size - 10) {
3088 growBuffer(buf);
3089 }
3090 }
3091 xmlFree(rep);
3092 }
3093 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003094 if (len > buf_size - 10) {
3095 growBuffer(buf);
3096 }
Owen Taylor3473f882001-02-23 17:55:21 +00003097 if (ent->content != NULL)
3098 buf[len++] = ent->content[0];
3099 }
3100 } else if (ent != NULL) {
3101 int i = xmlStrlen(ent->name);
3102 const xmlChar *cur = ent->name;
3103
3104 /*
3105 * This may look absurd but is needed to detect
3106 * entities problems
3107 */
3108 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3109 (ent->content != NULL)) {
3110 xmlChar *rep;
3111 rep = xmlStringDecodeEntities(ctxt, ent->content,
3112 XML_SUBSTITUTE_REF, 0, 0, 0);
3113 if (rep != NULL)
3114 xmlFree(rep);
3115 }
3116
3117 /*
3118 * Just output the reference
3119 */
3120 buf[len++] = '&';
3121 if (len > buf_size - i - 10) {
3122 growBuffer(buf);
3123 }
3124 for (;i > 0;i--)
3125 buf[len++] = *cur++;
3126 buf[len++] = ';';
3127 }
3128 }
3129 } else {
3130 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003131 if ((len != 0) || (!normalize)) {
3132 if ((!normalize) || (!in_space)) {
3133 COPY_BUF(l,buf,len,0x20);
3134 if (len > buf_size - 10) {
3135 growBuffer(buf);
3136 }
3137 }
3138 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003139 }
3140 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003141 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003142 COPY_BUF(l,buf,len,c);
3143 if (len > buf_size - 10) {
3144 growBuffer(buf);
3145 }
3146 }
3147 NEXTL(l);
3148 }
3149 GROW;
3150 c = CUR_CHAR(l);
3151 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003152 if ((in_space) && (normalize)) {
3153 while (buf[len - 1] == 0x20) len--;
3154 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003155 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003156 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003157 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003158 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003159 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3160 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003161 } else
3162 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003163 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003164 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003165
3166mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003167 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003168 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003169}
3170
3171/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003172 * xmlParseAttValue:
3173 * @ctxt: an XML parser context
3174 *
3175 * parse a value for an attribute
3176 * Note: the parser won't do substitution of entities here, this
3177 * will be handled later in xmlStringGetNodeList
3178 *
3179 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3180 * "'" ([^<&'] | Reference)* "'"
3181 *
3182 * 3.3.3 Attribute-Value Normalization:
3183 * Before the value of an attribute is passed to the application or
3184 * checked for validity, the XML processor must normalize it as follows:
3185 * - a character reference is processed by appending the referenced
3186 * character to the attribute value
3187 * - an entity reference is processed by recursively processing the
3188 * replacement text of the entity
3189 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3190 * appending #x20 to the normalized value, except that only a single
3191 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3192 * parsed entity or the literal entity value of an internal parsed entity
3193 * - other characters are processed by appending them to the normalized value
3194 * If the declared value is not CDATA, then the XML processor must further
3195 * process the normalized attribute value by discarding any leading and
3196 * trailing space (#x20) characters, and by replacing sequences of space
3197 * (#x20) characters by a single space (#x20) character.
3198 * All attributes for which no declaration has been read should be treated
3199 * by a non-validating parser as if declared CDATA.
3200 *
3201 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3202 */
3203
3204
3205xmlChar *
3206xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003207 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003208}
3209
3210/**
Owen Taylor3473f882001-02-23 17:55:21 +00003211 * xmlParseSystemLiteral:
3212 * @ctxt: an XML parser context
3213 *
3214 * parse an XML Literal
3215 *
3216 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3217 *
3218 * Returns the SystemLiteral parsed or NULL
3219 */
3220
3221xmlChar *
3222xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3223 xmlChar *buf = NULL;
3224 int len = 0;
3225 int size = XML_PARSER_BUFFER_SIZE;
3226 int cur, l;
3227 xmlChar stop;
3228 int state = ctxt->instate;
3229 int count = 0;
3230
3231 SHRINK;
3232 if (RAW == '"') {
3233 NEXT;
3234 stop = '"';
3235 } else if (RAW == '\'') {
3236 NEXT;
3237 stop = '\'';
3238 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003239 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003240 return(NULL);
3241 }
3242
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003243 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003244 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003245 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003246 return(NULL);
3247 }
3248 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3249 cur = CUR_CHAR(l);
3250 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
3251 if (len + 5 >= size) {
3252 size *= 2;
3253 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3254 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003255 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003256 ctxt->instate = (xmlParserInputState) state;
3257 return(NULL);
3258 }
3259 }
3260 count++;
3261 if (count > 50) {
3262 GROW;
3263 count = 0;
3264 }
3265 COPY_BUF(l,buf,len,cur);
3266 NEXTL(l);
3267 cur = CUR_CHAR(l);
3268 if (cur == 0) {
3269 GROW;
3270 SHRINK;
3271 cur = CUR_CHAR(l);
3272 }
3273 }
3274 buf[len] = 0;
3275 ctxt->instate = (xmlParserInputState) state;
3276 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003277 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003278 } else {
3279 NEXT;
3280 }
3281 return(buf);
3282}
3283
3284/**
3285 * xmlParsePubidLiteral:
3286 * @ctxt: an XML parser context
3287 *
3288 * parse an XML public literal
3289 *
3290 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3291 *
3292 * Returns the PubidLiteral parsed or NULL.
3293 */
3294
3295xmlChar *
3296xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3297 xmlChar *buf = NULL;
3298 int len = 0;
3299 int size = XML_PARSER_BUFFER_SIZE;
3300 xmlChar cur;
3301 xmlChar stop;
3302 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003303 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003304
3305 SHRINK;
3306 if (RAW == '"') {
3307 NEXT;
3308 stop = '"';
3309 } else if (RAW == '\'') {
3310 NEXT;
3311 stop = '\'';
3312 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003313 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003314 return(NULL);
3315 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003316 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003317 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003318 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003319 return(NULL);
3320 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003321 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003322 cur = CUR;
3323 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
3324 if (len + 1 >= size) {
3325 size *= 2;
3326 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3327 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003328 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003329 return(NULL);
3330 }
3331 }
3332 buf[len++] = cur;
3333 count++;
3334 if (count > 50) {
3335 GROW;
3336 count = 0;
3337 }
3338 NEXT;
3339 cur = CUR;
3340 if (cur == 0) {
3341 GROW;
3342 SHRINK;
3343 cur = CUR;
3344 }
3345 }
3346 buf[len] = 0;
3347 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003348 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003349 } else {
3350 NEXT;
3351 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003352 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003353 return(buf);
3354}
3355
Daniel Veillard48b2f892001-02-25 16:11:03 +00003356void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00003357/**
3358 * xmlParseCharData:
3359 * @ctxt: an XML parser context
3360 * @cdata: int indicating whether we are within a CDATA section
3361 *
3362 * parse a CharData section.
3363 * if we are within a CDATA section ']]>' marks an end of section.
3364 *
3365 * The right angle bracket (>) may be represented using the string "&gt;",
3366 * and must, for compatibility, be escaped using "&gt;" or a character
3367 * reference when it appears in the string "]]>" in content, when that
3368 * string is not marking the end of a CDATA section.
3369 *
3370 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3371 */
3372
3373void
3374xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003375 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003376 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003377 int line = ctxt->input->line;
3378 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003379
3380 SHRINK;
3381 GROW;
3382 /*
3383 * Accelerated common case where input don't need to be
3384 * modified before passing it to the handler.
3385 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003386 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003387 in = ctxt->input->cur;
3388 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003389get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00003390 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
3391 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003392 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003393 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003394 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003395 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003396 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003397 ctxt->input->line++;
3398 in++;
3399 }
3400 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003401 }
3402 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003403 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003404 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003405 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003406 return;
3407 }
3408 in++;
3409 goto get_more;
3410 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003411 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003412 if (nbchar > 0) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003413 if ((ctxt->sax->ignorableWhitespace !=
3414 ctxt->sax->characters) &&
3415 (IS_BLANK(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003416 const xmlChar *tmp = ctxt->input->cur;
3417 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003418
Daniel Veillarda7374592001-05-10 14:17:55 +00003419 if (areBlanks(ctxt, tmp, nbchar)) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003420 ctxt->sax->ignorableWhitespace(ctxt->userData,
3421 tmp, nbchar);
3422 } else if (ctxt->sax->characters != NULL)
3423 ctxt->sax->characters(ctxt->userData,
3424 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003425 line = ctxt->input->line;
3426 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003427 } else {
3428 if (ctxt->sax->characters != NULL)
3429 ctxt->sax->characters(ctxt->userData,
3430 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003431 line = ctxt->input->line;
3432 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003433 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003434 }
3435 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003436 if (*in == 0xD) {
3437 in++;
3438 if (*in == 0xA) {
3439 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003440 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003441 ctxt->input->line++;
3442 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003443 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003444 in--;
3445 }
3446 if (*in == '<') {
3447 return;
3448 }
3449 if (*in == '&') {
3450 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003451 }
3452 SHRINK;
3453 GROW;
3454 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003455 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003456 nbchar = 0;
3457 }
Daniel Veillard50582112001-03-26 22:52:16 +00003458 ctxt->input->line = line;
3459 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003460 xmlParseCharDataComplex(ctxt, cdata);
3461}
3462
Daniel Veillard01c13b52002-12-10 15:19:08 +00003463/**
3464 * xmlParseCharDataComplex:
3465 * @ctxt: an XML parser context
3466 * @cdata: int indicating whether we are within a CDATA section
3467 *
3468 * parse a CharData section.this is the fallback function
3469 * of xmlParseCharData() when the parsing requires handling
3470 * of non-ASCII characters.
3471 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003472void
3473xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003474 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3475 int nbchar = 0;
3476 int cur, l;
3477 int count = 0;
3478
3479 SHRINK;
3480 GROW;
3481 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003482 while ((cur != '<') && /* checked */
3483 (cur != '&') &&
3484 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003485 if ((cur == ']') && (NXT(1) == ']') &&
3486 (NXT(2) == '>')) {
3487 if (cdata) break;
3488 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003489 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003490 }
3491 }
3492 COPY_BUF(l,buf,nbchar,cur);
3493 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
3494 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003495 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003496 */
3497 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3498 if (areBlanks(ctxt, buf, nbchar)) {
3499 if (ctxt->sax->ignorableWhitespace != NULL)
3500 ctxt->sax->ignorableWhitespace(ctxt->userData,
3501 buf, nbchar);
3502 } else {
3503 if (ctxt->sax->characters != NULL)
3504 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3505 }
3506 }
3507 nbchar = 0;
3508 }
3509 count++;
3510 if (count > 50) {
3511 GROW;
3512 count = 0;
3513 }
3514 NEXTL(l);
3515 cur = CUR_CHAR(l);
3516 }
3517 if (nbchar != 0) {
3518 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003519 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003520 */
3521 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3522 if (areBlanks(ctxt, buf, nbchar)) {
3523 if (ctxt->sax->ignorableWhitespace != NULL)
3524 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3525 } else {
3526 if (ctxt->sax->characters != NULL)
3527 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3528 }
3529 }
3530 }
3531}
3532
3533/**
3534 * xmlParseExternalID:
3535 * @ctxt: an XML parser context
3536 * @publicID: a xmlChar** receiving PubidLiteral
3537 * @strict: indicate whether we should restrict parsing to only
3538 * production [75], see NOTE below
3539 *
3540 * Parse an External ID or a Public ID
3541 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003542 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003543 * 'PUBLIC' S PubidLiteral S SystemLiteral
3544 *
3545 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3546 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3547 *
3548 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3549 *
3550 * Returns the function returns SystemLiteral and in the second
3551 * case publicID receives PubidLiteral, is strict is off
3552 * it is possible to return NULL and have publicID set.
3553 */
3554
3555xmlChar *
3556xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3557 xmlChar *URI = NULL;
3558
3559 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003560
3561 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003562 if ((RAW == 'S') && (NXT(1) == 'Y') &&
3563 (NXT(2) == 'S') && (NXT(3) == 'T') &&
3564 (NXT(4) == 'E') && (NXT(5) == 'M')) {
3565 SKIP(6);
3566 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003567 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3568 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003569 }
3570 SKIP_BLANKS;
3571 URI = xmlParseSystemLiteral(ctxt);
3572 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003573 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003574 }
3575 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
3576 (NXT(2) == 'B') && (NXT(3) == 'L') &&
3577 (NXT(4) == 'I') && (NXT(5) == 'C')) {
3578 SKIP(6);
3579 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003580 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003581 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003582 }
3583 SKIP_BLANKS;
3584 *publicID = xmlParsePubidLiteral(ctxt);
3585 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003586 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003587 }
3588 if (strict) {
3589 /*
3590 * We don't handle [83] so "S SystemLiteral" is required.
3591 */
3592 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003593 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003594 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003595 }
3596 } else {
3597 /*
3598 * We handle [83] so we return immediately, if
3599 * "S SystemLiteral" is not detected. From a purely parsing
3600 * point of view that's a nice mess.
3601 */
3602 const xmlChar *ptr;
3603 GROW;
3604
3605 ptr = CUR_PTR;
3606 if (!IS_BLANK(*ptr)) return(NULL);
3607
3608 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3609 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3610 }
3611 SKIP_BLANKS;
3612 URI = xmlParseSystemLiteral(ctxt);
3613 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003614 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003615 }
3616 }
3617 return(URI);
3618}
3619
3620/**
3621 * xmlParseComment:
3622 * @ctxt: an XML parser context
3623 *
3624 * Skip an XML (SGML) comment <!-- .... -->
3625 * The spec says that "For compatibility, the string "--" (double-hyphen)
3626 * must not occur within comments. "
3627 *
3628 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3629 */
3630void
3631xmlParseComment(xmlParserCtxtPtr ctxt) {
3632 xmlChar *buf = NULL;
3633 int len;
3634 int size = XML_PARSER_BUFFER_SIZE;
3635 int q, ql;
3636 int r, rl;
3637 int cur, l;
3638 xmlParserInputState state;
3639 xmlParserInputPtr input = ctxt->input;
3640 int count = 0;
3641
3642 /*
3643 * Check that there is a comment right here.
3644 */
3645 if ((RAW != '<') || (NXT(1) != '!') ||
3646 (NXT(2) != '-') || (NXT(3) != '-')) return;
3647
3648 state = ctxt->instate;
3649 ctxt->instate = XML_PARSER_COMMENT;
3650 SHRINK;
3651 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003652 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003653 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003654 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003655 ctxt->instate = state;
3656 return;
3657 }
3658 q = CUR_CHAR(ql);
3659 NEXTL(ql);
3660 r = CUR_CHAR(rl);
3661 NEXTL(rl);
3662 cur = CUR_CHAR(l);
3663 len = 0;
3664 while (IS_CHAR(cur) && /* checked */
3665 ((cur != '>') ||
3666 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003667 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003668 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003669 }
3670 if (len + 5 >= size) {
3671 size *= 2;
3672 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3673 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003674 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003675 ctxt->instate = state;
3676 return;
3677 }
3678 }
3679 COPY_BUF(ql,buf,len,q);
3680 q = r;
3681 ql = rl;
3682 r = cur;
3683 rl = l;
3684
3685 count++;
3686 if (count > 50) {
3687 GROW;
3688 count = 0;
3689 }
3690 NEXTL(l);
3691 cur = CUR_CHAR(l);
3692 if (cur == 0) {
3693 SHRINK;
3694 GROW;
3695 cur = CUR_CHAR(l);
3696 }
3697 }
3698 buf[len] = 0;
3699 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003700 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003701 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003702 xmlFree(buf);
3703 } else {
3704 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003705 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3706 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003707 }
3708 NEXT;
3709 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3710 (!ctxt->disableSAX))
3711 ctxt->sax->comment(ctxt->userData, buf);
3712 xmlFree(buf);
3713 }
3714 ctxt->instate = state;
3715}
3716
3717/**
3718 * xmlParsePITarget:
3719 * @ctxt: an XML parser context
3720 *
3721 * parse the name of a PI
3722 *
3723 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3724 *
3725 * Returns the PITarget name or NULL
3726 */
3727
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003728const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003729xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003730 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003731
3732 name = xmlParseName(ctxt);
3733 if ((name != NULL) &&
3734 ((name[0] == 'x') || (name[0] == 'X')) &&
3735 ((name[1] == 'm') || (name[1] == 'M')) &&
3736 ((name[2] == 'l') || (name[2] == 'L'))) {
3737 int i;
3738 if ((name[0] == 'x') && (name[1] == 'm') &&
3739 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003740 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003741 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003742 return(name);
3743 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003744 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003745 return(name);
3746 }
3747 for (i = 0;;i++) {
3748 if (xmlW3CPIs[i] == NULL) break;
3749 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3750 return(name);
3751 }
3752 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3753 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3754 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003755 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003756 }
3757 }
3758 return(name);
3759}
3760
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The value must be of the form  catalog = "URL"  (single or double
 * quotes) with nothing but blanks around it. A missing '=' is ignored
 * silently; any other syntax problem is reported through the SAX
 * warning callback with ctxt->errNo set to XML_WAR_CATALOG_PI.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *cursor = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* the pseudo attribute name */
    for (; IS_BLANK(*cursor); cursor++)
        ;
    if (xmlStrncmp(cursor, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;
    cursor += 7;

    /* the '=' sign, possibly surrounded by blanks */
    for (; IS_BLANK(*cursor); cursor++)
        ;
    if (*cursor != '=')
        return;
    cursor++;
    for (; IS_BLANK(*cursor); cursor++)
        ;

    /* the quoted value */
    quote = *cursor;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;
    cursor++;
    start = cursor;
    for (; (*cursor != 0) && (*cursor != quote); cursor++)
        ;
    if (*cursor == 0)
        goto syntax_error;
    URL = xmlStrndup(start, cursor - start);
    cursor++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK(*cursor); cursor++)
        ;
    if (*cursor != 0)
        goto syntax_error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

syntax_error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
                           "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3823
Owen Taylor3473f882001-02-23 17:55:21 +00003824/**
3825 * xmlParsePI:
3826 * @ctxt: an XML parser context
3827 *
3828 * parse an XML Processing Instruction.
3829 *
3830 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3831 *
3832 * The processing is transfered to SAX once parsed.
3833 */
3834
3835void
3836xmlParsePI(xmlParserCtxtPtr ctxt) {
3837 xmlChar *buf = NULL;
3838 int len = 0;
3839 int size = XML_PARSER_BUFFER_SIZE;
3840 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003841 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00003842 xmlParserInputState state;
3843 int count = 0;
3844
3845 if ((RAW == '<') && (NXT(1) == '?')) {
3846 xmlParserInputPtr input = ctxt->input;
3847 state = ctxt->instate;
3848 ctxt->instate = XML_PARSER_PI;
3849 /*
3850 * this is a Processing Instruction.
3851 */
3852 SKIP(2);
3853 SHRINK;
3854
3855 /*
3856 * Parse the target name and check for special support like
3857 * namespace.
3858 */
3859 target = xmlParsePITarget(ctxt);
3860 if (target != NULL) {
3861 if ((RAW == '?') && (NXT(1) == '>')) {
3862 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003863 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3864 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003865 }
3866 SKIP(2);
3867
3868 /*
3869 * SAX: PI detected.
3870 */
3871 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3872 (ctxt->sax->processingInstruction != NULL))
3873 ctxt->sax->processingInstruction(ctxt->userData,
3874 target, NULL);
3875 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00003876 return;
3877 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003878 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003879 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003880 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003881 ctxt->instate = state;
3882 return;
3883 }
3884 cur = CUR;
3885 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003886 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
3887 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003888 }
3889 SKIP_BLANKS;
3890 cur = CUR_CHAR(l);
3891 while (IS_CHAR(cur) && /* checked */
3892 ((cur != '?') || (NXT(1) != '>'))) {
3893 if (len + 5 >= size) {
3894 size *= 2;
3895 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3896 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003897 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003898 ctxt->instate = state;
3899 return;
3900 }
3901 }
3902 count++;
3903 if (count > 50) {
3904 GROW;
3905 count = 0;
3906 }
3907 COPY_BUF(l,buf,len,cur);
3908 NEXTL(l);
3909 cur = CUR_CHAR(l);
3910 if (cur == 0) {
3911 SHRINK;
3912 GROW;
3913 cur = CUR_CHAR(l);
3914 }
3915 }
3916 buf[len] = 0;
3917 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003918 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
3919 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003920 } else {
3921 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003922 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3923 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003924 }
3925 SKIP(2);
3926
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003927#ifdef LIBXML_CATALOG_ENABLED
3928 if (((state == XML_PARSER_MISC) ||
3929 (state == XML_PARSER_START)) &&
3930 (xmlStrEqual(target, XML_CATALOG_PI))) {
3931 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3932 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3933 (allow == XML_CATA_ALLOW_ALL))
3934 xmlParseCatalogPI(ctxt, buf);
3935 }
3936#endif
3937
3938
Owen Taylor3473f882001-02-23 17:55:21 +00003939 /*
3940 * SAX: PI detected.
3941 */
3942 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3943 (ctxt->sax->processingInstruction != NULL))
3944 ctxt->sax->processingInstruction(ctxt->userData,
3945 target, buf);
3946 }
3947 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003948 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003949 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003950 }
3951 ctxt->instate = state;
3952 }
3953}
3954
3955/**
3956 * xmlParseNotationDecl:
3957 * @ctxt: an XML parser context
3958 *
3959 * parse a notation declaration
3960 *
3961 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3962 *
3963 * Hence there is actually 3 choices:
3964 * 'PUBLIC' S PubidLiteral
3965 * 'PUBLIC' S PubidLiteral S SystemLiteral
3966 * and 'SYSTEM' S SystemLiteral
3967 *
3968 * See the NOTE on xmlParseExternalID().
3969 */
3970
3971void
3972xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003973 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003974 xmlChar *Pubid;
3975 xmlChar *Systemid;
3976
3977 if ((RAW == '<') && (NXT(1) == '!') &&
3978 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3979 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3980 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3981 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3982 xmlParserInputPtr input = ctxt->input;
3983 SHRINK;
3984 SKIP(10);
3985 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003986 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3987 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003988 return;
3989 }
3990 SKIP_BLANKS;
3991
Daniel Veillard76d66f42001-05-16 21:05:17 +00003992 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003993 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003994 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003995 return;
3996 }
3997 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003998 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003999 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004000 return;
4001 }
4002 SKIP_BLANKS;
4003
4004 /*
4005 * Parse the IDs.
4006 */
4007 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4008 SKIP_BLANKS;
4009
4010 if (RAW == '>') {
4011 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004012 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4013 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004014 }
4015 NEXT;
4016 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4017 (ctxt->sax->notationDecl != NULL))
4018 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4019 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004020 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004021 }
Owen Taylor3473f882001-02-23 17:55:21 +00004022 if (Systemid != NULL) xmlFree(Systemid);
4023 if (Pubid != NULL) xmlFree(Pubid);
4024 }
4025}
4026
4027/**
4028 * xmlParseEntityDecl:
4029 * @ctxt: an XML parser context
4030 *
4031 * parse <!ENTITY declarations
4032 *
4033 * [70] EntityDecl ::= GEDecl | PEDecl
4034 *
4035 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4036 *
4037 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4038 *
4039 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4040 *
4041 * [74] PEDef ::= EntityValue | ExternalID
4042 *
4043 * [76] NDataDecl ::= S 'NDATA' S Name
4044 *
4045 * [ VC: Notation Declared ]
4046 * The Name must match the declared name of a notation.
4047 */
4048
4049void
4050xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004051 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004052 xmlChar *value = NULL;
4053 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004054 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004055 int isParameter = 0;
4056 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004057 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004058
4059 GROW;
4060 if ((RAW == '<') && (NXT(1) == '!') &&
4061 (NXT(2) == 'E') && (NXT(3) == 'N') &&
4062 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4063 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
4064 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004065 SHRINK;
4066 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004067 skipped = SKIP_BLANKS;
4068 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004069 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4070 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004071 }
Owen Taylor3473f882001-02-23 17:55:21 +00004072
4073 if (RAW == '%') {
4074 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004075 skipped = SKIP_BLANKS;
4076 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004077 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4078 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004079 }
Owen Taylor3473f882001-02-23 17:55:21 +00004080 isParameter = 1;
4081 }
4082
Daniel Veillard76d66f42001-05-16 21:05:17 +00004083 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004084 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004085 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4086 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004087 return;
4088 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004089 skipped = SKIP_BLANKS;
4090 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004091 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4092 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004093 }
Owen Taylor3473f882001-02-23 17:55:21 +00004094
Daniel Veillardf5582f12002-06-11 10:08:16 +00004095 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004096 /*
4097 * handle the various case of definitions...
4098 */
4099 if (isParameter) {
4100 if ((RAW == '"') || (RAW == '\'')) {
4101 value = xmlParseEntityValue(ctxt, &orig);
4102 if (value) {
4103 if ((ctxt->sax != NULL) &&
4104 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4105 ctxt->sax->entityDecl(ctxt->userData, name,
4106 XML_INTERNAL_PARAMETER_ENTITY,
4107 NULL, NULL, value);
4108 }
4109 } else {
4110 URI = xmlParseExternalID(ctxt, &literal, 1);
4111 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004112 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004113 }
4114 if (URI) {
4115 xmlURIPtr uri;
4116
4117 uri = xmlParseURI((const char *) URI);
4118 if (uri == NULL) {
4119 ctxt->errNo = XML_ERR_INVALID_URI;
4120 if ((ctxt->sax != NULL) &&
4121 (!ctxt->disableSAX) &&
4122 (ctxt->sax->error != NULL))
4123 ctxt->sax->error(ctxt->userData,
4124 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004125 /*
4126 * This really ought to be a well formedness error
4127 * but the XML Core WG decided otherwise c.f. issue
4128 * E26 of the XML erratas.
4129 */
Owen Taylor3473f882001-02-23 17:55:21 +00004130 } else {
4131 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004132 /*
4133 * Okay this is foolish to block those but not
4134 * invalid URIs.
4135 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004136 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004137 } else {
4138 if ((ctxt->sax != NULL) &&
4139 (!ctxt->disableSAX) &&
4140 (ctxt->sax->entityDecl != NULL))
4141 ctxt->sax->entityDecl(ctxt->userData, name,
4142 XML_EXTERNAL_PARAMETER_ENTITY,
4143 literal, URI, NULL);
4144 }
4145 xmlFreeURI(uri);
4146 }
4147 }
4148 }
4149 } else {
4150 if ((RAW == '"') || (RAW == '\'')) {
4151 value = xmlParseEntityValue(ctxt, &orig);
4152 if ((ctxt->sax != NULL) &&
4153 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4154 ctxt->sax->entityDecl(ctxt->userData, name,
4155 XML_INTERNAL_GENERAL_ENTITY,
4156 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004157 /*
4158 * For expat compatibility in SAX mode.
4159 */
4160 if ((ctxt->myDoc == NULL) ||
4161 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4162 if (ctxt->myDoc == NULL) {
4163 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4164 }
4165 if (ctxt->myDoc->intSubset == NULL)
4166 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4167 BAD_CAST "fake", NULL, NULL);
4168
Daniel Veillard1af9a412003-08-20 22:54:39 +00004169 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4170 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004171 }
Owen Taylor3473f882001-02-23 17:55:21 +00004172 } else {
4173 URI = xmlParseExternalID(ctxt, &literal, 1);
4174 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004175 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004176 }
4177 if (URI) {
4178 xmlURIPtr uri;
4179
4180 uri = xmlParseURI((const char *)URI);
4181 if (uri == NULL) {
4182 ctxt->errNo = XML_ERR_INVALID_URI;
4183 if ((ctxt->sax != NULL) &&
4184 (!ctxt->disableSAX) &&
4185 (ctxt->sax->error != NULL))
4186 ctxt->sax->error(ctxt->userData,
4187 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004188 /*
4189 * This really ought to be a well formedness error
4190 * but the XML Core WG decided otherwise c.f. issue
4191 * E26 of the XML erratas.
4192 */
Owen Taylor3473f882001-02-23 17:55:21 +00004193 } else {
4194 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004195 /*
4196 * Okay this is foolish to block those but not
4197 * invalid URIs.
4198 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004199 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004200 }
4201 xmlFreeURI(uri);
4202 }
4203 }
4204 if ((RAW != '>') && (!IS_BLANK(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004205 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4206 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004207 }
4208 SKIP_BLANKS;
4209 if ((RAW == 'N') && (NXT(1) == 'D') &&
4210 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4211 (NXT(4) == 'A')) {
4212 SKIP(5);
4213 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004214 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4215 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004216 }
4217 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004218 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004219 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4220 (ctxt->sax->unparsedEntityDecl != NULL))
4221 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4222 literal, URI, ndata);
4223 } else {
4224 if ((ctxt->sax != NULL) &&
4225 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4226 ctxt->sax->entityDecl(ctxt->userData, name,
4227 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4228 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004229 /*
4230 * For expat compatibility in SAX mode.
4231 * assuming the entity repalcement was asked for
4232 */
4233 if ((ctxt->replaceEntities != 0) &&
4234 ((ctxt->myDoc == NULL) ||
4235 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4236 if (ctxt->myDoc == NULL) {
4237 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4238 }
4239
4240 if (ctxt->myDoc->intSubset == NULL)
4241 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4242 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004243 xmlSAX2EntityDecl(ctxt, name,
4244 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4245 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004246 }
Owen Taylor3473f882001-02-23 17:55:21 +00004247 }
4248 }
4249 }
4250 SKIP_BLANKS;
4251 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004252 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004253 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004254 } else {
4255 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004256 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4257 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004258 }
4259 NEXT;
4260 }
4261 if (orig != NULL) {
4262 /*
4263 * Ugly mechanism to save the raw entity value.
4264 */
4265 xmlEntityPtr cur = NULL;
4266
4267 if (isParameter) {
4268 if ((ctxt->sax != NULL) &&
4269 (ctxt->sax->getParameterEntity != NULL))
4270 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4271 } else {
4272 if ((ctxt->sax != NULL) &&
4273 (ctxt->sax->getEntity != NULL))
4274 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004275 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004276 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004277 }
Owen Taylor3473f882001-02-23 17:55:21 +00004278 }
4279 if (cur != NULL) {
4280 if (cur->orig != NULL)
4281 xmlFree(orig);
4282 else
4283 cur->orig = orig;
4284 } else
4285 xmlFree(orig);
4286 }
Owen Taylor3473f882001-02-23 17:55:21 +00004287 if (value != NULL) xmlFree(value);
4288 if (URI != NULL) xmlFree(URI);
4289 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004290 }
4291}
4292
4293/**
4294 * xmlParseDefaultDecl:
4295 * @ctxt: an XML parser context
4296 * @value: Receive a possible fixed default value for the attribute
4297 *
4298 * Parse an attribute default declaration
4299 *
4300 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4301 *
4302 * [ VC: Required Attribute ]
4303 * if the default declaration is the keyword #REQUIRED, then the
4304 * attribute must be specified for all elements of the type in the
4305 * attribute-list declaration.
4306 *
4307 * [ VC: Attribute Default Legal ]
4308 * The declared default value must meet the lexical constraints of
4309 * the declared attribute type c.f. xmlValidateAttributeDecl()
4310 *
4311 * [ VC: Fixed Attribute Default ]
4312 * if an attribute has a default value declared with the #FIXED
4313 * keyword, instances of that attribute must match the default value.
4314 *
4315 * [ WFC: No < in Attribute Values ]
4316 * handled in xmlParseAttValue()
4317 *
4318 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4319 * or XML_ATTRIBUTE_FIXED.
4320 */
4321
4322int
4323xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4324 int val;
4325 xmlChar *ret;
4326
4327 *value = NULL;
4328 if ((RAW == '#') && (NXT(1) == 'R') &&
4329 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
4330 (NXT(4) == 'U') && (NXT(5) == 'I') &&
4331 (NXT(6) == 'R') && (NXT(7) == 'E') &&
4332 (NXT(8) == 'D')) {
4333 SKIP(9);
4334 return(XML_ATTRIBUTE_REQUIRED);
4335 }
4336 if ((RAW == '#') && (NXT(1) == 'I') &&
4337 (NXT(2) == 'M') && (NXT(3) == 'P') &&
4338 (NXT(4) == 'L') && (NXT(5) == 'I') &&
4339 (NXT(6) == 'E') && (NXT(7) == 'D')) {
4340 SKIP(8);
4341 return(XML_ATTRIBUTE_IMPLIED);
4342 }
4343 val = XML_ATTRIBUTE_NONE;
4344 if ((RAW == '#') && (NXT(1) == 'F') &&
4345 (NXT(2) == 'I') && (NXT(3) == 'X') &&
4346 (NXT(4) == 'E') && (NXT(5) == 'D')) {
4347 SKIP(6);
4348 val = XML_ATTRIBUTE_FIXED;
4349 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004350 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4351 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004352 }
4353 SKIP_BLANKS;
4354 }
4355 ret = xmlParseAttValue(ctxt);
4356 ctxt->instate = XML_PARSER_DTD;
4357 if (ret == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004358 xmlFatalErrMsg(ctxt, ctxt->errNo,
4359 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004360 } else
4361 *value = ret;
4362 return(val);
4363}
4364
4365/**
4366 * xmlParseNotationType:
4367 * @ctxt: an XML parser context
4368 *
4369 * parse an Notation attribute type.
4370 *
4371 * Note: the leading 'NOTATION' S part has already being parsed...
4372 *
4373 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4374 *
4375 * [ VC: Notation Attributes ]
4376 * Values of this type must match one of the notation names included
4377 * in the declaration; all notation names in the declaration must be declared.
4378 *
4379 * Returns: the notation attribute tree built while parsing
4380 */
4381
4382xmlEnumerationPtr
4383xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004384 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004385 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4386
4387 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004388 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004389 return(NULL);
4390 }
4391 SHRINK;
4392 do {
4393 NEXT;
4394 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004395 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004396 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004397 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4398 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004399 return(ret);
4400 }
4401 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004402 if (cur == NULL) return(ret);
4403 if (last == NULL) ret = last = cur;
4404 else {
4405 last->next = cur;
4406 last = cur;
4407 }
4408 SKIP_BLANKS;
4409 } while (RAW == '|');
4410 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004411 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004412 if ((last != NULL) && (last != ret))
4413 xmlFreeEnumeration(last);
4414 return(ret);
4415 }
4416 NEXT;
4417 return(ret);
4418}
4419
4420/**
4421 * xmlParseEnumerationType:
4422 * @ctxt: an XML parser context
4423 *
4424 * parse an Enumeration attribute type.
4425 *
4426 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4427 *
4428 * [ VC: Enumeration ]
4429 * Values of this type must match one of the Nmtoken tokens in
4430 * the declaration
4431 *
4432 * Returns: the enumeration attribute tree built while parsing
4433 */
4434
4435xmlEnumerationPtr
4436xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4437 xmlChar *name;
4438 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4439
4440 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004441 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004442 return(NULL);
4443 }
4444 SHRINK;
4445 do {
4446 NEXT;
4447 SKIP_BLANKS;
4448 name = xmlParseNmtoken(ctxt);
4449 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004450 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004451 return(ret);
4452 }
4453 cur = xmlCreateEnumeration(name);
4454 xmlFree(name);
4455 if (cur == NULL) return(ret);
4456 if (last == NULL) ret = last = cur;
4457 else {
4458 last->next = cur;
4459 last = cur;
4460 }
4461 SKIP_BLANKS;
4462 } while (RAW == '|');
4463 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004464 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004465 return(ret);
4466 }
4467 NEXT;
4468 return(ret);
4469}
4470
4471/**
4472 * xmlParseEnumeratedType:
4473 * @ctxt: an XML parser context
4474 * @tree: the enumeration tree built while parsing
4475 *
4476 * parse an Enumerated attribute type.
4477 *
4478 * [57] EnumeratedType ::= NotationType | Enumeration
4479 *
4480 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4481 *
4482 *
4483 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4484 */
4485
4486int
4487xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4488 if ((RAW == 'N') && (NXT(1) == 'O') &&
4489 (NXT(2) == 'T') && (NXT(3) == 'A') &&
4490 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4491 (NXT(6) == 'O') && (NXT(7) == 'N')) {
4492 SKIP(8);
4493 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004494 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4495 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004496 return(0);
4497 }
4498 SKIP_BLANKS;
4499 *tree = xmlParseNotationType(ctxt);
4500 if (*tree == NULL) return(0);
4501 return(XML_ATTRIBUTE_NOTATION);
4502 }
4503 *tree = xmlParseEnumerationType(ctxt);
4504 if (*tree == NULL) return(0);
4505 return(XML_ATTRIBUTE_ENUMERATION);
4506}
4507
4508/**
4509 * xmlParseAttributeType:
4510 * @ctxt: an XML parser context
4511 * @tree: the enumeration tree built while parsing
4512 *
4513 * parse the Attribute list def for an element
4514 *
4515 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4516 *
4517 * [55] StringType ::= 'CDATA'
4518 *
4519 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4520 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4521 *
4522 * Validity constraints for attribute values syntax are checked in
4523 * xmlValidateAttributeValue()
4524 *
4525 * [ VC: ID ]
4526 * Values of type ID must match the Name production. A name must not
4527 * appear more than once in an XML document as a value of this type;
4528 * i.e., ID values must uniquely identify the elements which bear them.
4529 *
4530 * [ VC: One ID per Element Type ]
4531 * No element type may have more than one ID attribute specified.
4532 *
4533 * [ VC: ID Attribute Default ]
4534 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4535 *
4536 * [ VC: IDREF ]
4537 * Values of type IDREF must match the Name production, and values
4538 * of type IDREFS must match Names; each IDREF Name must match the value
4539 * of an ID attribute on some element in the XML document; i.e. IDREF
4540 * values must match the value of some ID attribute.
4541 *
4542 * [ VC: Entity Name ]
4543 * Values of type ENTITY must match the Name production, values
4544 * of type ENTITIES must match Names; each Entity Name must match the
4545 * name of an unparsed entity declared in the DTD.
4546 *
4547 * [ VC: Name Token ]
4548 * Values of type NMTOKEN must match the Nmtoken production; values
4549 * of type NMTOKENS must match Nmtokens.
4550 *
4551 * Returns the attribute type
4552 */
4553int
4554xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4555 SHRINK;
4556 if ((RAW == 'C') && (NXT(1) == 'D') &&
4557 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4558 (NXT(4) == 'A')) {
4559 SKIP(5);
4560 return(XML_ATTRIBUTE_CDATA);
4561 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4562 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4563 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4564 SKIP(6);
4565 return(XML_ATTRIBUTE_IDREFS);
4566 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4567 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4568 (NXT(4) == 'F')) {
4569 SKIP(5);
4570 return(XML_ATTRIBUTE_IDREF);
4571 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4572 SKIP(2);
4573 return(XML_ATTRIBUTE_ID);
4574 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4575 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4576 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4577 SKIP(6);
4578 return(XML_ATTRIBUTE_ENTITY);
4579 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4580 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4581 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4582 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4583 SKIP(8);
4584 return(XML_ATTRIBUTE_ENTITIES);
4585 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4586 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4587 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4588 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4589 SKIP(8);
4590 return(XML_ATTRIBUTE_NMTOKENS);
4591 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4592 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4593 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4594 (NXT(6) == 'N')) {
4595 SKIP(7);
4596 return(XML_ATTRIBUTE_NMTOKEN);
4597 }
4598 return(xmlParseEnumeratedType(ctxt, tree));
4599}
4600
4601/**
4602 * xmlParseAttributeListDecl:
4603 * @ctxt: an XML parser context
4604 *
4605 * : parse the Attribute list def for an element
4606 *
4607 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4608 *
4609 * [53] AttDef ::= S Name S AttType S DefaultDecl
4610 *
4611 */
4612void
4613xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004614 const xmlChar *elemName;
4615 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004616 xmlEnumerationPtr tree;
4617
4618 if ((RAW == '<') && (NXT(1) == '!') &&
4619 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4620 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4621 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4622 (NXT(8) == 'T')) {
4623 xmlParserInputPtr input = ctxt->input;
4624
4625 SKIP(9);
4626 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004627 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004628 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004629 }
4630 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004631 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004632 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004633 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4634 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004635 return;
4636 }
4637 SKIP_BLANKS;
4638 GROW;
4639 while (RAW != '>') {
4640 const xmlChar *check = CUR_PTR;
4641 int type;
4642 int def;
4643 xmlChar *defaultValue = NULL;
4644
4645 GROW;
4646 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004647 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004648 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004649 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4650 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004651 break;
4652 }
4653 GROW;
4654 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004655 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004656 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004657 if (defaultValue != NULL)
4658 xmlFree(defaultValue);
4659 break;
4660 }
4661 SKIP_BLANKS;
4662
4663 type = xmlParseAttributeType(ctxt, &tree);
4664 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004665 if (defaultValue != NULL)
4666 xmlFree(defaultValue);
4667 break;
4668 }
4669
4670 GROW;
4671 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004672 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4673 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004674 if (defaultValue != NULL)
4675 xmlFree(defaultValue);
4676 if (tree != NULL)
4677 xmlFreeEnumeration(tree);
4678 break;
4679 }
4680 SKIP_BLANKS;
4681
4682 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4683 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004684 if (defaultValue != NULL)
4685 xmlFree(defaultValue);
4686 if (tree != NULL)
4687 xmlFreeEnumeration(tree);
4688 break;
4689 }
4690
4691 GROW;
4692 if (RAW != '>') {
4693 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004694 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004695 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004696 if (defaultValue != NULL)
4697 xmlFree(defaultValue);
4698 if (tree != NULL)
4699 xmlFreeEnumeration(tree);
4700 break;
4701 }
4702 SKIP_BLANKS;
4703 }
4704 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004705 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4706 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004707 if (defaultValue != NULL)
4708 xmlFree(defaultValue);
4709 if (tree != NULL)
4710 xmlFreeEnumeration(tree);
4711 break;
4712 }
4713 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4714 (ctxt->sax->attributeDecl != NULL))
4715 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4716 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004717 else if (tree != NULL)
4718 xmlFreeEnumeration(tree);
4719
4720 if ((ctxt->sax2) && (defaultValue != NULL) &&
4721 (def != XML_ATTRIBUTE_IMPLIED) &&
4722 (def != XML_ATTRIBUTE_REQUIRED)) {
4723 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4724 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004725 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4726 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4727 }
Owen Taylor3473f882001-02-23 17:55:21 +00004728 if (defaultValue != NULL)
4729 xmlFree(defaultValue);
4730 GROW;
4731 }
4732 if (RAW == '>') {
4733 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004734 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4735 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004736 }
4737 NEXT;
4738 }
Owen Taylor3473f882001-02-23 17:55:21 +00004739 }
4740}
4741
4742/**
4743 * xmlParseElementMixedContentDecl:
4744 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004745 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004746 *
4747 * parse the declaration for a Mixed Element content
4748 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4749 *
4750 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4751 * '(' S? '#PCDATA' S? ')'
4752 *
4753 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4754 *
4755 * [ VC: No Duplicate Types ]
4756 * The same name must not appear more than once in a single
4757 * mixed-content declaration.
4758 *
4759 * returns: the list of the xmlElementContentPtr describing the element choices
4760 */
4761xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004762xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004763 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004764 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004765
4766 GROW;
4767 if ((RAW == '#') && (NXT(1) == 'P') &&
4768 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4769 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4770 (NXT(6) == 'A')) {
4771 SKIP(7);
4772 SKIP_BLANKS;
4773 SHRINK;
4774 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004775 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004776 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4777 if (ctxt->vctxt.error != NULL)
4778 ctxt->vctxt.error(ctxt->vctxt.userData,
4779"Element content declaration doesn't start and stop in the same entity\n");
4780 ctxt->valid = 0;
4781 }
Owen Taylor3473f882001-02-23 17:55:21 +00004782 NEXT;
4783 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4784 if (RAW == '*') {
4785 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4786 NEXT;
4787 }
4788 return(ret);
4789 }
4790 if ((RAW == '(') || (RAW == '|')) {
4791 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4792 if (ret == NULL) return(NULL);
4793 }
4794 while (RAW == '|') {
4795 NEXT;
4796 if (elem == NULL) {
4797 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4798 if (ret == NULL) return(NULL);
4799 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004800 if (cur != NULL)
4801 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004802 cur = ret;
4803 } else {
4804 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4805 if (n == NULL) return(NULL);
4806 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004807 if (n->c1 != NULL)
4808 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004809 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004810 if (n != NULL)
4811 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004812 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004813 }
4814 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004815 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004816 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004817 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004818 "xmlParseElementMixedContentDecl : Name expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004819 xmlFreeElementContent(cur);
4820 return(NULL);
4821 }
4822 SKIP_BLANKS;
4823 GROW;
4824 }
4825 if ((RAW == ')') && (NXT(1) == '*')) {
4826 if (elem != NULL) {
4827 cur->c2 = xmlNewElementContent(elem,
4828 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004829 if (cur->c2 != NULL)
4830 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004831 }
4832 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004833 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004834 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4835 if (ctxt->vctxt.error != NULL)
4836 ctxt->vctxt.error(ctxt->vctxt.userData,
4837"Element content declaration doesn't start and stop in the same entity\n");
4838 ctxt->valid = 0;
4839 }
Owen Taylor3473f882001-02-23 17:55:21 +00004840 SKIP(2);
4841 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00004842 xmlFreeElementContent(ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004843 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004844 return(NULL);
4845 }
4846
4847 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004848 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004849 }
4850 return(ret);
4851}
4852
4853/**
4854 * xmlParseElementChildrenContentDecl:
4855 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004856 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004857 *
4858 * parse the declaration for a Mixed Element content
4859 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4860 *
4861 *
4862 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4863 *
4864 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4865 *
4866 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4867 *
4868 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4869 *
4870 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4871 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004872 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004873 * opening or closing parentheses in a choice, seq, or Mixed
4874 * construct is contained in the replacement text for a parameter
4875 * entity, both must be contained in the same replacement text. For
4876 * interoperability, if a parameter-entity reference appears in a
4877 * choice, seq, or Mixed construct, its replacement text should not
4878 * be empty, and neither the first nor last non-blank character of
4879 * the replacement text should be a connector (| or ,).
4880 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004881 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004882 * hierarchy.
4883 */
4884xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004885xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004886 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004887 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00004888 xmlChar type = 0;
4889
4890 SKIP_BLANKS;
4891 GROW;
4892 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004893 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004894
Owen Taylor3473f882001-02-23 17:55:21 +00004895 /* Recurse on first child */
4896 NEXT;
4897 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004898 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004899 SKIP_BLANKS;
4900 GROW;
4901 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004902 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004903 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004904 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004905 return(NULL);
4906 }
4907 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004908 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004909 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004910 return(NULL);
4911 }
Owen Taylor3473f882001-02-23 17:55:21 +00004912 GROW;
4913 if (RAW == '?') {
4914 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4915 NEXT;
4916 } else if (RAW == '*') {
4917 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4918 NEXT;
4919 } else if (RAW == '+') {
4920 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4921 NEXT;
4922 } else {
4923 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4924 }
Owen Taylor3473f882001-02-23 17:55:21 +00004925 GROW;
4926 }
4927 SKIP_BLANKS;
4928 SHRINK;
4929 while (RAW != ')') {
4930 /*
4931 * Each loop we parse one separator and one element.
4932 */
4933 if (RAW == ',') {
4934 if (type == 0) type = CUR;
4935
4936 /*
4937 * Detect "Name | Name , Name" error
4938 */
4939 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004940 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004941 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004942 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004943 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004944 xmlFreeElementContent(last);
4945 if (ret != NULL)
4946 xmlFreeElementContent(ret);
4947 return(NULL);
4948 }
4949 NEXT;
4950
4951 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4952 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004953 if ((last != NULL) && (last != ret))
4954 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004955 xmlFreeElementContent(ret);
4956 return(NULL);
4957 }
4958 if (last == NULL) {
4959 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004960 if (ret != NULL)
4961 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004962 ret = cur = op;
4963 } else {
4964 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004965 if (op != NULL)
4966 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004967 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004968 if (last != NULL)
4969 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004970 cur =op;
4971 last = NULL;
4972 }
4973 } else if (RAW == '|') {
4974 if (type == 0) type = CUR;
4975
4976 /*
4977 * Detect "Name , Name | Name" error
4978 */
4979 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004980 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004981 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004982 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004983 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004984 xmlFreeElementContent(last);
4985 if (ret != NULL)
4986 xmlFreeElementContent(ret);
4987 return(NULL);
4988 }
4989 NEXT;
4990
4991 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4992 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004993 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004994 xmlFreeElementContent(last);
4995 if (ret != NULL)
4996 xmlFreeElementContent(ret);
4997 return(NULL);
4998 }
4999 if (last == NULL) {
5000 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005001 if (ret != NULL)
5002 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005003 ret = cur = op;
5004 } else {
5005 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005006 if (op != NULL)
5007 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005008 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005009 if (last != NULL)
5010 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005011 cur =op;
5012 last = NULL;
5013 }
5014 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005015 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005016 if (ret != NULL)
5017 xmlFreeElementContent(ret);
5018 return(NULL);
5019 }
5020 GROW;
5021 SKIP_BLANKS;
5022 GROW;
5023 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005024 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005025 /* Recurse on second child */
5026 NEXT;
5027 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005028 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005029 SKIP_BLANKS;
5030 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005031 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005032 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005033 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005034 if (ret != NULL)
5035 xmlFreeElementContent(ret);
5036 return(NULL);
5037 }
5038 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005039 if (RAW == '?') {
5040 last->ocur = XML_ELEMENT_CONTENT_OPT;
5041 NEXT;
5042 } else if (RAW == '*') {
5043 last->ocur = XML_ELEMENT_CONTENT_MULT;
5044 NEXT;
5045 } else if (RAW == '+') {
5046 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5047 NEXT;
5048 } else {
5049 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5050 }
5051 }
5052 SKIP_BLANKS;
5053 GROW;
5054 }
5055 if ((cur != NULL) && (last != NULL)) {
5056 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005057 if (last != NULL)
5058 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005059 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005060 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005061 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5062 if (ctxt->vctxt.error != NULL)
5063 ctxt->vctxt.error(ctxt->vctxt.userData,
5064"Element content declaration doesn't start and stop in the same entity\n");
5065 ctxt->valid = 0;
5066 }
Owen Taylor3473f882001-02-23 17:55:21 +00005067 NEXT;
5068 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00005069 if (ret != NULL)
5070 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00005071 NEXT;
5072 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005073 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005074 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005075 cur = ret;
5076 /*
5077 * Some normalization:
5078 * (a | b* | c?)* == (a | b | c)*
5079 */
5080 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5081 if ((cur->c1 != NULL) &&
5082 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5083 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5084 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5085 if ((cur->c2 != NULL) &&
5086 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5087 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5088 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5089 cur = cur->c2;
5090 }
5091 }
Owen Taylor3473f882001-02-23 17:55:21 +00005092 NEXT;
5093 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005094 if (ret != NULL) {
5095 int found = 0;
5096
Daniel Veillarde470df72001-04-18 21:41:07 +00005097 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005098 /*
5099 * Some normalization:
5100 * (a | b*)+ == (a | b)*
5101 * (a | b?)+ == (a | b)*
5102 */
5103 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5104 if ((cur->c1 != NULL) &&
5105 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5106 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5107 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5108 found = 1;
5109 }
5110 if ((cur->c2 != NULL) &&
5111 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5112 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5113 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5114 found = 1;
5115 }
5116 cur = cur->c2;
5117 }
5118 if (found)
5119 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5120 }
Owen Taylor3473f882001-02-23 17:55:21 +00005121 NEXT;
5122 }
5123 return(ret);
5124}
5125
5126/**
5127 * xmlParseElementContentDecl:
5128 * @ctxt: an XML parser context
5129 * @name: the name of the element being defined.
5130 * @result: the Element Content pointer will be stored here if any
5131 *
5132 * parse the declaration for an Element content either Mixed or Children,
5133 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5134 *
5135 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5136 *
5137 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5138 */
5139
5140int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005141xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005142 xmlElementContentPtr *result) {
5143
5144 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005145 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005146 int res;
5147
5148 *result = NULL;
5149
5150 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005151 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005152 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005153 return(-1);
5154 }
5155 NEXT;
5156 GROW;
5157 SKIP_BLANKS;
5158 if ((RAW == '#') && (NXT(1) == 'P') &&
5159 (NXT(2) == 'C') && (NXT(3) == 'D') &&
5160 (NXT(4) == 'A') && (NXT(5) == 'T') &&
5161 (NXT(6) == 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005162 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005163 res = XML_ELEMENT_TYPE_MIXED;
5164 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005165 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005166 res = XML_ELEMENT_TYPE_ELEMENT;
5167 }
Owen Taylor3473f882001-02-23 17:55:21 +00005168 SKIP_BLANKS;
5169 *result = tree;
5170 return(res);
5171}
5172
5173/**
5174 * xmlParseElementDecl:
5175 * @ctxt: an XML parser context
5176 *
5177 * parse an Element declaration.
5178 *
5179 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5180 *
5181 * [ VC: Unique Element Type Declaration ]
5182 * No element type may be declared more than once
5183 *
5184 * Returns the type of the element, or -1 in case of error
5185 */
5186int
5187xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005188 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005189 int ret = -1;
5190 xmlElementContentPtr content = NULL;
5191
5192 GROW;
5193 if ((RAW == '<') && (NXT(1) == '!') &&
5194 (NXT(2) == 'E') && (NXT(3) == 'L') &&
5195 (NXT(4) == 'E') && (NXT(5) == 'M') &&
5196 (NXT(6) == 'E') && (NXT(7) == 'N') &&
5197 (NXT(8) == 'T')) {
5198 xmlParserInputPtr input = ctxt->input;
5199
5200 SKIP(9);
5201 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005202 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5203 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005204 }
5205 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005206 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005207 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005208 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5209 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005210 return(-1);
5211 }
5212 while ((RAW == 0) && (ctxt->inputNr > 1))
5213 xmlPopInput(ctxt);
5214 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005215 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5216 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005217 }
5218 SKIP_BLANKS;
5219 if ((RAW == 'E') && (NXT(1) == 'M') &&
5220 (NXT(2) == 'P') && (NXT(3) == 'T') &&
5221 (NXT(4) == 'Y')) {
5222 SKIP(5);
5223 /*
5224 * Element must always be empty.
5225 */
5226 ret = XML_ELEMENT_TYPE_EMPTY;
5227 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5228 (NXT(2) == 'Y')) {
5229 SKIP(3);
5230 /*
5231 * Element is a generic container.
5232 */
5233 ret = XML_ELEMENT_TYPE_ANY;
5234 } else if (RAW == '(') {
5235 ret = xmlParseElementContentDecl(ctxt, name, &content);
5236 } else {
5237 /*
5238 * [ WFC: PEs in Internal Subset ] error handling.
5239 */
5240 if ((RAW == '%') && (ctxt->external == 0) &&
5241 (ctxt->inputNr == 1)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005242 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
5243 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5244 ctxt->sax->error(ctxt->userData,
5245 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005246 } else {
5247 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
5248 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5249 ctxt->sax->error(ctxt->userData,
5250 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5251 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005252 ctxt->wellFormed = 0;
5253 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005254 return(-1);
5255 }
5256
5257 SKIP_BLANKS;
5258 /*
5259 * Pop-up of finished entities.
5260 */
5261 while ((RAW == 0) && (ctxt->inputNr > 1))
5262 xmlPopInput(ctxt);
5263 SKIP_BLANKS;
5264
5265 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005266 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005267 } else {
5268 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005269 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5270 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005271 }
5272
5273 NEXT;
5274 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5275 (ctxt->sax->elementDecl != NULL))
5276 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5277 content);
5278 }
5279 if (content != NULL) {
5280 xmlFreeElementContent(content);
5281 }
Owen Taylor3473f882001-02-23 17:55:21 +00005282 }
5283 return(ret);
5284}
5285
5286/**
Owen Taylor3473f882001-02-23 17:55:21 +00005287 * xmlParseConditionalSections
5288 * @ctxt: an XML parser context
5289 *
5290 * [61] conditionalSect ::= includeSect | ignoreSect
5291 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5292 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5293 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5294 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5295 */
5296
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005297static void
Owen Taylor3473f882001-02-23 17:55:21 +00005298xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5299 SKIP(3);
5300 SKIP_BLANKS;
5301 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
5302 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
5303 (NXT(6) == 'E')) {
5304 SKIP(7);
5305 SKIP_BLANKS;
5306 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005307 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005308 } else {
5309 NEXT;
5310 }
5311 if (xmlParserDebugEntities) {
5312 if ((ctxt->input != NULL) && (ctxt->input->filename))
5313 xmlGenericError(xmlGenericErrorContext,
5314 "%s(%d): ", ctxt->input->filename,
5315 ctxt->input->line);
5316 xmlGenericError(xmlGenericErrorContext,
5317 "Entering INCLUDE Conditional Section\n");
5318 }
5319
5320 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5321 (NXT(2) != '>'))) {
5322 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005323 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005324
5325 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5326 xmlParseConditionalSections(ctxt);
5327 } else if (IS_BLANK(CUR)) {
5328 NEXT;
5329 } else if (RAW == '%') {
5330 xmlParsePEReference(ctxt);
5331 } else
5332 xmlParseMarkupDecl(ctxt);
5333
5334 /*
5335 * Pop-up of finished entities.
5336 */
5337 while ((RAW == 0) && (ctxt->inputNr > 1))
5338 xmlPopInput(ctxt);
5339
Daniel Veillardfdc91562002-07-01 21:52:03 +00005340 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005341 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005342 break;
5343 }
5344 }
5345 if (xmlParserDebugEntities) {
5346 if ((ctxt->input != NULL) && (ctxt->input->filename))
5347 xmlGenericError(xmlGenericErrorContext,
5348 "%s(%d): ", ctxt->input->filename,
5349 ctxt->input->line);
5350 xmlGenericError(xmlGenericErrorContext,
5351 "Leaving INCLUDE Conditional Section\n");
5352 }
5353
5354 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
5355 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
5356 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005357 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005358 int depth = 0;
5359
5360 SKIP(6);
5361 SKIP_BLANKS;
5362 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005363 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005364 } else {
5365 NEXT;
5366 }
5367 if (xmlParserDebugEntities) {
5368 if ((ctxt->input != NULL) && (ctxt->input->filename))
5369 xmlGenericError(xmlGenericErrorContext,
5370 "%s(%d): ", ctxt->input->filename,
5371 ctxt->input->line);
5372 xmlGenericError(xmlGenericErrorContext,
5373 "Entering IGNORE Conditional Section\n");
5374 }
5375
5376 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005377 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005378 * But disable SAX event generating DTD building in the meantime
5379 */
5380 state = ctxt->disableSAX;
5381 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005382 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005383 ctxt->instate = XML_PARSER_IGNORE;
5384
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005385 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005386 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5387 depth++;
5388 SKIP(3);
5389 continue;
5390 }
5391 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5392 if (--depth >= 0) SKIP(3);
5393 continue;
5394 }
5395 NEXT;
5396 continue;
5397 }
5398
5399 ctxt->disableSAX = state;
5400 ctxt->instate = instate;
5401
5402 if (xmlParserDebugEntities) {
5403 if ((ctxt->input != NULL) && (ctxt->input->filename))
5404 xmlGenericError(xmlGenericErrorContext,
5405 "%s(%d): ", ctxt->input->filename,
5406 ctxt->input->line);
5407 xmlGenericError(xmlGenericErrorContext,
5408 "Leaving IGNORE Conditional Section\n");
5409 }
5410
5411 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005412 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005413 }
5414
5415 if (RAW == 0)
5416 SHRINK;
5417
5418 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005419 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005420 } else {
5421 SKIP(3);
5422 }
5423}
5424
5425/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005426 * xmlParseMarkupDecl:
5427 * @ctxt: an XML parser context
5428 *
5429 * parse Markup declarations
5430 *
5431 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5432 * NotationDecl | PI | Comment
5433 *
5434 * [ VC: Proper Declaration/PE Nesting ]
5435 * Parameter-entity replacement text must be properly nested with
5436 * markup declarations. That is to say, if either the first character
5437 * or the last character of a markup declaration (markupdecl above) is
5438 * contained in the replacement text for a parameter-entity reference,
5439 * both must be contained in the same replacement text.
5440 *
5441 * [ WFC: PEs in Internal Subset ]
5442 * In the internal DTD subset, parameter-entity references can occur
5443 * only where markup declarations can occur, not within markup declarations.
5444 * (This does not apply to references that occur in external parameter
5445 * entities or to the external subset.)
5446 */
5447void
5448xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5449 GROW;
5450 xmlParseElementDecl(ctxt);
5451 xmlParseAttributeListDecl(ctxt);
5452 xmlParseEntityDecl(ctxt);
5453 xmlParseNotationDecl(ctxt);
5454 xmlParsePI(ctxt);
5455 xmlParseComment(ctxt);
5456 /*
5457 * This is only for internal subset. On external entities,
5458 * the replacement is done before parsing stage
5459 */
5460 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5461 xmlParsePEReference(ctxt);
5462
5463 /*
5464 * Conditional sections are allowed from entities included
5465 * by PE References in the internal subset.
5466 */
5467 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5468 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5469 xmlParseConditionalSections(ctxt);
5470 }
5471 }
5472
5473 ctxt->instate = XML_PARSER_DTD;
5474}
5475
5476/**
5477 * xmlParseTextDecl:
5478 * @ctxt: an XML parser context
5479 *
5480 * parse an XML declaration header for external entities
5481 *
5482 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5483 *
5484 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5485 */
5486
5487void
5488xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5489 xmlChar *version;
5490
5491 /*
5492 * We know that '<?xml' is here.
5493 */
5494 if ((RAW == '<') && (NXT(1) == '?') &&
5495 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5496 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5497 SKIP(5);
5498 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005499 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005500 return;
5501 }
5502
5503 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005504 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5505 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005506 }
5507 SKIP_BLANKS;
5508
5509 /*
5510 * We may have the VersionInfo here.
5511 */
5512 version = xmlParseVersionInfo(ctxt);
5513 if (version == NULL)
5514 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005515 else {
5516 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005517 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5518 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005519 }
5520 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005521 ctxt->input->version = version;
5522
5523 /*
5524 * We must have the encoding declaration
5525 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005526 xmlParseEncodingDecl(ctxt);
5527 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5528 /*
5529 * The XML REC instructs us to stop parsing right here
5530 */
5531 return;
5532 }
5533
5534 SKIP_BLANKS;
5535 if ((RAW == '?') && (NXT(1) == '>')) {
5536 SKIP(2);
5537 } else if (RAW == '>') {
5538 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005539 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005540 NEXT;
5541 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005542 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005543 MOVETO_ENDTAG(CUR_PTR);
5544 NEXT;
5545 }
5546}
5547
5548/**
Owen Taylor3473f882001-02-23 17:55:21 +00005549 * xmlParseExternalSubset:
5550 * @ctxt: an XML parser context
5551 * @ExternalID: the external identifier
5552 * @SystemID: the system identifier (or URL)
5553 *
5554 * parse Markup declarations from an external subset
5555 *
5556 * [30] extSubset ::= textDecl? extSubsetDecl
5557 *
5558 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5559 */
5560void
5561xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5562 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005563 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005564 GROW;
5565 if ((RAW == '<') && (NXT(1) == '?') &&
5566 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5567 (NXT(4) == 'l')) {
5568 xmlParseTextDecl(ctxt);
5569 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5570 /*
5571 * The XML REC instructs us to stop parsing right here
5572 */
5573 ctxt->instate = XML_PARSER_EOF;
5574 return;
5575 }
5576 }
5577 if (ctxt->myDoc == NULL) {
5578 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5579 }
5580 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5581 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5582
5583 ctxt->instate = XML_PARSER_DTD;
5584 ctxt->external = 1;
5585 while (((RAW == '<') && (NXT(1) == '?')) ||
5586 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005587 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005588 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005589 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005590
5591 GROW;
5592 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5593 xmlParseConditionalSections(ctxt);
5594 } else if (IS_BLANK(CUR)) {
5595 NEXT;
5596 } else if (RAW == '%') {
5597 xmlParsePEReference(ctxt);
5598 } else
5599 xmlParseMarkupDecl(ctxt);
5600
5601 /*
5602 * Pop-up of finished entities.
5603 */
5604 while ((RAW == 0) && (ctxt->inputNr > 1))
5605 xmlPopInput(ctxt);
5606
Daniel Veillardfdc91562002-07-01 21:52:03 +00005607 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005608 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005609 break;
5610 }
5611 }
5612
5613 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005614 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005615 }
5616
5617}
5618
5619/**
5620 * xmlParseReference:
5621 * @ctxt: an XML parser context
5622 *
5623 * parse and handle entity references in content, depending on the SAX
5624 * interface, this may end-up in a call to character() if this is a
5625 * CharRef, a predefined entity, if there is no reference() callback.
5626 * or if the parser was asked to switch to that mode.
5627 *
5628 * [67] Reference ::= EntityRef | CharRef
5629 */
5630void
5631xmlParseReference(xmlParserCtxtPtr ctxt) {
5632 xmlEntityPtr ent;
5633 xmlChar *val;
5634 if (RAW != '&') return;
5635
5636 if (NXT(1) == '#') {
5637 int i = 0;
5638 xmlChar out[10];
5639 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005640 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005641
5642 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5643 /*
5644 * So we are using non-UTF-8 buffers
5645 * Check that the char fit on 8bits, if not
5646 * generate a CharRef.
5647 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005648 if (value <= 0xFF) {
5649 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005650 out[1] = 0;
5651 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5652 (!ctxt->disableSAX))
5653 ctxt->sax->characters(ctxt->userData, out, 1);
5654 } else {
5655 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005656 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005657 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005658 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005659 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5660 (!ctxt->disableSAX))
5661 ctxt->sax->reference(ctxt->userData, out);
5662 }
5663 } else {
5664 /*
5665 * Just encode the value in UTF-8
5666 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005667 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005668 out[i] = 0;
5669 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5670 (!ctxt->disableSAX))
5671 ctxt->sax->characters(ctxt->userData, out, i);
5672 }
5673 } else {
5674 ent = xmlParseEntityRef(ctxt);
5675 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005676 if (!ctxt->wellFormed)
5677 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005678 if ((ent->name != NULL) &&
5679 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5680 xmlNodePtr list = NULL;
5681 int ret;
5682
5683
5684 /*
5685 * The first reference to the entity trigger a parsing phase
5686 * where the ent->children is filled with the result from
5687 * the parsing.
5688 */
5689 if (ent->children == NULL) {
5690 xmlChar *value;
5691 value = ent->content;
5692
5693 /*
5694 * Check that this entity is well formed
5695 */
5696 if ((value != NULL) &&
5697 (value[1] == 0) && (value[0] == '<') &&
5698 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5699 /*
5700 * DONE: get definite answer on this !!!
5701 * Lots of entity decls are used to declare a single
5702 * char
5703 * <!ENTITY lt "<">
5704 * Which seems to be valid since
5705 * 2.4: The ampersand character (&) and the left angle
5706 * bracket (<) may appear in their literal form only
5707 * when used ... They are also legal within the literal
5708 * entity value of an internal entity declaration;i
5709 * see "4.3.2 Well-Formed Parsed Entities".
5710 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5711 * Looking at the OASIS test suite and James Clark
5712 * tests, this is broken. However the XML REC uses
5713 * it. Is the XML REC not well-formed ????
5714 * This is a hack to avoid this problem
5715 *
5716 * ANSWER: since lt gt amp .. are already defined,
5717 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005718 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005719 * is lousy but acceptable.
5720 */
5721 list = xmlNewDocText(ctxt->myDoc, value);
5722 if (list != NULL) {
5723 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5724 (ent->children == NULL)) {
5725 ent->children = list;
5726 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005727 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005728 list->parent = (xmlNodePtr) ent;
5729 } else {
5730 xmlFreeNodeList(list);
5731 }
5732 } else if (list != NULL) {
5733 xmlFreeNodeList(list);
5734 }
5735 } else {
5736 /*
5737 * 4.3.2: An internal general parsed entity is well-formed
5738 * if its replacement text matches the production labeled
5739 * content.
5740 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005741
5742 void *user_data;
5743 /*
5744 * This is a bit hackish but this seems the best
5745 * way to make sure both SAX and DOM entity support
5746 * behaves okay.
5747 */
5748 if (ctxt->userData == ctxt)
5749 user_data = NULL;
5750 else
5751 user_data = ctxt->userData;
5752
Owen Taylor3473f882001-02-23 17:55:21 +00005753 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5754 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005755 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5756 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005757 ctxt->depth--;
5758 } else if (ent->etype ==
5759 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5760 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005761 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005762 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005763 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005764 ctxt->depth--;
5765 } else {
5766 ret = -1;
5767 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5768 ctxt->sax->error(ctxt->userData,
5769 "Internal: invalid entity type\n");
5770 }
5771 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005772 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005773 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005774 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005775 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5776 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005777 (ent->children == NULL)) {
5778 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005779 if (ctxt->replaceEntities) {
5780 /*
5781 * Prune it directly in the generated document
5782 * except for single text nodes.
5783 */
5784 if ((list->type == XML_TEXT_NODE) &&
5785 (list->next == NULL)) {
5786 list->parent = (xmlNodePtr) ent;
5787 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005788 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005789 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005790 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005791 while (list != NULL) {
5792 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005793 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005794 if (list->next == NULL)
5795 ent->last = list;
5796 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005797 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005798 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005799 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5800 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005801 }
5802 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005803 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005804 while (list != NULL) {
5805 list->parent = (xmlNodePtr) ent;
5806 if (list->next == NULL)
5807 ent->last = list;
5808 list = list->next;
5809 }
Owen Taylor3473f882001-02-23 17:55:21 +00005810 }
5811 } else {
5812 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005813 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005814 }
5815 } else if (ret > 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005816 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005817 } else if (list != NULL) {
5818 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005819 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005820 }
5821 }
5822 }
5823 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5824 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5825 /*
5826 * Create a node.
5827 */
5828 ctxt->sax->reference(ctxt->userData, ent->name);
5829 return;
5830 } else if (ctxt->replaceEntities) {
5831 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5832 /*
5833 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005834 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005835 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005836 */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005837 if ((list == NULL) && (ent->owner == 0)) {
5838 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005839 cur = ent->children;
5840 while (cur != NULL) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005841 nw = xmlCopyNode(cur, 1);
5842 if (nw != NULL) {
5843 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005844 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005845 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005846 }
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005847 xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005848 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005849 if (cur == ent->last)
5850 break;
5851 cur = cur->next;
5852 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005853 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005854 xmlAddEntityReference(ent, firstChild, nw);
5855 } else if (list == NULL) {
5856 xmlNodePtr nw = NULL, cur, next, last,
5857 firstChild = NULL;
5858 /*
5859 * Copy the entity child list and make it the new
5860 * entity child list. The goal is to make sure any
5861 * ID or REF referenced will be the one from the
5862 * document content and not the entity copy.
5863 */
5864 cur = ent->children;
5865 ent->children = NULL;
5866 last = ent->last;
5867 ent->last = NULL;
5868 while (cur != NULL) {
5869 next = cur->next;
5870 cur->next = NULL;
5871 cur->parent = NULL;
5872 nw = xmlCopyNode(cur, 1);
5873 if (nw != NULL) {
5874 nw->_private = cur->_private;
5875 if (firstChild == NULL){
5876 firstChild = cur;
5877 }
5878 xmlAddChild((xmlNodePtr) ent, nw);
5879 xmlAddChild(ctxt->node, cur);
5880 }
5881 if (cur == last)
5882 break;
5883 cur = next;
5884 }
5885 ent->owner = 1;
5886 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5887 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005888 } else {
5889 /*
5890 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005891 * node with a possible previous text one which
5892 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005893 */
5894 if (ent->children->type == XML_TEXT_NODE)
5895 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5896 if ((ent->last != ent->children) &&
5897 (ent->last->type == XML_TEXT_NODE))
5898 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5899 xmlAddChildList(ctxt->node, ent->children);
5900 }
5901
Owen Taylor3473f882001-02-23 17:55:21 +00005902 /*
5903 * This is to avoid a nasty side effect, see
5904 * characters() in SAX.c
5905 */
5906 ctxt->nodemem = 0;
5907 ctxt->nodelen = 0;
5908 return;
5909 } else {
5910 /*
5911 * Probably running in SAX mode
5912 */
5913 xmlParserInputPtr input;
5914
5915 input = xmlNewEntityInputStream(ctxt, ent);
5916 xmlPushInput(ctxt, input);
5917 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5918 (RAW == '<') && (NXT(1) == '?') &&
5919 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5920 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5921 xmlParseTextDecl(ctxt);
5922 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5923 /*
5924 * The XML REC instructs us to stop parsing right here
5925 */
5926 ctxt->instate = XML_PARSER_EOF;
5927 return;
5928 }
5929 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005930 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
5931 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005932 }
5933 }
5934 return;
5935 }
5936 }
5937 } else {
5938 val = ent->content;
5939 if (val == NULL) return;
5940 /*
5941 * inline the entity.
5942 */
5943 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5944 (!ctxt->disableSAX))
5945 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5946 }
5947 }
5948}
5949
5950/**
5951 * xmlParseEntityRef:
5952 * @ctxt: an XML parser context
5953 *
5954 * parse ENTITY references declarations
5955 *
5956 * [68] EntityRef ::= '&' Name ';'
5957 *
5958 * [ WFC: Entity Declared ]
5959 * In a document without any DTD, a document with only an internal DTD
5960 * subset which contains no parameter entity references, or a document
5961 * with "standalone='yes'", the Name given in the entity reference
5962 * must match that in an entity declaration, except that well-formed
5963 * documents need not declare any of the following entities: amp, lt,
5964 * gt, apos, quot. The declaration of a parameter entity must precede
5965 * any reference to it. Similarly, the declaration of a general entity
5966 * must precede any reference to it which appears in a default value in an
5967 * attribute-list declaration. Note that if entities are declared in the
5968 * external subset or in external parameter entities, a non-validating
5969 * processor is not obligated to read and process their declarations;
5970 * for such documents, the rule that an entity must be declared is a
5971 * well-formedness constraint only if standalone='yes'.
5972 *
5973 * [ WFC: Parsed Entity ]
5974 * An entity reference must not contain the name of an unparsed entity
5975 *
5976 * Returns the xmlEntityPtr if found, or NULL otherwise.
5977 */
5978xmlEntityPtr
5979xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005980 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005981 xmlEntityPtr ent = NULL;
5982
5983 GROW;
5984
5985 if (RAW == '&') {
5986 NEXT;
5987 name = xmlParseName(ctxt);
5988 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005989 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5990 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005991 } else {
5992 if (RAW == ';') {
5993 NEXT;
5994 /*
5995 * Ask first SAX for entity resolution, otherwise try the
5996 * predefined set.
5997 */
5998 if (ctxt->sax != NULL) {
5999 if (ctxt->sax->getEntity != NULL)
6000 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006001 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006002 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006003 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6004 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006005 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006006 }
Owen Taylor3473f882001-02-23 17:55:21 +00006007 }
6008 /*
6009 * [ WFC: Entity Declared ]
6010 * In a document without any DTD, a document with only an
6011 * internal DTD subset which contains no parameter entity
6012 * references, or a document with "standalone='yes'", the
6013 * Name given in the entity reference must match that in an
6014 * entity declaration, except that well-formed documents
6015 * need not declare any of the following entities: amp, lt,
6016 * gt, apos, quot.
6017 * The declaration of a parameter entity must precede any
6018 * reference to it.
6019 * Similarly, the declaration of a general entity must
6020 * precede any reference to it which appears in a default
6021 * value in an attribute-list declaration. Note that if
6022 * entities are declared in the external subset or in
6023 * external parameter entities, a non-validating processor
6024 * is not obligated to read and process their declarations;
6025 * for such documents, the rule that an entity must be
6026 * declared is a well-formedness constraint only if
6027 * standalone='yes'.
6028 */
6029 if (ent == NULL) {
6030 if ((ctxt->standalone == 1) ||
6031 ((ctxt->hasExternalSubset == 0) &&
6032 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006033 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006034 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00006035 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006036 } else {
6037 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00006038 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00006039 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00006040 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00006041 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006042 }
6043 }
6044
6045 /*
6046 * [ WFC: Parsed Entity ]
6047 * An entity reference must not contain the name of an
6048 * unparsed entity
6049 */
6050 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006051 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006052 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006053 }
6054
6055 /*
6056 * [ WFC: No External Entity References ]
6057 * Attribute values cannot contain direct or indirect
6058 * entity references to external entities.
6059 */
6060 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6061 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006062 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6063 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006064 }
6065 /*
6066 * [ WFC: No < in Attribute Values ]
6067 * The replacement text of any entity referred to directly or
6068 * indirectly in an attribute value (other than "&lt;") must
6069 * not contain a <.
6070 */
6071 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6072 (ent != NULL) &&
6073 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6074 (ent->content != NULL) &&
6075 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006076 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006077 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006078 }
6079
6080 /*
6081 * Internal check, no parameter entities here ...
6082 */
6083 else {
6084 switch (ent->etype) {
6085 case XML_INTERNAL_PARAMETER_ENTITY:
6086 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006087 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6088 "Attempt to reference the parameter entity '%s'\n",
6089 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006090 break;
6091 default:
6092 break;
6093 }
6094 }
6095
6096 /*
6097 * [ WFC: No Recursion ]
6098 * A parsed entity must not contain a recursive reference
6099 * to itself, either directly or indirectly.
6100 * Done somewhere else
6101 */
6102
6103 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006104 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006105 }
Owen Taylor3473f882001-02-23 17:55:21 +00006106 }
6107 }
6108 return(ent);
6109}
6110
6111/**
6112 * xmlParseStringEntityRef:
6113 * @ctxt: an XML parser context
6114 * @str: a pointer to an index in the string
6115 *
6116 * parse ENTITY references declarations, but this version parses it from
6117 * a string value.
6118 *
6119 * [68] EntityRef ::= '&' Name ';'
6120 *
6121 * [ WFC: Entity Declared ]
6122 * In a document without any DTD, a document with only an internal DTD
6123 * subset which contains no parameter entity references, or a document
6124 * with "standalone='yes'", the Name given in the entity reference
6125 * must match that in an entity declaration, except that well-formed
6126 * documents need not declare any of the following entities: amp, lt,
6127 * gt, apos, quot. The declaration of a parameter entity must precede
6128 * any reference to it. Similarly, the declaration of a general entity
6129 * must precede any reference to it which appears in a default value in an
6130 * attribute-list declaration. Note that if entities are declared in the
6131 * external subset or in external parameter entities, a non-validating
6132 * processor is not obligated to read and process their declarations;
6133 * for such documents, the rule that an entity must be declared is a
6134 * well-formedness constraint only if standalone='yes'.
6135 *
6136 * [ WFC: Parsed Entity ]
6137 * An entity reference must not contain the name of an unparsed entity
6138 *
6139 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6140 * is updated to the current location in the string.
6141 */
6142xmlEntityPtr
6143xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6144 xmlChar *name;
6145 const xmlChar *ptr;
6146 xmlChar cur;
6147 xmlEntityPtr ent = NULL;
6148
6149 if ((str == NULL) || (*str == NULL))
6150 return(NULL);
6151 ptr = *str;
6152 cur = *ptr;
6153 if (cur == '&') {
6154 ptr++;
6155 cur = *ptr;
6156 name = xmlParseStringName(ctxt, &ptr);
6157 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006158 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6159 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006160 } else {
6161 if (*ptr == ';') {
6162 ptr++;
6163 /*
6164 * Ask first SAX for entity resolution, otherwise try the
6165 * predefined set.
6166 */
6167 if (ctxt->sax != NULL) {
6168 if (ctxt->sax->getEntity != NULL)
6169 ent = ctxt->sax->getEntity(ctxt->userData, name);
6170 if (ent == NULL)
6171 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006172 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006173 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006174 }
Owen Taylor3473f882001-02-23 17:55:21 +00006175 }
6176 /*
6177 * [ WFC: Entity Declared ]
6178 * In a document without any DTD, a document with only an
6179 * internal DTD subset which contains no parameter entity
6180 * references, or a document with "standalone='yes'", the
6181 * Name given in the entity reference must match that in an
6182 * entity declaration, except that well-formed documents
6183 * need not declare any of the following entities: amp, lt,
6184 * gt, apos, quot.
6185 * The declaration of a parameter entity must precede any
6186 * reference to it.
6187 * Similarly, the declaration of a general entity must
6188 * precede any reference to it which appears in a default
6189 * value in an attribute-list declaration. Note that if
6190 * entities are declared in the external subset or in
6191 * external parameter entities, a non-validating processor
6192 * is not obligated to read and process their declarations;
6193 * for such documents, the rule that an entity must be
6194 * declared is a well-formedness constraint only if
6195 * standalone='yes'.
6196 */
6197 if (ent == NULL) {
6198 if ((ctxt->standalone == 1) ||
6199 ((ctxt->hasExternalSubset == 0) &&
6200 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006201 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006202 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006203 } else {
6204 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
6205 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6206 ctxt->sax->warning(ctxt->userData,
6207 "Entity '%s' not defined\n", name);
6208 }
6209 }
6210
6211 /*
6212 * [ WFC: Parsed Entity ]
6213 * An entity reference must not contain the name of an
6214 * unparsed entity
6215 */
6216 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6217 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
6218 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6219 ctxt->sax->error(ctxt->userData,
6220 "Entity reference to unparsed entity %s\n", name);
6221 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006222 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006223 }
6224
6225 /*
6226 * [ WFC: No External Entity References ]
6227 * Attribute values cannot contain direct or indirect
6228 * entity references to external entities.
6229 */
6230 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6231 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6232 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6233 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6234 ctxt->sax->error(ctxt->userData,
6235 "Attribute references external entity '%s'\n", name);
6236 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006237 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006238 }
6239 /*
6240 * [ WFC: No < in Attribute Values ]
6241 * The replacement text of any entity referred to directly or
6242 * indirectly in an attribute value (other than "&lt;") must
6243 * not contain a <.
6244 */
6245 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6246 (ent != NULL) &&
6247 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6248 (ent->content != NULL) &&
6249 (xmlStrchr(ent->content, '<'))) {
6250 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6251 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6252 ctxt->sax->error(ctxt->userData,
6253 "'<' in entity '%s' is not allowed in attributes values\n", name);
6254 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006255 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006256 }
6257
6258 /*
6259 * Internal check, no parameter entities here ...
6260 */
6261 else {
6262 switch (ent->etype) {
6263 case XML_INTERNAL_PARAMETER_ENTITY:
6264 case XML_EXTERNAL_PARAMETER_ENTITY:
6265 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6266 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6267 ctxt->sax->error(ctxt->userData,
6268 "Attempt to reference the parameter entity '%s'\n", name);
6269 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006270 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006271 break;
6272 default:
6273 break;
6274 }
6275 }
6276
6277 /*
6278 * [ WFC: No Recursion ]
6279 * A parsed entity must not contain a recursive reference
6280 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006281 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006282 */
6283
6284 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006285 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006286 }
6287 xmlFree(name);
6288 }
6289 }
6290 *str = ptr;
6291 return(ent);
6292}
6293
6294/**
6295 * xmlParsePEReference:
6296 * @ctxt: an XML parser context
6297 *
6298 * parse PEReference declarations
6299 * The entity content is handled directly by pushing it's content as
6300 * a new input stream.
6301 *
6302 * [69] PEReference ::= '%' Name ';'
6303 *
6304 * [ WFC: No Recursion ]
6305 * A parsed entity must not contain a recursive
6306 * reference to itself, either directly or indirectly.
6307 *
6308 * [ WFC: Entity Declared ]
6309 * In a document without any DTD, a document with only an internal DTD
6310 * subset which contains no parameter entity references, or a document
6311 * with "standalone='yes'", ... ... The declaration of a parameter
6312 * entity must precede any reference to it...
6313 *
6314 * [ VC: Entity Declared ]
6315 * In a document with an external subset or external parameter entities
6316 * with "standalone='no'", ... ... The declaration of a parameter entity
6317 * must precede any reference to it...
6318 *
6319 * [ WFC: In DTD ]
6320 * Parameter-entity references may only appear in the DTD.
6321 * NOTE: misleading but this is handled.
6322 */
6323void
6324xmlParsePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006325 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006326 xmlEntityPtr entity = NULL;
6327 xmlParserInputPtr input;
6328
6329 if (RAW == '%') {
6330 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006331 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006332 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006333 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6334 "xmlParsePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006335 } else {
6336 if (RAW == ';') {
6337 NEXT;
6338 if ((ctxt->sax != NULL) &&
6339 (ctxt->sax->getParameterEntity != NULL))
6340 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6341 name);
6342 if (entity == NULL) {
6343 /*
6344 * [ WFC: Entity Declared ]
6345 * In a document without any DTD, a document with only an
6346 * internal DTD subset which contains no parameter entity
6347 * references, or a document with "standalone='yes'", ...
6348 * ... The declaration of a parameter entity must precede
6349 * any reference to it...
6350 */
6351 if ((ctxt->standalone == 1) ||
6352 ((ctxt->hasExternalSubset == 0) &&
6353 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006354 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006355 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006356 } else {
6357 /*
6358 * [ VC: Entity Declared ]
6359 * In a document with an external subset or external
6360 * parameter entities with "standalone='no'", ...
6361 * ... The declaration of a parameter entity must precede
6362 * any reference to it...
6363 */
6364 if ((!ctxt->disableSAX) &&
6365 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6366 ctxt->sax->warning(ctxt->userData,
6367 "PEReference: %%%s; not found\n", name);
6368 ctxt->valid = 0;
6369 }
6370 } else {
6371 /*
6372 * Internal checking in case the entity quest barfed
6373 */
6374 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6375 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6376 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6377 ctxt->sax->warning(ctxt->userData,
6378 "Internal: %%%s; is not a parameter entity\n", name);
Daniel Veillardf5582f12002-06-11 10:08:16 +00006379 } else if (ctxt->input->free != deallocblankswrapper) {
6380 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6381 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00006382 } else {
6383 /*
6384 * TODO !!!
6385 * handle the extra spaces added before and after
6386 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6387 */
6388 input = xmlNewEntityInputStream(ctxt, entity);
6389 xmlPushInput(ctxt, input);
6390 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6391 (RAW == '<') && (NXT(1) == '?') &&
6392 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6393 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6394 xmlParseTextDecl(ctxt);
6395 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6396 /*
6397 * The XML REC instructs us to stop parsing
6398 * right here
6399 */
6400 ctxt->instate = XML_PARSER_EOF;
Owen Taylor3473f882001-02-23 17:55:21 +00006401 return;
6402 }
6403 }
Owen Taylor3473f882001-02-23 17:55:21 +00006404 }
6405 }
6406 ctxt->hasPErefs = 1;
6407 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006408 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006409 }
Owen Taylor3473f882001-02-23 17:55:21 +00006410 }
6411 }
6412}
6413
6414/**
6415 * xmlParseStringPEReference:
6416 * @ctxt: an XML parser context
6417 * @str: a pointer to an index in the string
6418 *
6419 * parse PEReference declarations
6420 *
6421 * [69] PEReference ::= '%' Name ';'
6422 *
6423 * [ WFC: No Recursion ]
6424 * A parsed entity must not contain a recursive
6425 * reference to itself, either directly or indirectly.
6426 *
6427 * [ WFC: Entity Declared ]
6428 * In a document without any DTD, a document with only an internal DTD
6429 * subset which contains no parameter entity references, or a document
6430 * with "standalone='yes'", ... ... The declaration of a parameter
6431 * entity must precede any reference to it...
6432 *
6433 * [ VC: Entity Declared ]
6434 * In a document with an external subset or external parameter entities
6435 * with "standalone='no'", ... ... The declaration of a parameter entity
6436 * must precede any reference to it...
6437 *
6438 * [ WFC: In DTD ]
6439 * Parameter-entity references may only appear in the DTD.
6440 * NOTE: misleading but this is handled.
6441 *
6442 * Returns the string of the entity content.
6443 * str is updated to the current value of the index
6444 */
6445xmlEntityPtr
6446xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6447 const xmlChar *ptr;
6448 xmlChar cur;
6449 xmlChar *name;
6450 xmlEntityPtr entity = NULL;
6451
6452 if ((str == NULL) || (*str == NULL)) return(NULL);
6453 ptr = *str;
6454 cur = *ptr;
6455 if (cur == '%') {
6456 ptr++;
6457 cur = *ptr;
6458 name = xmlParseStringName(ctxt, &ptr);
6459 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006460 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6461 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006462 } else {
6463 cur = *ptr;
6464 if (cur == ';') {
6465 ptr++;
6466 cur = *ptr;
6467 if ((ctxt->sax != NULL) &&
6468 (ctxt->sax->getParameterEntity != NULL))
6469 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6470 name);
6471 if (entity == NULL) {
6472 /*
6473 * [ WFC: Entity Declared ]
6474 * In a document without any DTD, a document with only an
6475 * internal DTD subset which contains no parameter entity
6476 * references, or a document with "standalone='yes'", ...
6477 * ... The declaration of a parameter entity must precede
6478 * any reference to it...
6479 */
6480 if ((ctxt->standalone == 1) ||
6481 ((ctxt->hasExternalSubset == 0) &&
6482 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006483 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006484 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006485 } else {
6486 /*
6487 * [ VC: Entity Declared ]
6488 * In a document with an external subset or external
6489 * parameter entities with "standalone='no'", ...
6490 * ... The declaration of a parameter entity must
6491 * precede any reference to it...
6492 */
6493 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6494 ctxt->sax->warning(ctxt->userData,
6495 "PEReference: %%%s; not found\n", name);
6496 ctxt->valid = 0;
6497 }
6498 } else {
6499 /*
6500 * Internal checking in case the entity quest barfed
6501 */
6502 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6503 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6504 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6505 ctxt->sax->warning(ctxt->userData,
6506 "Internal: %%%s; is not a parameter entity\n", name);
6507 }
6508 }
6509 ctxt->hasPErefs = 1;
6510 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006511 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006512 }
6513 xmlFree(name);
6514 }
6515 }
6516 *str = ptr;
6517 return(entity);
6518}
6519
6520/**
6521 * xmlParseDocTypeDecl:
6522 * @ctxt: an XML parser context
6523 *
6524 * parse a DOCTYPE declaration
6525 *
6526 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6527 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6528 *
6529 * [ VC: Root Element Type ]
6530 * The Name in the document type declaration must match the element
6531 * type of the root element.
6532 */
6533
6534void
6535xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006536 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006537 xmlChar *ExternalID = NULL;
6538 xmlChar *URI = NULL;
6539
6540 /*
6541 * We know that '<!DOCTYPE' has been detected.
6542 */
6543 SKIP(9);
6544
6545 SKIP_BLANKS;
6546
6547 /*
6548 * Parse the DOCTYPE name.
6549 */
6550 name = xmlParseName(ctxt);
6551 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006552 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6553 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006554 }
6555 ctxt->intSubName = name;
6556
6557 SKIP_BLANKS;
6558
6559 /*
6560 * Check for SystemID and ExternalID
6561 */
6562 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6563
6564 if ((URI != NULL) || (ExternalID != NULL)) {
6565 ctxt->hasExternalSubset = 1;
6566 }
6567 ctxt->extSubURI = URI;
6568 ctxt->extSubSystem = ExternalID;
6569
6570 SKIP_BLANKS;
6571
6572 /*
6573 * Create and update the internal subset.
6574 */
6575 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6576 (!ctxt->disableSAX))
6577 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6578
6579 /*
6580 * Is there any internal subset declarations ?
6581 * they are handled separately in xmlParseInternalSubset()
6582 */
6583 if (RAW == '[')
6584 return;
6585
6586 /*
6587 * We should be at the end of the DOCTYPE declaration.
6588 */
6589 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006590 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006591 }
6592 NEXT;
6593}
6594
6595/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006596 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006597 * @ctxt: an XML parser context
6598 *
6599 * parse the internal subset declaration
6600 *
6601 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6602 */
6603
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006604static void
Owen Taylor3473f882001-02-23 17:55:21 +00006605xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6606 /*
6607 * Is there any DTD definition ?
6608 */
6609 if (RAW == '[') {
6610 ctxt->instate = XML_PARSER_DTD;
6611 NEXT;
6612 /*
6613 * Parse the succession of Markup declarations and
6614 * PEReferences.
6615 * Subsequence (markupdecl | PEReference | S)*
6616 */
6617 while (RAW != ']') {
6618 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006619 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006620
6621 SKIP_BLANKS;
6622 xmlParseMarkupDecl(ctxt);
6623 xmlParsePEReference(ctxt);
6624
6625 /*
6626 * Pop-up of finished entities.
6627 */
6628 while ((RAW == 0) && (ctxt->inputNr > 1))
6629 xmlPopInput(ctxt);
6630
6631 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006632 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006633 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006634 break;
6635 }
6636 }
6637 if (RAW == ']') {
6638 NEXT;
6639 SKIP_BLANKS;
6640 }
6641 }
6642
6643 /*
6644 * We should be at the end of the DOCTYPE declaration.
6645 */
6646 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006647 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006648 }
6649 NEXT;
6650}
6651
6652/**
6653 * xmlParseAttribute:
6654 * @ctxt: an XML parser context
6655 * @value: a xmlChar ** used to store the value of the attribute
6656 *
6657 * parse an attribute
6658 *
6659 * [41] Attribute ::= Name Eq AttValue
6660 *
6661 * [ WFC: No External Entity References ]
6662 * Attribute values cannot contain direct or indirect entity references
6663 * to external entities.
6664 *
6665 * [ WFC: No < in Attribute Values ]
6666 * The replacement text of any entity referred to directly or indirectly in
6667 * an attribute value (other than "&lt;") must not contain a <.
6668 *
6669 * [ VC: Attribute Value Type ]
6670 * The attribute must have been declared; the value must be of the type
6671 * declared for it.
6672 *
6673 * [25] Eq ::= S? '=' S?
6674 *
6675 * With namespace:
6676 *
6677 * [NS 11] Attribute ::= QName Eq AttValue
6678 *
6679 * Also the case QName == xmlns:??? is handled independently as a namespace
6680 * definition.
6681 *
6682 * Returns the attribute name, and the value in *value.
6683 */
6684
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006685const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006686xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006687 const xmlChar *name;
6688 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006689
6690 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006691 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006692 name = xmlParseName(ctxt);
6693 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006694 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6695 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006696 return(NULL);
6697 }
6698
6699 /*
6700 * read the value
6701 */
6702 SKIP_BLANKS;
6703 if (RAW == '=') {
6704 NEXT;
6705 SKIP_BLANKS;
6706 val = xmlParseAttValue(ctxt);
6707 ctxt->instate = XML_PARSER_CONTENT;
6708 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006709 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006710 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006711 return(NULL);
6712 }
6713
6714 /*
6715 * Check that xml:lang conforms to the specification
6716 * No more registered as an error, just generate a warning now
6717 * since this was deprecated in XML second edition
6718 */
6719 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6720 if (!xmlCheckLanguageID(val)) {
6721 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6722 ctxt->sax->warning(ctxt->userData,
6723 "Malformed value for xml:lang : %s\n", val);
6724 }
6725 }
6726
6727 /*
6728 * Check that xml:space conforms to the specification
6729 */
6730 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6731 if (xmlStrEqual(val, BAD_CAST "default"))
6732 *(ctxt->space) = 0;
6733 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6734 *(ctxt->space) = 1;
6735 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006736 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006737"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006738 val);
Owen Taylor3473f882001-02-23 17:55:21 +00006739 }
6740 }
6741
6742 *value = val;
6743 return(name);
6744}
6745
6746/**
6747 * xmlParseStartTag:
6748 * @ctxt: an XML parser context
6749 *
6750 * parse a start of tag either for rule element or
6751 * EmptyElement. In both case we don't parse the tag closing chars.
6752 *
6753 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6754 *
6755 * [ WFC: Unique Att Spec ]
6756 * No attribute name may appear more than once in the same start-tag or
6757 * empty-element tag.
6758 *
6759 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6760 *
6761 * [ WFC: Unique Att Spec ]
6762 * No attribute name may appear more than once in the same start-tag or
6763 * empty-element tag.
6764 *
6765 * With namespace:
6766 *
6767 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6768 *
6769 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6770 *
6771 * Returns the element name parsed
6772 */
6773
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006774const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006775xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006776 const xmlChar *name;
6777 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006778 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006779 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006780 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006781 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006782 int i;
6783
6784 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006785 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006786
6787 name = xmlParseName(ctxt);
6788 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006789 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006790 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006791 return(NULL);
6792 }
6793
6794 /*
6795 * Now parse the attributes, it ends up with the ending
6796 *
6797 * (S Attribute)* S?
6798 */
6799 SKIP_BLANKS;
6800 GROW;
6801
Daniel Veillard21a0f912001-02-25 19:54:14 +00006802 while ((RAW != '>') &&
6803 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard34ba3872003-07-15 13:34:05 +00006804 (IS_CHAR((unsigned int) RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006805 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006806 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006807
6808 attname = xmlParseAttribute(ctxt, &attvalue);
6809 if ((attname != NULL) && (attvalue != NULL)) {
6810 /*
6811 * [ WFC: Unique Att Spec ]
6812 * No attribute name may appear more than once in the same
6813 * start-tag or empty-element tag.
6814 */
6815 for (i = 0; i < nbatts;i += 2) {
6816 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006817 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006818 xmlFree(attvalue);
6819 goto failed;
6820 }
6821 }
Owen Taylor3473f882001-02-23 17:55:21 +00006822 /*
6823 * Add the pair to atts
6824 */
6825 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006826 maxatts = 22; /* allow for 10 attrs by default */
6827 atts = (const xmlChar **)
6828 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006829 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006830 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006831 if (attvalue != NULL)
6832 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006833 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006834 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006835 ctxt->atts = atts;
6836 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006837 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006838 const xmlChar **n;
6839
Owen Taylor3473f882001-02-23 17:55:21 +00006840 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006841 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006842 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006843 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006844 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006845 if (attvalue != NULL)
6846 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006847 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006848 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006849 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006850 ctxt->atts = atts;
6851 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006852 }
6853 atts[nbatts++] = attname;
6854 atts[nbatts++] = attvalue;
6855 atts[nbatts] = NULL;
6856 atts[nbatts + 1] = NULL;
6857 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006858 if (attvalue != NULL)
6859 xmlFree(attvalue);
6860 }
6861
6862failed:
6863
Daniel Veillard3772de32002-12-17 10:31:45 +00006864 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006865 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6866 break;
6867 if (!IS_BLANK(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006868 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6869 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006870 }
6871 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006872 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6873 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006874 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
6875 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006876 break;
6877 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006878 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00006879 GROW;
6880 }
6881
6882 /*
6883 * SAX: Start of Element !
6884 */
6885 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006886 (!ctxt->disableSAX)) {
6887 if (nbatts > 0)
6888 ctxt->sax->startElement(ctxt->userData, name, atts);
6889 else
6890 ctxt->sax->startElement(ctxt->userData, name, NULL);
6891 }
Owen Taylor3473f882001-02-23 17:55:21 +00006892
6893 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006894 /* Free only the content strings */
6895 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006896 if (atts[i] != NULL)
6897 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006898 }
6899 return(name);
6900}
6901
6902/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00006903 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00006904 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00006905 * @line: line of the start tag
6906 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00006907 *
6908 * parse an end of tag
6909 *
6910 * [42] ETag ::= '</' Name S? '>'
6911 *
6912 * With namespace
6913 *
6914 * [NS 9] ETag ::= '</' QName S? '>'
6915 */
6916
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006917static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00006918xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006919 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006920
6921 GROW;
6922 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006923 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6924 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006925 return;
6926 }
6927 SKIP(2);
6928
Daniel Veillard46de64e2002-05-29 08:21:33 +00006929 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006930
6931 /*
6932 * We should definitely be at the ending "S? '>'" part
6933 */
6934 GROW;
6935 SKIP_BLANKS;
Daniel Veillard34ba3872003-07-15 13:34:05 +00006936 if ((!IS_CHAR((unsigned int) RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006937 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006938 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006939 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006940
6941 /*
6942 * [ WFC: Element Type Match ]
6943 * The Name in an element's end-tag must match the element type in the
6944 * start-tag.
6945 *
6946 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006947 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00006948 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6949 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00006950 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00006951 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006952 "Opening and ending tag mismatch: %s line %d and %s\n",
6953 ctxt->name, line, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00006954 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006955 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006956 "Ending tag error for: %s line %d\n", ctxt->name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00006957 }
6958
6959 }
6960 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006961 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006962 }
6963
6964 /*
6965 * SAX: End of Tag
6966 */
6967 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6968 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006969 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006970
Daniel Veillarde57ec792003-09-10 10:50:59 +00006971 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006972 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006973 return;
6974}
6975
6976/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006977 * xmlParseEndTag:
6978 * @ctxt: an XML parser context
6979 *
6980 * parse an end of tag
6981 *
6982 * [42] ETag ::= '</' Name S? '>'
6983 *
6984 * With namespace
6985 *
6986 * [NS 9] ETag ::= '</' QName S? '>'
6987 */
6988
6989void
6990xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006991 xmlParseEndTag1(ctxt, 0);
6992}
6993
6994/************************************************************************
6995 * *
6996 * SAX 2 specific operations *
6997 * *
6998 ************************************************************************/
6999
7000static const xmlChar *
7001xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7002 int len = 0, l;
7003 int c;
7004 int count = 0;
7005
7006 /*
7007 * Handler for more complex cases
7008 */
7009 GROW;
7010 c = CUR_CHAR(l);
7011 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007012 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007013 return(NULL);
7014 }
7015
7016 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
7017 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007018 (c == '.') || (c == '-') || (c == '_') ||
Daniel Veillard0fb18932003-09-07 09:14:37 +00007019 (IS_COMBINING(c)) ||
7020 (IS_EXTENDER(c)))) {
7021 if (count++ > 100) {
7022 count = 0;
7023 GROW;
7024 }
7025 len += l;
7026 NEXTL(l);
7027 c = CUR_CHAR(l);
7028 }
7029 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7030}
7031
7032/*
7033 * xmlGetNamespace:
7034 * @ctxt: an XML parser context
7035 * @prefix: the prefix to lookup
7036 *
7037 * Lookup the namespace name for the @prefix (which ca be NULL)
7038 * The prefix must come from the @ctxt->dict dictionnary
7039 *
7040 * Returns the namespace name or NULL if not bound
7041 */
7042static const xmlChar *
7043xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7044 int i;
7045
Daniel Veillarde57ec792003-09-10 10:50:59 +00007046 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007047 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007048 if (ctxt->nsTab[i] == prefix) {
7049 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7050 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007051 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007052 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007053 return(NULL);
7054}
7055
7056/**
7057 * xmlParseNCName:
7058 * @ctxt: an XML parser context
7059 *
7060 * parse an XML name.
7061 *
7062 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7063 * CombiningChar | Extender
7064 *
7065 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7066 *
7067 * Returns the Name parsed or NULL
7068 */
7069
7070static const xmlChar *
7071xmlParseNCName(xmlParserCtxtPtr ctxt) {
7072 const xmlChar *in;
7073 const xmlChar *ret;
7074 int count = 0;
7075
7076 /*
7077 * Accelerator for simple ASCII names
7078 */
7079 in = ctxt->input->cur;
7080 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7081 ((*in >= 0x41) && (*in <= 0x5A)) ||
7082 (*in == '_')) {
7083 in++;
7084 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7085 ((*in >= 0x41) && (*in <= 0x5A)) ||
7086 ((*in >= 0x30) && (*in <= 0x39)) ||
7087 (*in == '_') || (*in == '-') ||
7088 (*in == '.'))
7089 in++;
7090 if ((*in > 0) && (*in < 0x80)) {
7091 count = in - ctxt->input->cur;
7092 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7093 ctxt->input->cur = in;
7094 ctxt->nbChars += count;
7095 ctxt->input->col += count;
7096 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007097 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007098 }
7099 return(ret);
7100 }
7101 }
7102 return(xmlParseNCNameComplex(ctxt));
7103}
7104
7105/**
7106 * xmlParseQName:
7107 * @ctxt: an XML parser context
7108 * @prefix: pointer to store the prefix part
7109 *
7110 * parse an XML Namespace QName
7111 *
7112 * [6] QName ::= (Prefix ':')? LocalPart
7113 * [7] Prefix ::= NCName
7114 * [8] LocalPart ::= NCName
7115 *
7116 * Returns the Name parsed or NULL
7117 */
7118
7119static const xmlChar *
7120xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7121 const xmlChar *l, *p;
7122
7123 GROW;
7124
7125 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007126 if (l == NULL) {
7127 if (CUR == ':') {
7128 l = xmlParseName(ctxt);
7129 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007130 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7131 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007132 *prefix = NULL;
7133 return(l);
7134 }
7135 }
7136 return(NULL);
7137 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007138 if (CUR == ':') {
7139 NEXT;
7140 p = l;
7141 l = xmlParseNCName(ctxt);
7142 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007143 xmlChar *tmp;
7144
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007145 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7146 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007147 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7148 p = xmlDictLookup(ctxt->dict, tmp, -1);
7149 if (tmp != NULL) xmlFree(tmp);
7150 *prefix = NULL;
7151 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007152 }
7153 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007154 xmlChar *tmp;
7155
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007156 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7157 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007158 NEXT;
7159 tmp = (xmlChar *) xmlParseName(ctxt);
7160 if (tmp != NULL) {
7161 tmp = xmlBuildQName(tmp, l, NULL, 0);
7162 l = xmlDictLookup(ctxt->dict, tmp, -1);
7163 if (tmp != NULL) xmlFree(tmp);
7164 *prefix = p;
7165 return(l);
7166 }
7167 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7168 l = xmlDictLookup(ctxt->dict, tmp, -1);
7169 if (tmp != NULL) xmlFree(tmp);
7170 *prefix = p;
7171 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007172 }
7173 *prefix = p;
7174 } else
7175 *prefix = NULL;
7176 return(l);
7177}
7178
7179/**
7180 * xmlParseQNameAndCompare:
7181 * @ctxt: an XML parser context
7182 * @name: the localname
7183 * @prefix: the prefix, if any.
7184 *
7185 * parse an XML name and compares for match
7186 * (specialized for endtag parsing)
7187 *
7188 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7189 * and the name for mismatch
7190 */
7191
7192static const xmlChar *
7193xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7194 xmlChar const *prefix) {
7195 const xmlChar *cmp = name;
7196 const xmlChar *in;
7197 const xmlChar *ret;
7198 const xmlChar *prefix2;
7199
7200 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7201
7202 GROW;
7203 in = ctxt->input->cur;
7204
7205 cmp = prefix;
7206 while (*in != 0 && *in == *cmp) {
7207 ++in;
7208 ++cmp;
7209 }
7210 if ((*cmp == 0) && (*in == ':')) {
7211 in++;
7212 cmp = name;
7213 while (*in != 0 && *in == *cmp) {
7214 ++in;
7215 ++cmp;
7216 }
7217 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
7218 /* success */
7219 ctxt->input->cur = in;
7220 return((const xmlChar*) 1);
7221 }
7222 }
7223 /*
7224 * all strings coms from the dictionary, equality can be done directly
7225 */
7226 ret = xmlParseQName (ctxt, &prefix2);
7227 if ((ret == name) && (prefix == prefix2))
7228 return((const xmlChar*) 1);
7229 return ret;
7230}
7231
7232/**
7233 * xmlParseAttValueInternal:
7234 * @ctxt: an XML parser context
7235 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007236 * @alloc: whether the attribute was reallocated as a new string
7237 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007238 *
7239 * parse a value for an attribute.
7240 * NOTE: if no normalization is needed, the routine will return pointers
7241 * directly from the data buffer.
7242 *
7243 * 3.3.3 Attribute-Value Normalization:
7244 * Before the value of an attribute is passed to the application or
7245 * checked for validity, the XML processor must normalize it as follows:
7246 * - a character reference is processed by appending the referenced
7247 * character to the attribute value
7248 * - an entity reference is processed by recursively processing the
7249 * replacement text of the entity
7250 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7251 * appending #x20 to the normalized value, except that only a single
7252 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7253 * parsed entity or the literal entity value of an internal parsed entity
7254 * - other characters are processed by appending them to the normalized value
7255 * If the declared value is not CDATA, then the XML processor must further
7256 * process the normalized attribute value by discarding any leading and
7257 * trailing space (#x20) characters, and by replacing sequences of space
7258 * (#x20) characters by a single space (#x20) character.
7259 * All attributes for which no declaration has been read should be treated
7260 * by a non-validating parser as if declared CDATA.
7261 *
7262 * Returns the AttValue parsed or NULL. The value has to be freed by the
7263 * caller if it was copied, this can be detected by val[*len] == 0.
7264 */
7265
7266static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007267xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7268 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007269{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007270 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007271 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007272 xmlChar *ret = NULL;
7273
7274 GROW;
7275 in = (xmlChar *) CUR_PTR;
7276 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007277 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007278 return (NULL);
7279 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007280 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007281
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007282 /*
7283 * try to handle in this routine the most common case where no
7284 * allocation of a new string is required and where content is
7285 * pure ASCII.
7286 */
7287 limit = *in++;
7288 end = ctxt->input->end;
7289 start = in;
7290 if (in >= end) {
7291 const xmlChar *oldbase = ctxt->input->base;
7292 GROW;
7293 if (oldbase != ctxt->input->base) {
7294 long delta = ctxt->input->base - oldbase;
7295 start = start + delta;
7296 in = in + delta;
7297 }
7298 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007299 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007300 if (normalize) {
7301 /*
7302 * Skip any leading spaces
7303 */
7304 while ((in < end) && (*in != limit) &&
7305 ((*in == 0x20) || (*in == 0x9) ||
7306 (*in == 0xA) || (*in == 0xD))) {
7307 in++;
7308 start = in;
7309 if (in >= end) {
7310 const xmlChar *oldbase = ctxt->input->base;
7311 GROW;
7312 if (oldbase != ctxt->input->base) {
7313 long delta = ctxt->input->base - oldbase;
7314 start = start + delta;
7315 in = in + delta;
7316 }
7317 end = ctxt->input->end;
7318 }
7319 }
7320 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7321 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7322 if ((*in++ == 0x20) && (*in == 0x20)) break;
7323 if (in >= end) {
7324 const xmlChar *oldbase = ctxt->input->base;
7325 GROW;
7326 if (oldbase != ctxt->input->base) {
7327 long delta = ctxt->input->base - oldbase;
7328 start = start + delta;
7329 in = in + delta;
7330 }
7331 end = ctxt->input->end;
7332 }
7333 }
7334 last = in;
7335 /*
7336 * skip the trailing blanks
7337 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007338 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007339 while ((in < end) && (*in != limit) &&
7340 ((*in == 0x20) || (*in == 0x9) ||
7341 (*in == 0xA) || (*in == 0xD))) {
7342 in++;
7343 if (in >= end) {
7344 const xmlChar *oldbase = ctxt->input->base;
7345 GROW;
7346 if (oldbase != ctxt->input->base) {
7347 long delta = ctxt->input->base - oldbase;
7348 start = start + delta;
7349 in = in + delta;
7350 last = last + delta;
7351 }
7352 end = ctxt->input->end;
7353 }
7354 }
7355 if (*in != limit) goto need_complex;
7356 } else {
7357 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7358 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7359 in++;
7360 if (in >= end) {
7361 const xmlChar *oldbase = ctxt->input->base;
7362 GROW;
7363 if (oldbase != ctxt->input->base) {
7364 long delta = ctxt->input->base - oldbase;
7365 start = start + delta;
7366 in = in + delta;
7367 }
7368 end = ctxt->input->end;
7369 }
7370 }
7371 last = in;
7372 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007373 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007374 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007375 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007376 *len = last - start;
7377 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007378 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007379 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007380 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007381 }
7382 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007383 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007384 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007385need_complex:
7386 if (alloc) *alloc = 1;
7387 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007388}
7389
7390/**
7391 * xmlParseAttribute2:
7392 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007393 * @pref: the element prefix
7394 * @elem: the element name
7395 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007396 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007397 * @len: an int * to save the length of the attribute
7398 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007399 *
7400 * parse an attribute in the new SAX2 framework.
7401 *
7402 * Returns the attribute name, and the value in *value, .
7403 */
7404
7405static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007406xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7407 const xmlChar *pref, const xmlChar *elem,
7408 const xmlChar **prefix, xmlChar **value,
7409 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007410 const xmlChar *name;
7411 xmlChar *val;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007412 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007413
7414 *value = NULL;
7415 GROW;
7416 name = xmlParseQName(ctxt, prefix);
7417 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007418 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7419 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007420 return(NULL);
7421 }
7422
7423 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007424 * get the type if needed
7425 */
7426 if (ctxt->attsSpecial != NULL) {
7427 int type;
7428
7429 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7430 pref, elem, *prefix, name);
7431 if (type != 0) normalize = 1;
7432 }
7433
7434 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007435 * read the value
7436 */
7437 SKIP_BLANKS;
7438 if (RAW == '=') {
7439 NEXT;
7440 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007441 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007442 ctxt->instate = XML_PARSER_CONTENT;
7443 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007444 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007445 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007446 return(NULL);
7447 }
7448
7449 /*
7450 * Check that xml:lang conforms to the specification
7451 * No more registered as an error, just generate a warning now
7452 * since this was deprecated in XML second edition
7453 */
7454 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7455 if (!xmlCheckLanguageID(val)) {
7456 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7457 ctxt->sax->warning(ctxt->userData,
7458 "Malformed value for xml:lang : %s\n", val);
7459 }
7460 }
7461
7462 /*
7463 * Check that xml:space conforms to the specification
7464 */
7465 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7466 if (xmlStrEqual(val, BAD_CAST "default"))
7467 *(ctxt->space) = 0;
7468 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7469 *(ctxt->space) = 1;
7470 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007471 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007472"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7473 val);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007474 }
7475 }
7476
7477 *value = val;
7478 return(name);
7479}
7480
7481/**
7482 * xmlParseStartTag2:
7483 * @ctxt: an XML parser context
7484 *
7485 * parse a start of tag either for rule element or
7486 * EmptyElement. In both case we don't parse the tag closing chars.
7487 * This routine is called when running SAX2 parsing
7488 *
7489 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7490 *
7491 * [ WFC: Unique Att Spec ]
7492 * No attribute name may appear more than once in the same start-tag or
7493 * empty-element tag.
7494 *
7495 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7496 *
7497 * [ WFC: Unique Att Spec ]
7498 * No attribute name may appear more than once in the same start-tag or
7499 * empty-element tag.
7500 *
7501 * With namespace:
7502 *
7503 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7504 *
7505 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7506 *
7507 * Returns the element name parsed
7508 */
7509
7510static const xmlChar *
7511xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
7512 const xmlChar **URI) {
7513 const xmlChar *localname;
7514 const xmlChar *prefix;
7515 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007516 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007517 const xmlChar *nsname;
7518 xmlChar *attvalue;
7519 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007520 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007521 int nratts, nbatts, nbdef;
7522 int i, j, nbNs, attval;
7523 const xmlChar *base;
7524 unsigned long cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007525
7526 if (RAW != '<') return(NULL);
7527 NEXT1;
7528
7529 /*
7530 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7531 * point since the attribute values may be stored as pointers to
7532 * the buffer and calling SHRINK would destroy them !
7533 * The Shrinking is only possible once the full set of attribute
7534 * callbacks have been done.
7535 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007536reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007537 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007538 base = ctxt->input->base;
7539 cur = ctxt->input->cur - ctxt->input->base;
7540 nbatts = 0;
7541 nratts = 0;
7542 nbdef = 0;
7543 nbNs = 0;
7544 attval = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007545
7546 localname = xmlParseQName(ctxt, &prefix);
7547 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007548 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7549 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007550 return(NULL);
7551 }
7552
7553 /*
7554 * Now parse the attributes, it ends up with the ending
7555 *
7556 * (S Attribute)* S?
7557 */
7558 SKIP_BLANKS;
7559 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007560 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007561
7562 while ((RAW != '>') &&
7563 ((RAW != '/') || (NXT(1) != '>')) &&
7564 (IS_CHAR((unsigned int) RAW))) {
7565 const xmlChar *q = CUR_PTR;
7566 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007567 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007568
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007569 attname = xmlParseAttribute2(ctxt, prefix, localname,
7570 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007571 if ((attname != NULL) && (attvalue != NULL)) {
7572 if (len < 0) len = xmlStrlen(attvalue);
7573 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007574 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7575 xmlURIPtr uri;
7576
7577 if (*URL != 0) {
7578 uri = xmlParseURI((const char *) URL);
7579 if (uri == NULL) {
7580 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7581 ctxt->sax->warning(ctxt->userData,
7582 "xmlns: %s not a valid URI\n", URL);
7583 } else {
7584 if (uri->scheme == NULL) {
7585 if ((ctxt->sax != NULL) &&
7586 (ctxt->sax->warning != NULL))
7587 ctxt->sax->warning(ctxt->userData,
7588 "xmlns: URI %s is not absolute\n", URL);
7589 }
7590 xmlFreeURI(uri);
7591 }
7592 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007593 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007594 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007595 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007596 for (j = 1;j <= nbNs;j++)
7597 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7598 break;
7599 if (j <= nbNs)
7600 xmlErrAttributeDup(ctxt, NULL, attname);
7601 else
7602 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007603 if (alloc != 0) xmlFree(attvalue);
7604 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007605 continue;
7606 }
7607 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007608 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7609 xmlURIPtr uri;
7610
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007611 if (attname == ctxt->str_xml) {
7612 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007613 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7614 "xml namespace prefix mapped to wrong URI\n",
7615 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007616 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007617 /*
7618 * Do not keep a namespace definition node
7619 */
7620 if (alloc != 0) xmlFree(attvalue);
7621 SKIP_BLANKS;
7622 continue;
7623 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007624 uri = xmlParseURI((const char *) URL);
7625 if (uri == NULL) {
7626 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7627 ctxt->sax->warning(ctxt->userData,
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007628 "xmlns:%s: '%s' is not a valid URI\n",
7629 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007630 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007631 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007632 if ((ctxt->sax != NULL) &&
7633 (ctxt->sax->warning != NULL))
7634 ctxt->sax->warning(ctxt->userData,
7635 "xmlns:%s: URI %s is not absolute\n",
7636 attname, URL);
7637 }
7638 xmlFreeURI(uri);
7639 }
7640
Daniel Veillard0fb18932003-09-07 09:14:37 +00007641 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007642 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007643 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007644 for (j = 1;j <= nbNs;j++)
7645 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7646 break;
7647 if (j <= nbNs)
7648 xmlErrAttributeDup(ctxt, aprefix, attname);
7649 else
7650 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007651 if (alloc != 0) xmlFree(attvalue);
7652 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007653 continue;
7654 }
7655
7656 /*
7657 * Add the pair to atts
7658 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007659 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7660 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007661 if (attvalue[len] == 0)
7662 xmlFree(attvalue);
7663 goto failed;
7664 }
7665 maxatts = ctxt->maxatts;
7666 atts = ctxt->atts;
7667 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007668 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007669 atts[nbatts++] = attname;
7670 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007671 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007672 atts[nbatts++] = attvalue;
7673 attvalue += len;
7674 atts[nbatts++] = attvalue;
7675 /*
7676 * tag if some deallocation is needed
7677 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007678 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007679 } else {
7680 if ((attvalue != NULL) && (attvalue[len] == 0))
7681 xmlFree(attvalue);
7682 }
7683
7684failed:
7685
7686 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007687 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007688 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7689 break;
7690 if (!IS_BLANK(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007691 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7692 "attributes construct error\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007693 }
7694 SKIP_BLANKS;
7695 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7696 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007697 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007698 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007699 break;
7700 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007701 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007702 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007703 }
7704
Daniel Veillard0fb18932003-09-07 09:14:37 +00007705 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007706 * The attributes checkings
Daniel Veillard0fb18932003-09-07 09:14:37 +00007707 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007708 for (i = 0; i < nbatts;i += 5) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007709 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7710 if ((atts[i + 1] != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007711 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007712 "Namespace prefix %s for %s on %s is not defined\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007713 atts[i + 1], atts[i], localname);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007714 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007715 atts[i + 2] = nsname;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007716 /*
7717 * [ WFC: Unique Att Spec ]
7718 * No attribute name may appear more than once in the same
7719 * start-tag or empty-element tag.
7720 * As extended by the Namespace in XML REC.
7721 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007722 for (j = 0; j < i;j += 5) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007723 if (atts[i] == atts[j]) {
7724 if (atts[i+1] == atts[j+1]) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007725 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007726 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007727 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007728 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007729 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007730 "Namespaced Attribute %s in '%s' redefined\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007731 atts[i], nsname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007732 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007733 }
7734 }
7735 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007736 }
7737
7738 /*
7739 * The attributes defaulting
7740 */
7741 if (ctxt->attsDefault != NULL) {
7742 xmlDefAttrsPtr defaults;
7743
7744 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7745 if (defaults != NULL) {
7746 for (i = 0;i < defaults->nbAttrs;i++) {
7747 attname = defaults->values[4 * i];
7748 aprefix = defaults->values[4 * i + 1];
7749
7750 /*
7751 * special work for namespaces defaulted defs
7752 */
7753 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7754 /*
7755 * check that it's not a defined namespace
7756 */
7757 for (j = 1;j <= nbNs;j++)
7758 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7759 break;
7760 if (j <= nbNs) continue;
7761
7762 nsname = xmlGetNamespace(ctxt, NULL);
7763 if (nsname != defaults->values[4 * i + 2]) {
7764 if (nsPush(ctxt, NULL,
7765 defaults->values[4 * i + 2]) > 0)
7766 nbNs++;
7767 }
7768 } else if (aprefix == ctxt->str_xmlns) {
7769 /*
7770 * check that it's not a defined namespace
7771 */
7772 for (j = 1;j <= nbNs;j++)
7773 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7774 break;
7775 if (j <= nbNs) continue;
7776
7777 nsname = xmlGetNamespace(ctxt, attname);
7778 if (nsname != defaults->values[2]) {
7779 if (nsPush(ctxt, attname,
7780 defaults->values[4 * i + 2]) > 0)
7781 nbNs++;
7782 }
7783 } else {
7784 /*
7785 * check that it's not a defined attribute
7786 */
7787 for (j = 0;j < nbatts;j+=5) {
7788 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7789 break;
7790 }
7791 if (j < nbatts) continue;
7792
7793 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7794 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
7795 goto failed;
7796 }
7797 maxatts = ctxt->maxatts;
7798 atts = ctxt->atts;
7799 }
7800 atts[nbatts++] = attname;
7801 atts[nbatts++] = aprefix;
7802 if (aprefix == NULL)
7803 atts[nbatts++] = NULL;
7804 else
7805 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7806 atts[nbatts++] = defaults->values[4 * i + 2];
7807 atts[nbatts++] = defaults->values[4 * i + 3];
7808 nbdef++;
7809 }
7810 }
7811 }
7812 }
7813
7814 nsname = xmlGetNamespace(ctxt, prefix);
7815 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007816 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7817 "Namespace prefix %s on %s is not defined\n",
7818 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007819 }
7820 *pref = prefix;
7821 *URI = nsname;
7822
7823 /*
7824 * SAX: Start of Element !
7825 */
7826 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7827 (!ctxt->disableSAX)) {
7828 if (nbNs > 0)
7829 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7830 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7831 nbatts / 5, nbdef, atts);
7832 else
7833 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7834 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7835 }
7836
7837 /*
7838 * Free up attribute allocated strings if needed
7839 */
7840 if (attval != 0) {
7841 for (i = 3,j = 0; j < nratts;i += 5,j++)
7842 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7843 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007844 }
7845
7846 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007847
7848base_changed:
7849 /*
7850 * the attribute strings are valid iif the base didn't changed
7851 */
7852 if (attval != 0) {
7853 for (i = 3,j = 0; j < nratts;i += 5,j++)
7854 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7855 xmlFree((xmlChar *) atts[i]);
7856 }
7857 ctxt->input->cur = ctxt->input->base + cur;
7858 if (ctxt->wellFormed == 1) {
7859 goto reparse;
7860 }
7861 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007862}
7863
7864/**
7865 * xmlParseEndTag2:
7866 * @ctxt: an XML parser context
7867 * @line: line of the start tag
7868 * @nsNr: number of namespaces on the start tag
7869 *
7870 * parse an end of tag
7871 *
7872 * [42] ETag ::= '</' Name S? '>'
7873 *
7874 * With namespace
7875 *
7876 * [NS 9] ETag ::= '</' QName S? '>'
7877 */
7878
7879static void
7880xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
7881 const xmlChar *URI, int line, int nsNr) {
7882 const xmlChar *name;
7883
7884 GROW;
7885 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007886 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007887 return;
7888 }
7889 SKIP(2);
7890
7891 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
7892
7893 /*
7894 * We should definitely be at the ending "S? '>'" part
7895 */
7896 GROW;
7897 SKIP_BLANKS;
7898 if ((!IS_CHAR((unsigned int) RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007899 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007900 } else
7901 NEXT1;
7902
7903 /*
7904 * [ WFC: Element Type Match ]
7905 * The Name in an element's end-tag must match the element type in the
7906 * start-tag.
7907 *
7908 */
7909 if (name != (xmlChar*)1) {
7910 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
7911 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
7912 if (name != NULL) {
7913 ctxt->sax->error(ctxt->userData,
7914 "Opening and ending tag mismatch: %s line %d and %s\n",
7915 ctxt->name, line, name);
7916 } else {
7917 ctxt->sax->error(ctxt->userData,
7918 "Ending tag error for: %s line %d\n", ctxt->name, line);
7919 }
7920
7921 }
7922 ctxt->wellFormed = 0;
7923 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
7924 }
7925
7926 /*
7927 * SAX: End of Tag
7928 */
7929 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7930 (!ctxt->disableSAX))
7931 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
7932
Daniel Veillard0fb18932003-09-07 09:14:37 +00007933 spacePop(ctxt);
7934 if (nsNr != 0)
7935 nsPop(ctxt, nsNr);
7936 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007937}
7938
7939/**
Owen Taylor3473f882001-02-23 17:55:21 +00007940 * xmlParseCDSect:
7941 * @ctxt: an XML parser context
7942 *
7943 * Parse escaped pure raw content.
7944 *
7945 * [18] CDSect ::= CDStart CData CDEnd
7946 *
7947 * [19] CDStart ::= '<![CDATA['
7948 *
7949 * [20] Data ::= (Char* - (Char* ']]>' Char*))
7950 *
7951 * [21] CDEnd ::= ']]>'
7952 */
7953void
7954xmlParseCDSect(xmlParserCtxtPtr ctxt) {
7955 xmlChar *buf = NULL;
7956 int len = 0;
7957 int size = XML_PARSER_BUFFER_SIZE;
7958 int r, rl;
7959 int s, sl;
7960 int cur, l;
7961 int count = 0;
7962
7963 if ((NXT(0) == '<') && (NXT(1) == '!') &&
7964 (NXT(2) == '[') && (NXT(3) == 'C') &&
7965 (NXT(4) == 'D') && (NXT(5) == 'A') &&
7966 (NXT(6) == 'T') && (NXT(7) == 'A') &&
7967 (NXT(8) == '[')) {
7968 SKIP(9);
7969 } else
7970 return;
7971
7972 ctxt->instate = XML_PARSER_CDATA_SECTION;
7973 r = CUR_CHAR(rl);
7974 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007975 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007976 ctxt->instate = XML_PARSER_CONTENT;
7977 return;
7978 }
7979 NEXTL(rl);
7980 s = CUR_CHAR(sl);
7981 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007982 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007983 ctxt->instate = XML_PARSER_CONTENT;
7984 return;
7985 }
7986 NEXTL(sl);
7987 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007988 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007989 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007990 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007991 return;
7992 }
7993 while (IS_CHAR(cur) &&
7994 ((r != ']') || (s != ']') || (cur != '>'))) {
7995 if (len + 5 >= size) {
7996 size *= 2;
7997 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7998 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007999 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008000 return;
8001 }
8002 }
8003 COPY_BUF(rl,buf,len,r);
8004 r = s;
8005 rl = sl;
8006 s = cur;
8007 sl = l;
8008 count++;
8009 if (count > 50) {
8010 GROW;
8011 count = 0;
8012 }
8013 NEXTL(l);
8014 cur = CUR_CHAR(l);
8015 }
8016 buf[len] = 0;
8017 ctxt->instate = XML_PARSER_CONTENT;
8018 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008019 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008020 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008021 xmlFree(buf);
8022 return;
8023 }
8024 NEXTL(l);
8025
8026 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008027 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008028 */
8029 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8030 if (ctxt->sax->cdataBlock != NULL)
8031 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008032 else if (ctxt->sax->characters != NULL)
8033 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008034 }
8035 xmlFree(buf);
8036}
8037
8038/**
8039 * xmlParseContent:
8040 * @ctxt: an XML parser context
8041 *
8042 * Parse a content:
8043 *
8044 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8045 */
8046
8047void
8048xmlParseContent(xmlParserCtxtPtr ctxt) {
8049 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008050 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008051 ((RAW != '<') || (NXT(1) != '/'))) {
8052 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008053 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008054 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008055
8056 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008057 * First case : a Processing Instruction.
8058 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008059 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008060 xmlParsePI(ctxt);
8061 }
8062
8063 /*
8064 * Second case : a CDSection
8065 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008066 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008067 (NXT(2) == '[') && (NXT(3) == 'C') &&
8068 (NXT(4) == 'D') && (NXT(5) == 'A') &&
8069 (NXT(6) == 'T') && (NXT(7) == 'A') &&
8070 (NXT(8) == '[')) {
8071 xmlParseCDSect(ctxt);
8072 }
8073
8074 /*
8075 * Third case : a comment
8076 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008077 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008078 (NXT(2) == '-') && (NXT(3) == '-')) {
8079 xmlParseComment(ctxt);
8080 ctxt->instate = XML_PARSER_CONTENT;
8081 }
8082
8083 /*
8084 * Fourth case : a sub-element.
8085 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008086 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008087 xmlParseElement(ctxt);
8088 }
8089
8090 /*
8091 * Fifth case : a reference. If if has not been resolved,
8092 * parsing returns it's Name, create the node
8093 */
8094
Daniel Veillard21a0f912001-02-25 19:54:14 +00008095 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008096 xmlParseReference(ctxt);
8097 }
8098
8099 /*
8100 * Last case, text. Note that References are handled directly.
8101 */
8102 else {
8103 xmlParseCharData(ctxt, 0);
8104 }
8105
8106 GROW;
8107 /*
8108 * Pop-up of finished entities.
8109 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008110 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008111 xmlPopInput(ctxt);
8112 SHRINK;
8113
Daniel Veillardfdc91562002-07-01 21:52:03 +00008114 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008115 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8116 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008117 ctxt->instate = XML_PARSER_EOF;
8118 break;
8119 }
8120 }
8121}
8122
8123/**
8124 * xmlParseElement:
8125 * @ctxt: an XML parser context
8126 *
8127 * parse an XML element, this is highly recursive
8128 *
8129 * [39] element ::= EmptyElemTag | STag content ETag
8130 *
8131 * [ WFC: Element Type Match ]
8132 * The Name in an element's end-tag must match the element type in the
8133 * start-tag.
8134 *
Owen Taylor3473f882001-02-23 17:55:21 +00008135 */
8136
8137void
8138xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008139 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008140 const xmlChar *prefix;
8141 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008142 xmlParserNodeInfo node_info;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008143 int line;
Owen Taylor3473f882001-02-23 17:55:21 +00008144 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008145 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008146
8147 /* Capture start position */
8148 if (ctxt->record_info) {
8149 node_info.begin_pos = ctxt->input->consumed +
8150 (CUR_PTR - ctxt->input->base);
8151 node_info.begin_line = ctxt->input->line;
8152 }
8153
8154 if (ctxt->spaceNr == 0)
8155 spacePush(ctxt, -1);
8156 else
8157 spacePush(ctxt, *ctxt->space);
8158
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008159 line = ctxt->input->line;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008160 if (ctxt->sax2)
8161 name = xmlParseStartTag2(ctxt, &prefix, &URI);
8162 else
8163 name = xmlParseStartTag(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008164 if (name == NULL) {
8165 spacePop(ctxt);
8166 return;
8167 }
8168 namePush(ctxt, name);
8169 ret = ctxt->node;
8170
8171 /*
8172 * [ VC: Root Element Type ]
8173 * The Name in the document type declaration must match the element
8174 * type of the root element.
8175 */
8176 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8177 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8178 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8179
8180 /*
8181 * Check for an Empty Element.
8182 */
8183 if ((RAW == '/') && (NXT(1) == '>')) {
8184 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008185 if (ctxt->sax2) {
8186 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8187 (!ctxt->disableSAX))
8188 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
8189 } else {
8190 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8191 (!ctxt->disableSAX))
8192 ctxt->sax->endElement(ctxt->userData, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008193 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008194 namePop(ctxt);
8195 spacePop(ctxt);
8196 if (nsNr != ctxt->nsNr)
8197 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008198 if ( ret != NULL && ctxt->record_info ) {
8199 node_info.end_pos = ctxt->input->consumed +
8200 (CUR_PTR - ctxt->input->base);
8201 node_info.end_line = ctxt->input->line;
8202 node_info.node = ret;
8203 xmlParserAddNodeInfo(ctxt, &node_info);
8204 }
8205 return;
8206 }
8207 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008208 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008209 } else {
8210 ctxt->errNo = XML_ERR_GT_REQUIRED;
8211 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8212 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008213 "Couldn't find end of Start Tag %s line %d\n",
8214 name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00008215 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008216 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008217
8218 /*
8219 * end of parsing of this node.
8220 */
8221 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008222 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008223 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008224 if (nsNr != ctxt->nsNr)
8225 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008226
8227 /*
8228 * Capture end position and add node
8229 */
8230 if ( ret != NULL && ctxt->record_info ) {
8231 node_info.end_pos = ctxt->input->consumed +
8232 (CUR_PTR - ctxt->input->base);
8233 node_info.end_line = ctxt->input->line;
8234 node_info.node = ret;
8235 xmlParserAddNodeInfo(ctxt, &node_info);
8236 }
8237 return;
8238 }
8239
8240 /*
8241 * Parse the content of the element:
8242 */
8243 xmlParseContent(ctxt);
Daniel Veillard34ba3872003-07-15 13:34:05 +00008244 if (!IS_CHAR((unsigned int) RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00008245 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00008246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8247 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008248 "Premature end of data in tag %s line %d\n", name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00008249 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008250 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008251
8252 /*
8253 * end of parsing of this node.
8254 */
8255 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008256 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008257 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008258 if (nsNr != ctxt->nsNr)
8259 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008260 return;
8261 }
8262
8263 /*
8264 * parse the end of tag: '</' should be here.
8265 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008266 if (ctxt->sax2) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008267 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008268 namePop(ctxt);
8269 } else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008270 xmlParseEndTag1(ctxt, line);
Owen Taylor3473f882001-02-23 17:55:21 +00008271
8272 /*
8273 * Capture end position and add node
8274 */
8275 if ( ret != NULL && ctxt->record_info ) {
8276 node_info.end_pos = ctxt->input->consumed +
8277 (CUR_PTR - ctxt->input->base);
8278 node_info.end_line = ctxt->input->line;
8279 node_info.node = ret;
8280 xmlParserAddNodeInfo(ctxt, &node_info);
8281 }
8282}
8283
8284/**
8285 * xmlParseVersionNum:
8286 * @ctxt: an XML parser context
8287 *
8288 * parse the XML version value.
8289 *
8290 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8291 *
8292 * Returns the string giving the XML version number, or NULL
8293 */
8294xmlChar *
8295xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8296 xmlChar *buf = NULL;
8297 int len = 0;
8298 int size = 10;
8299 xmlChar cur;
8300
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008301 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008302 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008303 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008304 return(NULL);
8305 }
8306 cur = CUR;
8307 while (((cur >= 'a') && (cur <= 'z')) ||
8308 ((cur >= 'A') && (cur <= 'Z')) ||
8309 ((cur >= '0') && (cur <= '9')) ||
8310 (cur == '_') || (cur == '.') ||
8311 (cur == ':') || (cur == '-')) {
8312 if (len + 1 >= size) {
8313 size *= 2;
8314 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8315 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008316 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008317 return(NULL);
8318 }
8319 }
8320 buf[len++] = cur;
8321 NEXT;
8322 cur=CUR;
8323 }
8324 buf[len] = 0;
8325 return(buf);
8326}
8327
8328/**
8329 * xmlParseVersionInfo:
8330 * @ctxt: an XML parser context
8331 *
8332 * parse the XML version.
8333 *
8334 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8335 *
8336 * [25] Eq ::= S? '=' S?
8337 *
8338 * Returns the version string, e.g. "1.0"
8339 */
8340
8341xmlChar *
8342xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8343 xmlChar *version = NULL;
8344 const xmlChar *q;
8345
8346 if ((RAW == 'v') && (NXT(1) == 'e') &&
8347 (NXT(2) == 'r') && (NXT(3) == 's') &&
8348 (NXT(4) == 'i') && (NXT(5) == 'o') &&
8349 (NXT(6) == 'n')) {
8350 SKIP(7);
8351 SKIP_BLANKS;
8352 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008353 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008354 return(NULL);
8355 }
8356 NEXT;
8357 SKIP_BLANKS;
8358 if (RAW == '"') {
8359 NEXT;
8360 q = CUR_PTR;
8361 version = xmlParseVersionNum(ctxt);
8362 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008363 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008364 } else
8365 NEXT;
8366 } else if (RAW == '\''){
8367 NEXT;
8368 q = CUR_PTR;
8369 version = xmlParseVersionNum(ctxt);
8370 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008371 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008372 } else
8373 NEXT;
8374 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008375 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008376 }
8377 }
8378 return(version);
8379}
8380
8381/**
8382 * xmlParseEncName:
8383 * @ctxt: an XML parser context
8384 *
8385 * parse the XML encoding name
8386 *
8387 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8388 *
8389 * Returns the encoding name value or NULL
8390 */
8391xmlChar *
8392xmlParseEncName(xmlParserCtxtPtr ctxt) {
8393 xmlChar *buf = NULL;
8394 int len = 0;
8395 int size = 10;
8396 xmlChar cur;
8397
8398 cur = CUR;
8399 if (((cur >= 'a') && (cur <= 'z')) ||
8400 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008401 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008402 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008403 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008404 return(NULL);
8405 }
8406
8407 buf[len++] = cur;
8408 NEXT;
8409 cur = CUR;
8410 while (((cur >= 'a') && (cur <= 'z')) ||
8411 ((cur >= 'A') && (cur <= 'Z')) ||
8412 ((cur >= '0') && (cur <= '9')) ||
8413 (cur == '.') || (cur == '_') ||
8414 (cur == '-')) {
8415 if (len + 1 >= size) {
8416 size *= 2;
8417 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8418 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008419 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008420 return(NULL);
8421 }
8422 }
8423 buf[len++] = cur;
8424 NEXT;
8425 cur = CUR;
8426 if (cur == 0) {
8427 SHRINK;
8428 GROW;
8429 cur = CUR;
8430 }
8431 }
8432 buf[len] = 0;
8433 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008434 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008435 }
8436 return(buf);
8437}
8438
8439/**
8440 * xmlParseEncodingDecl:
8441 * @ctxt: an XML parser context
8442 *
8443 * parse the XML encoding declaration
8444 *
8445 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8446 *
8447 * this setups the conversion filters.
8448 *
8449 * Returns the encoding value or NULL
8450 */
8451
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008452const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008453xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8454 xmlChar *encoding = NULL;
8455 const xmlChar *q;
8456
8457 SKIP_BLANKS;
8458 if ((RAW == 'e') && (NXT(1) == 'n') &&
8459 (NXT(2) == 'c') && (NXT(3) == 'o') &&
8460 (NXT(4) == 'd') && (NXT(5) == 'i') &&
8461 (NXT(6) == 'n') && (NXT(7) == 'g')) {
8462 SKIP(8);
8463 SKIP_BLANKS;
8464 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008465 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008466 return(NULL);
8467 }
8468 NEXT;
8469 SKIP_BLANKS;
8470 if (RAW == '"') {
8471 NEXT;
8472 q = CUR_PTR;
8473 encoding = xmlParseEncName(ctxt);
8474 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008475 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008476 } else
8477 NEXT;
8478 } else if (RAW == '\''){
8479 NEXT;
8480 q = CUR_PTR;
8481 encoding = xmlParseEncName(ctxt);
8482 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008483 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008484 } else
8485 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008486 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008487 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008488 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008489 /*
8490 * UTF-16 encoding stwich has already taken place at this stage,
8491 * more over the little-endian/big-endian selection is already done
8492 */
8493 if ((encoding != NULL) &&
8494 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8495 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008496 if (ctxt->encoding != NULL)
8497 xmlFree((xmlChar *) ctxt->encoding);
8498 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008499 }
8500 /*
8501 * UTF-8 encoding is handled natively
8502 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008503 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008504 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8505 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008506 if (ctxt->encoding != NULL)
8507 xmlFree((xmlChar *) ctxt->encoding);
8508 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008509 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008510 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008511 xmlCharEncodingHandlerPtr handler;
8512
8513 if (ctxt->input->encoding != NULL)
8514 xmlFree((xmlChar *) ctxt->input->encoding);
8515 ctxt->input->encoding = encoding;
8516
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008517 handler = xmlFindCharEncodingHandler((const char *) encoding);
8518 if (handler != NULL) {
8519 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008520 } else {
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008521 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
8522 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8523 ctxt->sax->error(ctxt->userData,
8524 "Unsupported encoding %s\n", encoding);
8525 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008526 }
8527 }
8528 }
8529 return(encoding);
8530}
8531
8532/**
8533 * xmlParseSDDecl:
8534 * @ctxt: an XML parser context
8535 *
8536 * parse the XML standalone declaration
8537 *
8538 * [32] SDDecl ::= S 'standalone' Eq
8539 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8540 *
8541 * [ VC: Standalone Document Declaration ]
8542 * TODO The standalone document declaration must have the value "no"
8543 * if any external markup declarations contain declarations of:
8544 * - attributes with default values, if elements to which these
8545 * attributes apply appear in the document without specifications
8546 * of values for these attributes, or
8547 * - entities (other than amp, lt, gt, apos, quot), if references
8548 * to those entities appear in the document, or
8549 * - attributes with values subject to normalization, where the
8550 * attribute appears in the document with a value which will change
8551 * as a result of normalization, or
8552 * - element types with element content, if white space occurs directly
8553 * within any instance of those types.
8554 *
8555 * Returns 1 if standalone, 0 otherwise
8556 */
8557
8558int
8559xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8560 int standalone = -1;
8561
8562 SKIP_BLANKS;
8563 if ((RAW == 's') && (NXT(1) == 't') &&
8564 (NXT(2) == 'a') && (NXT(3) == 'n') &&
8565 (NXT(4) == 'd') && (NXT(5) == 'a') &&
8566 (NXT(6) == 'l') && (NXT(7) == 'o') &&
8567 (NXT(8) == 'n') && (NXT(9) == 'e')) {
8568 SKIP(10);
8569 SKIP_BLANKS;
8570 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008571 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008572 return(standalone);
8573 }
8574 NEXT;
8575 SKIP_BLANKS;
8576 if (RAW == '\''){
8577 NEXT;
8578 if ((RAW == 'n') && (NXT(1) == 'o')) {
8579 standalone = 0;
8580 SKIP(2);
8581 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8582 (NXT(2) == 's')) {
8583 standalone = 1;
8584 SKIP(3);
8585 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008586 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008587 }
8588 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008589 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008590 } else
8591 NEXT;
8592 } else if (RAW == '"'){
8593 NEXT;
8594 if ((RAW == 'n') && (NXT(1) == 'o')) {
8595 standalone = 0;
8596 SKIP(2);
8597 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8598 (NXT(2) == 's')) {
8599 standalone = 1;
8600 SKIP(3);
8601 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008602 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008603 }
8604 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008605 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008606 } else
8607 NEXT;
8608 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008609 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008610 }
8611 }
8612 return(standalone);
8613}
8614
8615/**
8616 * xmlParseXMLDecl:
8617 * @ctxt: an XML parser context
8618 *
8619 * parse an XML declaration header
8620 *
8621 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8622 */
8623
8624void
8625xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8626 xmlChar *version;
8627
8628 /*
8629 * We know that '<?xml' is here.
8630 */
8631 SKIP(5);
8632
8633 if (!IS_BLANK(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008634 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8635 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008636 }
8637 SKIP_BLANKS;
8638
8639 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008640 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008641 */
8642 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008643 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008644 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008645 } else {
8646 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8647 /*
8648 * TODO: Blueberry should be detected here
8649 */
8650 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
8651 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
8652 version);
8653 }
8654 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008655 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008656 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008657 }
Owen Taylor3473f882001-02-23 17:55:21 +00008658
8659 /*
8660 * We may have the encoding declaration
8661 */
8662 if (!IS_BLANK(RAW)) {
8663 if ((RAW == '?') && (NXT(1) == '>')) {
8664 SKIP(2);
8665 return;
8666 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008667 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008668 }
8669 xmlParseEncodingDecl(ctxt);
8670 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8671 /*
8672 * The XML REC instructs us to stop parsing right here
8673 */
8674 return;
8675 }
8676
8677 /*
8678 * We may have the standalone status.
8679 */
8680 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
8681 if ((RAW == '?') && (NXT(1) == '>')) {
8682 SKIP(2);
8683 return;
8684 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008685 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008686 }
8687 SKIP_BLANKS;
8688 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8689
8690 SKIP_BLANKS;
8691 if ((RAW == '?') && (NXT(1) == '>')) {
8692 SKIP(2);
8693 } else if (RAW == '>') {
8694 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008695 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008696 NEXT;
8697 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008698 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008699 MOVETO_ENDTAG(CUR_PTR);
8700 NEXT;
8701 }
8702}
8703
8704/**
8705 * xmlParseMisc:
8706 * @ctxt: an XML parser context
8707 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008708 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008709 *
8710 * [27] Misc ::= Comment | PI | S
8711 */
8712
8713void
8714xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008715 while (((RAW == '<') && (NXT(1) == '?')) ||
8716 ((RAW == '<') && (NXT(1) == '!') &&
8717 (NXT(2) == '-') && (NXT(3) == '-')) ||
8718 IS_BLANK(CUR)) {
8719 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008720 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00008721 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008722 NEXT;
8723 } else
8724 xmlParseComment(ctxt);
8725 }
8726}
8727
8728/**
8729 * xmlParseDocument:
8730 * @ctxt: an XML parser context
8731 *
8732 * parse an XML document (and build a tree if using the standard SAX
8733 * interface).
8734 *
8735 * [1] document ::= prolog element Misc*
8736 *
8737 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8738 *
8739 * Returns 0, -1 in case of error. the parser context is augmented
8740 * as a result of the parsing.
8741 */
8742
8743int
8744xmlParseDocument(xmlParserCtxtPtr ctxt) {
8745 xmlChar start[4];
8746 xmlCharEncoding enc;
8747
8748 xmlInitParser();
8749
8750 GROW;
8751
8752 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008753 * SAX: detecting the level.
8754 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008755 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008756
8757 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008758 * SAX: beginning of the document processing.
8759 */
8760 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8761 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8762
Daniel Veillard50f34372001-08-03 12:06:36 +00008763 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008764 /*
8765 * Get the 4 first bytes and decode the charset
8766 * if enc != XML_CHAR_ENCODING_NONE
8767 * plug some encoding conversion routines.
8768 */
8769 start[0] = RAW;
8770 start[1] = NXT(1);
8771 start[2] = NXT(2);
8772 start[3] = NXT(3);
8773 enc = xmlDetectCharEncoding(start, 4);
8774 if (enc != XML_CHAR_ENCODING_NONE) {
8775 xmlSwitchEncoding(ctxt, enc);
8776 }
Owen Taylor3473f882001-02-23 17:55:21 +00008777 }
8778
8779
8780 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008781 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008782 }
8783
8784 /*
8785 * Check for the XMLDecl in the Prolog.
8786 */
8787 GROW;
8788 if ((RAW == '<') && (NXT(1) == '?') &&
8789 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8790 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8791
8792 /*
8793 * Note that we will switch encoding on the fly.
8794 */
8795 xmlParseXMLDecl(ctxt);
8796 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8797 /*
8798 * The XML REC instructs us to stop parsing right here
8799 */
8800 return(-1);
8801 }
8802 ctxt->standalone = ctxt->input->standalone;
8803 SKIP_BLANKS;
8804 } else {
8805 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8806 }
8807 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8808 ctxt->sax->startDocument(ctxt->userData);
8809
8810 /*
8811 * The Misc part of the Prolog
8812 */
8813 GROW;
8814 xmlParseMisc(ctxt);
8815
8816 /*
8817 * Then possibly doc type declaration(s) and more Misc
8818 * (doctypedecl Misc*)?
8819 */
8820 GROW;
8821 if ((RAW == '<') && (NXT(1) == '!') &&
8822 (NXT(2) == 'D') && (NXT(3) == 'O') &&
8823 (NXT(4) == 'C') && (NXT(5) == 'T') &&
8824 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
8825 (NXT(8) == 'E')) {
8826
8827 ctxt->inSubset = 1;
8828 xmlParseDocTypeDecl(ctxt);
8829 if (RAW == '[') {
8830 ctxt->instate = XML_PARSER_DTD;
8831 xmlParseInternalSubset(ctxt);
8832 }
8833
8834 /*
8835 * Create and update the external subset.
8836 */
8837 ctxt->inSubset = 2;
8838 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8839 (!ctxt->disableSAX))
8840 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8841 ctxt->extSubSystem, ctxt->extSubURI);
8842 ctxt->inSubset = 0;
8843
8844
8845 ctxt->instate = XML_PARSER_PROLOG;
8846 xmlParseMisc(ctxt);
8847 }
8848
8849 /*
8850 * Time to start parsing the tree itself
8851 */
8852 GROW;
8853 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008854 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8855 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008856 } else {
8857 ctxt->instate = XML_PARSER_CONTENT;
8858 xmlParseElement(ctxt);
8859 ctxt->instate = XML_PARSER_EPILOG;
8860
8861
8862 /*
8863 * The Misc part at the end
8864 */
8865 xmlParseMisc(ctxt);
8866
Daniel Veillard561b7f82002-03-20 21:55:57 +00008867 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008868 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008869 }
8870 ctxt->instate = XML_PARSER_EOF;
8871 }
8872
8873 /*
8874 * SAX: end of the document processing.
8875 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008876 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008877 ctxt->sax->endDocument(ctxt->userData);
8878
Daniel Veillard5997aca2002-03-18 18:36:20 +00008879 /*
8880 * Remove locally kept entity definitions if the tree was not built
8881 */
8882 if ((ctxt->myDoc != NULL) &&
8883 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8884 xmlFreeDoc(ctxt->myDoc);
8885 ctxt->myDoc = NULL;
8886 }
8887
Daniel Veillardc7612992002-02-17 22:47:37 +00008888 if (! ctxt->wellFormed) {
8889 ctxt->valid = 0;
8890 return(-1);
8891 }
Owen Taylor3473f882001-02-23 17:55:21 +00008892 return(0);
8893}
8894
8895/**
8896 * xmlParseExtParsedEnt:
8897 * @ctxt: an XML parser context
8898 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008899 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008900 * An external general parsed entity is well-formed if it matches the
8901 * production labeled extParsedEnt.
8902 *
8903 * [78] extParsedEnt ::= TextDecl? content
8904 *
8905 * Returns 0, -1 in case of error. the parser context is augmented
8906 * as a result of the parsing.
8907 */
8908
8909int
8910xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8911 xmlChar start[4];
8912 xmlCharEncoding enc;
8913
8914 xmlDefaultSAXHandlerInit();
8915
Daniel Veillard309f81d2003-09-23 09:02:53 +00008916 xmlDetectSAX2(ctxt);
8917
Owen Taylor3473f882001-02-23 17:55:21 +00008918 GROW;
8919
8920 /*
8921 * SAX: beginning of the document processing.
8922 */
8923 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8924 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8925
8926 /*
8927 * Get the 4 first bytes and decode the charset
8928 * if enc != XML_CHAR_ENCODING_NONE
8929 * plug some encoding conversion routines.
8930 */
8931 start[0] = RAW;
8932 start[1] = NXT(1);
8933 start[2] = NXT(2);
8934 start[3] = NXT(3);
8935 enc = xmlDetectCharEncoding(start, 4);
8936 if (enc != XML_CHAR_ENCODING_NONE) {
8937 xmlSwitchEncoding(ctxt, enc);
8938 }
8939
8940
8941 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008942 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008943 }
8944
8945 /*
8946 * Check for the XMLDecl in the Prolog.
8947 */
8948 GROW;
8949 if ((RAW == '<') && (NXT(1) == '?') &&
8950 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8951 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8952
8953 /*
8954 * Note that we will switch encoding on the fly.
8955 */
8956 xmlParseXMLDecl(ctxt);
8957 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8958 /*
8959 * The XML REC instructs us to stop parsing right here
8960 */
8961 return(-1);
8962 }
8963 SKIP_BLANKS;
8964 } else {
8965 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8966 }
8967 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8968 ctxt->sax->startDocument(ctxt->userData);
8969
8970 /*
8971 * Doing validity checking on chunk doesn't make sense
8972 */
8973 ctxt->instate = XML_PARSER_CONTENT;
8974 ctxt->validate = 0;
8975 ctxt->loadsubset = 0;
8976 ctxt->depth = 0;
8977
8978 xmlParseContent(ctxt);
8979
8980 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008981 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008982 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008983 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008984 }
8985
8986 /*
8987 * SAX: end of the document processing.
8988 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008989 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008990 ctxt->sax->endDocument(ctxt->userData);
8991
8992 if (! ctxt->wellFormed) return(-1);
8993 return(0);
8994}
8995
8996/************************************************************************
8997 * *
8998 * Progressive parsing interfaces *
8999 * *
9000 ************************************************************************/
9001
9002/**
9003 * xmlParseLookupSequence:
9004 * @ctxt: an XML parser context
9005 * @first: the first char to lookup
9006 * @next: the next char to lookup or zero
9007 * @third: the next char to lookup or zero
9008 *
9009 * Try to find if a sequence (first, next, third) or just (first next) or
9010 * (first) is available in the input stream.
9011 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9012 * to avoid rescanning sequences of bytes, it DOES change the state of the
9013 * parser, do not use liberally.
9014 *
9015 * Returns the index to the current parsing point if the full sequence
9016 * is available, -1 otherwise.
9017 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009018static int
Owen Taylor3473f882001-02-23 17:55:21 +00009019xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9020 xmlChar next, xmlChar third) {
9021 int base, len;
9022 xmlParserInputPtr in;
9023 const xmlChar *buf;
9024
9025 in = ctxt->input;
9026 if (in == NULL) return(-1);
9027 base = in->cur - in->base;
9028 if (base < 0) return(-1);
9029 if (ctxt->checkIndex > base)
9030 base = ctxt->checkIndex;
9031 if (in->buf == NULL) {
9032 buf = in->base;
9033 len = in->length;
9034 } else {
9035 buf = in->buf->buffer->content;
9036 len = in->buf->buffer->use;
9037 }
9038 /* take into account the sequence length */
9039 if (third) len -= 2;
9040 else if (next) len --;
9041 for (;base < len;base++) {
9042 if (buf[base] == first) {
9043 if (third != 0) {
9044 if ((buf[base + 1] != next) ||
9045 (buf[base + 2] != third)) continue;
9046 } else if (next != 0) {
9047 if (buf[base + 1] != next) continue;
9048 }
9049 ctxt->checkIndex = 0;
9050#ifdef DEBUG_PUSH
9051 if (next == 0)
9052 xmlGenericError(xmlGenericErrorContext,
9053 "PP: lookup '%c' found at %d\n",
9054 first, base);
9055 else if (third == 0)
9056 xmlGenericError(xmlGenericErrorContext,
9057 "PP: lookup '%c%c' found at %d\n",
9058 first, next, base);
9059 else
9060 xmlGenericError(xmlGenericErrorContext,
9061 "PP: lookup '%c%c%c' found at %d\n",
9062 first, next, third, base);
9063#endif
9064 return(base - (in->cur - in->base));
9065 }
9066 }
9067 ctxt->checkIndex = base;
9068#ifdef DEBUG_PUSH
9069 if (next == 0)
9070 xmlGenericError(xmlGenericErrorContext,
9071 "PP: lookup '%c' failed\n", first);
9072 else if (third == 0)
9073 xmlGenericError(xmlGenericErrorContext,
9074 "PP: lookup '%c%c' failed\n", first, next);
9075 else
9076 xmlGenericError(xmlGenericErrorContext,
9077 "PP: lookup '%c%c%c' failed\n", first, next, third);
9078#endif
9079 return(-1);
9080}
9081
9082/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009083 * xmlParseGetLasts:
9084 * @ctxt: an XML parser context
9085 * @lastlt: pointer to store the last '<' from the input
9086 * @lastgt: pointer to store the last '>' from the input
9087 *
9088 * Lookup the last < and > in the current chunk
9089 */
9090static void
9091xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9092 const xmlChar **lastgt) {
9093 const xmlChar *tmp;
9094
9095 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9096 xmlGenericError(xmlGenericErrorContext,
9097 "Internal error: xmlParseGetLasts\n");
9098 return;
9099 }
9100 if ((ctxt->progressive == 1) && (ctxt->inputNr == 1)) {
9101 tmp = ctxt->input->end;
9102 tmp--;
9103 while ((tmp >= ctxt->input->base) && (*tmp != '<') &&
9104 (*tmp != '>')) tmp--;
9105 if (tmp < ctxt->input->base) {
9106 *lastlt = NULL;
9107 *lastgt = NULL;
9108 } else if (*tmp == '<') {
9109 *lastlt = tmp;
9110 tmp--;
9111 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9112 if (tmp < ctxt->input->base)
9113 *lastgt = NULL;
9114 else
9115 *lastgt = tmp;
9116 } else {
9117 *lastgt = tmp;
9118 tmp--;
9119 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
9120 if (tmp < ctxt->input->base)
9121 *lastlt = NULL;
9122 else
9123 *lastlt = tmp;
9124 }
9125
9126 } else {
9127 *lastlt = NULL;
9128 *lastgt = NULL;
9129 }
9130}
9131/**
Owen Taylor3473f882001-02-23 17:55:21 +00009132 * xmlParseTryOrFinish:
9133 * @ctxt: an XML parser context
9134 * @terminate: last chunk indicator
9135 *
9136 * Try to progress on parsing
9137 *
9138 * Returns zero if no parsing was possible
9139 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009140static int
Owen Taylor3473f882001-02-23 17:55:21 +00009141xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9142 int ret = 0;
9143 int avail;
9144 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009145 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009146
9147#ifdef DEBUG_PUSH
9148 switch (ctxt->instate) {
9149 case XML_PARSER_EOF:
9150 xmlGenericError(xmlGenericErrorContext,
9151 "PP: try EOF\n"); break;
9152 case XML_PARSER_START:
9153 xmlGenericError(xmlGenericErrorContext,
9154 "PP: try START\n"); break;
9155 case XML_PARSER_MISC:
9156 xmlGenericError(xmlGenericErrorContext,
9157 "PP: try MISC\n");break;
9158 case XML_PARSER_COMMENT:
9159 xmlGenericError(xmlGenericErrorContext,
9160 "PP: try COMMENT\n");break;
9161 case XML_PARSER_PROLOG:
9162 xmlGenericError(xmlGenericErrorContext,
9163 "PP: try PROLOG\n");break;
9164 case XML_PARSER_START_TAG:
9165 xmlGenericError(xmlGenericErrorContext,
9166 "PP: try START_TAG\n");break;
9167 case XML_PARSER_CONTENT:
9168 xmlGenericError(xmlGenericErrorContext,
9169 "PP: try CONTENT\n");break;
9170 case XML_PARSER_CDATA_SECTION:
9171 xmlGenericError(xmlGenericErrorContext,
9172 "PP: try CDATA_SECTION\n");break;
9173 case XML_PARSER_END_TAG:
9174 xmlGenericError(xmlGenericErrorContext,
9175 "PP: try END_TAG\n");break;
9176 case XML_PARSER_ENTITY_DECL:
9177 xmlGenericError(xmlGenericErrorContext,
9178 "PP: try ENTITY_DECL\n");break;
9179 case XML_PARSER_ENTITY_VALUE:
9180 xmlGenericError(xmlGenericErrorContext,
9181 "PP: try ENTITY_VALUE\n");break;
9182 case XML_PARSER_ATTRIBUTE_VALUE:
9183 xmlGenericError(xmlGenericErrorContext,
9184 "PP: try ATTRIBUTE_VALUE\n");break;
9185 case XML_PARSER_DTD:
9186 xmlGenericError(xmlGenericErrorContext,
9187 "PP: try DTD\n");break;
9188 case XML_PARSER_EPILOG:
9189 xmlGenericError(xmlGenericErrorContext,
9190 "PP: try EPILOG\n");break;
9191 case XML_PARSER_PI:
9192 xmlGenericError(xmlGenericErrorContext,
9193 "PP: try PI\n");break;
9194 case XML_PARSER_IGNORE:
9195 xmlGenericError(xmlGenericErrorContext,
9196 "PP: try IGNORE\n");break;
9197 }
9198#endif
9199
Daniel Veillarda880b122003-04-21 21:36:41 +00009200 if (ctxt->input->cur - ctxt->input->base > 4096) {
9201 xmlSHRINK(ctxt);
9202 ctxt->checkIndex = 0;
9203 }
9204 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009205
Daniel Veillarda880b122003-04-21 21:36:41 +00009206 while (1) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009207 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9208 return(0);
9209
9210
Owen Taylor3473f882001-02-23 17:55:21 +00009211 /*
9212 * Pop-up of finished entities.
9213 */
9214 while ((RAW == 0) && (ctxt->inputNr > 1))
9215 xmlPopInput(ctxt);
9216
9217 if (ctxt->input ==NULL) break;
9218 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009219 avail = ctxt->input->length -
9220 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009221 else {
9222 /*
9223 * If we are operating on converted input, try to flush
9224 * remaining chars to avoid them stalling in the non-converted
9225 * buffer.
9226 */
9227 if ((ctxt->input->buf->raw != NULL) &&
9228 (ctxt->input->buf->raw->use > 0)) {
9229 int base = ctxt->input->base -
9230 ctxt->input->buf->buffer->content;
9231 int current = ctxt->input->cur - ctxt->input->base;
9232
9233 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9234 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9235 ctxt->input->cur = ctxt->input->base + current;
9236 ctxt->input->end =
9237 &ctxt->input->buf->buffer->content[
9238 ctxt->input->buf->buffer->use];
9239 }
9240 avail = ctxt->input->buf->buffer->use -
9241 (ctxt->input->cur - ctxt->input->base);
9242 }
Owen Taylor3473f882001-02-23 17:55:21 +00009243 if (avail < 1)
9244 goto done;
9245 switch (ctxt->instate) {
9246 case XML_PARSER_EOF:
9247 /*
9248 * Document parsing is done !
9249 */
9250 goto done;
9251 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009252 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9253 xmlChar start[4];
9254 xmlCharEncoding enc;
9255
9256 /*
9257 * Very first chars read from the document flow.
9258 */
9259 if (avail < 4)
9260 goto done;
9261
9262 /*
9263 * Get the 4 first bytes and decode the charset
9264 * if enc != XML_CHAR_ENCODING_NONE
9265 * plug some encoding conversion routines.
9266 */
9267 start[0] = RAW;
9268 start[1] = NXT(1);
9269 start[2] = NXT(2);
9270 start[3] = NXT(3);
9271 enc = xmlDetectCharEncoding(start, 4);
9272 if (enc != XML_CHAR_ENCODING_NONE) {
9273 xmlSwitchEncoding(ctxt, enc);
9274 }
9275 break;
9276 }
Owen Taylor3473f882001-02-23 17:55:21 +00009277
9278 cur = ctxt->input->cur[0];
9279 next = ctxt->input->cur[1];
9280 if (cur == 0) {
9281 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9282 ctxt->sax->setDocumentLocator(ctxt->userData,
9283 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009284 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009285 ctxt->instate = XML_PARSER_EOF;
9286#ifdef DEBUG_PUSH
9287 xmlGenericError(xmlGenericErrorContext,
9288 "PP: entering EOF\n");
9289#endif
9290 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9291 ctxt->sax->endDocument(ctxt->userData);
9292 goto done;
9293 }
9294 if ((cur == '<') && (next == '?')) {
9295 /* PI or XML decl */
9296 if (avail < 5) return(ret);
9297 if ((!terminate) &&
9298 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9299 return(ret);
9300 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9301 ctxt->sax->setDocumentLocator(ctxt->userData,
9302 &xmlDefaultSAXLocator);
9303 if ((ctxt->input->cur[2] == 'x') &&
9304 (ctxt->input->cur[3] == 'm') &&
9305 (ctxt->input->cur[4] == 'l') &&
9306 (IS_BLANK(ctxt->input->cur[5]))) {
9307 ret += 5;
9308#ifdef DEBUG_PUSH
9309 xmlGenericError(xmlGenericErrorContext,
9310 "PP: Parsing XML Decl\n");
9311#endif
9312 xmlParseXMLDecl(ctxt);
9313 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9314 /*
9315 * The XML REC instructs us to stop parsing right
9316 * here
9317 */
9318 ctxt->instate = XML_PARSER_EOF;
9319 return(0);
9320 }
9321 ctxt->standalone = ctxt->input->standalone;
9322 if ((ctxt->encoding == NULL) &&
9323 (ctxt->input->encoding != NULL))
9324 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9325 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9326 (!ctxt->disableSAX))
9327 ctxt->sax->startDocument(ctxt->userData);
9328 ctxt->instate = XML_PARSER_MISC;
9329#ifdef DEBUG_PUSH
9330 xmlGenericError(xmlGenericErrorContext,
9331 "PP: entering MISC\n");
9332#endif
9333 } else {
9334 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9335 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9336 (!ctxt->disableSAX))
9337 ctxt->sax->startDocument(ctxt->userData);
9338 ctxt->instate = XML_PARSER_MISC;
9339#ifdef DEBUG_PUSH
9340 xmlGenericError(xmlGenericErrorContext,
9341 "PP: entering MISC\n");
9342#endif
9343 }
9344 } else {
9345 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9346 ctxt->sax->setDocumentLocator(ctxt->userData,
9347 &xmlDefaultSAXLocator);
9348 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9349 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9350 (!ctxt->disableSAX))
9351 ctxt->sax->startDocument(ctxt->userData);
9352 ctxt->instate = XML_PARSER_MISC;
9353#ifdef DEBUG_PUSH
9354 xmlGenericError(xmlGenericErrorContext,
9355 "PP: entering MISC\n");
9356#endif
9357 }
9358 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009359 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009360 const xmlChar *name;
9361 const xmlChar *prefix;
9362 const xmlChar *URI;
9363 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009364
9365 if ((avail < 2) && (ctxt->inputNr == 1))
9366 goto done;
9367 cur = ctxt->input->cur[0];
9368 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009369 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009370 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009371 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9372 ctxt->sax->endDocument(ctxt->userData);
9373 goto done;
9374 }
9375 if (!terminate) {
9376 if (ctxt->progressive) {
9377 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9378 goto done;
9379 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9380 goto done;
9381 }
9382 }
9383 if (ctxt->spaceNr == 0)
9384 spacePush(ctxt, -1);
9385 else
9386 spacePush(ctxt, *ctxt->space);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009387 if (ctxt->sax2)
9388 name = xmlParseStartTag2(ctxt, &prefix, &URI);
9389 else
9390 name = xmlParseStartTag(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009391 if (name == NULL) {
9392 spacePop(ctxt);
9393 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009394 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9395 ctxt->sax->endDocument(ctxt->userData);
9396 goto done;
9397 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009398 /*
9399 * [ VC: Root Element Type ]
9400 * The Name in the document type declaration must match
9401 * the element type of the root element.
9402 */
9403 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9404 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9405 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9406
9407 /*
9408 * Check for an Empty Element.
9409 */
9410 if ((RAW == '/') && (NXT(1) == '>')) {
9411 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009412
9413 if (ctxt->sax2) {
9414 if ((ctxt->sax != NULL) &&
9415 (ctxt->sax->endElementNs != NULL) &&
9416 (!ctxt->disableSAX))
9417 ctxt->sax->endElementNs(ctxt->userData, name,
9418 prefix, URI);
9419 } else {
9420 if ((ctxt->sax != NULL) &&
9421 (ctxt->sax->endElement != NULL) &&
9422 (!ctxt->disableSAX))
9423 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009424 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009425 spacePop(ctxt);
9426 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009427 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009428 } else {
9429 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009430 }
9431 break;
9432 }
9433 if (RAW == '>') {
9434 NEXT;
9435 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009436 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009437 "Couldn't find end of Start Tag %s\n",
9438 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009439 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009440 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009441 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009442 if (ctxt->sax2)
9443 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
9444 else
9445 namePush(ctxt, name);
9446
Daniel Veillarda880b122003-04-21 21:36:41 +00009447 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009448 break;
9449 }
9450 case XML_PARSER_CONTENT: {
9451 const xmlChar *test;
9452 unsigned int cons;
9453 if ((avail < 2) && (ctxt->inputNr == 1))
9454 goto done;
9455 cur = ctxt->input->cur[0];
9456 next = ctxt->input->cur[1];
9457
9458 test = CUR_PTR;
9459 cons = ctxt->input->consumed;
9460 if ((cur == '<') && (next == '/')) {
9461 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009462 break;
9463 } else if ((cur == '<') && (next == '?')) {
9464 if ((!terminate) &&
9465 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9466 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009467 xmlParsePI(ctxt);
9468 } else if ((cur == '<') && (next != '!')) {
9469 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009470 break;
9471 } else if ((cur == '<') && (next == '!') &&
9472 (ctxt->input->cur[2] == '-') &&
9473 (ctxt->input->cur[3] == '-')) {
9474 if ((!terminate) &&
9475 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9476 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009477 xmlParseComment(ctxt);
9478 ctxt->instate = XML_PARSER_CONTENT;
9479 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9480 (ctxt->input->cur[2] == '[') &&
9481 (ctxt->input->cur[3] == 'C') &&
9482 (ctxt->input->cur[4] == 'D') &&
9483 (ctxt->input->cur[5] == 'A') &&
9484 (ctxt->input->cur[6] == 'T') &&
9485 (ctxt->input->cur[7] == 'A') &&
9486 (ctxt->input->cur[8] == '[')) {
9487 SKIP(9);
9488 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009489 break;
9490 } else if ((cur == '<') && (next == '!') &&
9491 (avail < 9)) {
9492 goto done;
9493 } else if (cur == '&') {
9494 if ((!terminate) &&
9495 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9496 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009497 xmlParseReference(ctxt);
9498 } else {
9499 /* TODO Avoid the extra copy, handle directly !!! */
9500 /*
9501 * Goal of the following test is:
9502 * - minimize calls to the SAX 'character' callback
9503 * when they are mergeable
9504 * - handle a problem for isBlank when we only parse
9505 * a sequence of blank chars and the next one is
9506 * not available to check against '<' presence.
9507 * - tries to homogenize the differences in SAX
9508 * callbacks between the push and pull versions
9509 * of the parser.
9510 */
9511 if ((ctxt->inputNr == 1) &&
9512 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9513 if (!terminate) {
9514 if (ctxt->progressive) {
9515 if ((lastlt == NULL) ||
9516 (ctxt->input->cur > lastlt))
9517 goto done;
9518 } else if (xmlParseLookupSequence(ctxt,
9519 '<', 0, 0) < 0) {
9520 goto done;
9521 }
9522 }
9523 }
9524 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009525 xmlParseCharData(ctxt, 0);
9526 }
9527 /*
9528 * Pop-up of finished entities.
9529 */
9530 while ((RAW == 0) && (ctxt->inputNr > 1))
9531 xmlPopInput(ctxt);
9532 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009533 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9534 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009535 ctxt->instate = XML_PARSER_EOF;
9536 break;
9537 }
9538 break;
9539 }
9540 case XML_PARSER_END_TAG:
9541 if (avail < 2)
9542 goto done;
9543 if (!terminate) {
9544 if (ctxt->progressive) {
9545 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9546 goto done;
9547 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9548 goto done;
9549 }
9550 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009551 if (ctxt->sax2) {
9552 xmlParseEndTag2(ctxt,
9553 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9554 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
9555 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1]);
9556 nameNsPop(ctxt);
9557 } else
9558 xmlParseEndTag1(ctxt, 0);
9559 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009560 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009561 } else {
9562 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009563 }
9564 break;
9565 case XML_PARSER_CDATA_SECTION: {
9566 /*
9567 * The Push mode need to have the SAX callback for
9568 * cdataBlock merge back contiguous callbacks.
9569 */
9570 int base;
9571
9572 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9573 if (base < 0) {
9574 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
9575 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9576 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009577 ctxt->sax->cdataBlock(ctxt->userData,
9578 ctxt->input->cur,
9579 XML_PARSER_BIG_BUFFER_SIZE);
9580 else if (ctxt->sax->characters != NULL)
9581 ctxt->sax->characters(ctxt->userData,
9582 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00009583 XML_PARSER_BIG_BUFFER_SIZE);
9584 }
9585 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
9586 ctxt->checkIndex = 0;
9587 }
9588 goto done;
9589 } else {
9590 if ((ctxt->sax != NULL) && (base > 0) &&
9591 (!ctxt->disableSAX)) {
9592 if (ctxt->sax->cdataBlock != NULL)
9593 ctxt->sax->cdataBlock(ctxt->userData,
9594 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009595 else if (ctxt->sax->characters != NULL)
9596 ctxt->sax->characters(ctxt->userData,
9597 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009598 }
9599 SKIP(base + 3);
9600 ctxt->checkIndex = 0;
9601 ctxt->instate = XML_PARSER_CONTENT;
9602#ifdef DEBUG_PUSH
9603 xmlGenericError(xmlGenericErrorContext,
9604 "PP: entering CONTENT\n");
9605#endif
9606 }
9607 break;
9608 }
Owen Taylor3473f882001-02-23 17:55:21 +00009609 case XML_PARSER_MISC:
9610 SKIP_BLANKS;
9611 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009612 avail = ctxt->input->length -
9613 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009614 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009615 avail = ctxt->input->buf->buffer->use -
9616 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009617 if (avail < 2)
9618 goto done;
9619 cur = ctxt->input->cur[0];
9620 next = ctxt->input->cur[1];
9621 if ((cur == '<') && (next == '?')) {
9622 if ((!terminate) &&
9623 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9624 goto done;
9625#ifdef DEBUG_PUSH
9626 xmlGenericError(xmlGenericErrorContext,
9627 "PP: Parsing PI\n");
9628#endif
9629 xmlParsePI(ctxt);
9630 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009631 (ctxt->input->cur[2] == '-') &&
9632 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009633 if ((!terminate) &&
9634 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9635 goto done;
9636#ifdef DEBUG_PUSH
9637 xmlGenericError(xmlGenericErrorContext,
9638 "PP: Parsing Comment\n");
9639#endif
9640 xmlParseComment(ctxt);
9641 ctxt->instate = XML_PARSER_MISC;
9642 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009643 (ctxt->input->cur[2] == 'D') &&
9644 (ctxt->input->cur[3] == 'O') &&
9645 (ctxt->input->cur[4] == 'C') &&
9646 (ctxt->input->cur[5] == 'T') &&
9647 (ctxt->input->cur[6] == 'Y') &&
9648 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009649 (ctxt->input->cur[8] == 'E')) {
9650 if ((!terminate) &&
9651 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9652 goto done;
9653#ifdef DEBUG_PUSH
9654 xmlGenericError(xmlGenericErrorContext,
9655 "PP: Parsing internal subset\n");
9656#endif
9657 ctxt->inSubset = 1;
9658 xmlParseDocTypeDecl(ctxt);
9659 if (RAW == '[') {
9660 ctxt->instate = XML_PARSER_DTD;
9661#ifdef DEBUG_PUSH
9662 xmlGenericError(xmlGenericErrorContext,
9663 "PP: entering DTD\n");
9664#endif
9665 } else {
9666 /*
9667 * Create and update the external subset.
9668 */
9669 ctxt->inSubset = 2;
9670 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9671 (ctxt->sax->externalSubset != NULL))
9672 ctxt->sax->externalSubset(ctxt->userData,
9673 ctxt->intSubName, ctxt->extSubSystem,
9674 ctxt->extSubURI);
9675 ctxt->inSubset = 0;
9676 ctxt->instate = XML_PARSER_PROLOG;
9677#ifdef DEBUG_PUSH
9678 xmlGenericError(xmlGenericErrorContext,
9679 "PP: entering PROLOG\n");
9680#endif
9681 }
9682 } else if ((cur == '<') && (next == '!') &&
9683 (avail < 9)) {
9684 goto done;
9685 } else {
9686 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009687 ctxt->progressive = 1;
9688 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009689#ifdef DEBUG_PUSH
9690 xmlGenericError(xmlGenericErrorContext,
9691 "PP: entering START_TAG\n");
9692#endif
9693 }
9694 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009695 case XML_PARSER_PROLOG:
9696 SKIP_BLANKS;
9697 if (ctxt->input->buf == NULL)
9698 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9699 else
9700 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9701 if (avail < 2)
9702 goto done;
9703 cur = ctxt->input->cur[0];
9704 next = ctxt->input->cur[1];
9705 if ((cur == '<') && (next == '?')) {
9706 if ((!terminate) &&
9707 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9708 goto done;
9709#ifdef DEBUG_PUSH
9710 xmlGenericError(xmlGenericErrorContext,
9711 "PP: Parsing PI\n");
9712#endif
9713 xmlParsePI(ctxt);
9714 } else if ((cur == '<') && (next == '!') &&
9715 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9716 if ((!terminate) &&
9717 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9718 goto done;
9719#ifdef DEBUG_PUSH
9720 xmlGenericError(xmlGenericErrorContext,
9721 "PP: Parsing Comment\n");
9722#endif
9723 xmlParseComment(ctxt);
9724 ctxt->instate = XML_PARSER_PROLOG;
9725 } else if ((cur == '<') && (next == '!') &&
9726 (avail < 4)) {
9727 goto done;
9728 } else {
9729 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009730 ctxt->progressive = 1;
9731 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009732#ifdef DEBUG_PUSH
9733 xmlGenericError(xmlGenericErrorContext,
9734 "PP: entering START_TAG\n");
9735#endif
9736 }
9737 break;
9738 case XML_PARSER_EPILOG:
9739 SKIP_BLANKS;
9740 if (ctxt->input->buf == NULL)
9741 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9742 else
9743 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9744 if (avail < 2)
9745 goto done;
9746 cur = ctxt->input->cur[0];
9747 next = ctxt->input->cur[1];
9748 if ((cur == '<') && (next == '?')) {
9749 if ((!terminate) &&
9750 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9751 goto done;
9752#ifdef DEBUG_PUSH
9753 xmlGenericError(xmlGenericErrorContext,
9754 "PP: Parsing PI\n");
9755#endif
9756 xmlParsePI(ctxt);
9757 ctxt->instate = XML_PARSER_EPILOG;
9758 } else if ((cur == '<') && (next == '!') &&
9759 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9760 if ((!terminate) &&
9761 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9762 goto done;
9763#ifdef DEBUG_PUSH
9764 xmlGenericError(xmlGenericErrorContext,
9765 "PP: Parsing Comment\n");
9766#endif
9767 xmlParseComment(ctxt);
9768 ctxt->instate = XML_PARSER_EPILOG;
9769 } else if ((cur == '<') && (next == '!') &&
9770 (avail < 4)) {
9771 goto done;
9772 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009773 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009774 ctxt->instate = XML_PARSER_EOF;
9775#ifdef DEBUG_PUSH
9776 xmlGenericError(xmlGenericErrorContext,
9777 "PP: entering EOF\n");
9778#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009779 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009780 ctxt->sax->endDocument(ctxt->userData);
9781 goto done;
9782 }
9783 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009784 case XML_PARSER_DTD: {
9785 /*
9786 * Sorry but progressive parsing of the internal subset
9787 * is not expected to be supported. We first check that
9788 * the full content of the internal subset is available and
9789 * the parsing is launched only at that point.
9790 * Internal subset ends up with "']' S? '>'" in an unescaped
9791 * section and not in a ']]>' sequence which are conditional
9792 * sections (whoever argued to keep that crap in XML deserve
9793 * a place in hell !).
9794 */
9795 int base, i;
9796 xmlChar *buf;
9797 xmlChar quote = 0;
9798
9799 base = ctxt->input->cur - ctxt->input->base;
9800 if (base < 0) return(0);
9801 if (ctxt->checkIndex > base)
9802 base = ctxt->checkIndex;
9803 buf = ctxt->input->buf->buffer->content;
9804 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9805 base++) {
9806 if (quote != 0) {
9807 if (buf[base] == quote)
9808 quote = 0;
9809 continue;
9810 }
9811 if (buf[base] == '"') {
9812 quote = '"';
9813 continue;
9814 }
9815 if (buf[base] == '\'') {
9816 quote = '\'';
9817 continue;
9818 }
9819 if (buf[base] == ']') {
9820 if ((unsigned int) base +1 >=
9821 ctxt->input->buf->buffer->use)
9822 break;
9823 if (buf[base + 1] == ']') {
9824 /* conditional crap, skip both ']' ! */
9825 base++;
9826 continue;
9827 }
9828 for (i = 0;
9829 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9830 i++) {
9831 if (buf[base + i] == '>')
9832 goto found_end_int_subset;
9833 }
9834 break;
9835 }
9836 }
9837 /*
9838 * We didn't found the end of the Internal subset
9839 */
9840 if (quote == 0)
9841 ctxt->checkIndex = base;
9842#ifdef DEBUG_PUSH
9843 if (next == 0)
9844 xmlGenericError(xmlGenericErrorContext,
9845 "PP: lookup of int subset end filed\n");
9846#endif
9847 goto done;
9848
9849found_end_int_subset:
9850 xmlParseInternalSubset(ctxt);
9851 ctxt->inSubset = 2;
9852 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9853 (ctxt->sax->externalSubset != NULL))
9854 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9855 ctxt->extSubSystem, ctxt->extSubURI);
9856 ctxt->inSubset = 0;
9857 ctxt->instate = XML_PARSER_PROLOG;
9858 ctxt->checkIndex = 0;
9859#ifdef DEBUG_PUSH
9860 xmlGenericError(xmlGenericErrorContext,
9861 "PP: entering PROLOG\n");
9862#endif
9863 break;
9864 }
9865 case XML_PARSER_COMMENT:
9866 xmlGenericError(xmlGenericErrorContext,
9867 "PP: internal error, state == COMMENT\n");
9868 ctxt->instate = XML_PARSER_CONTENT;
9869#ifdef DEBUG_PUSH
9870 xmlGenericError(xmlGenericErrorContext,
9871 "PP: entering CONTENT\n");
9872#endif
9873 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009874 case XML_PARSER_IGNORE:
9875 xmlGenericError(xmlGenericErrorContext,
9876 "PP: internal error, state == IGNORE");
9877 ctxt->instate = XML_PARSER_DTD;
9878#ifdef DEBUG_PUSH
9879 xmlGenericError(xmlGenericErrorContext,
9880 "PP: entering DTD\n");
9881#endif
9882 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009883 case XML_PARSER_PI:
9884 xmlGenericError(xmlGenericErrorContext,
9885 "PP: internal error, state == PI\n");
9886 ctxt->instate = XML_PARSER_CONTENT;
9887#ifdef DEBUG_PUSH
9888 xmlGenericError(xmlGenericErrorContext,
9889 "PP: entering CONTENT\n");
9890#endif
9891 break;
9892 case XML_PARSER_ENTITY_DECL:
9893 xmlGenericError(xmlGenericErrorContext,
9894 "PP: internal error, state == ENTITY_DECL\n");
9895 ctxt->instate = XML_PARSER_DTD;
9896#ifdef DEBUG_PUSH
9897 xmlGenericError(xmlGenericErrorContext,
9898 "PP: entering DTD\n");
9899#endif
9900 break;
9901 case XML_PARSER_ENTITY_VALUE:
9902 xmlGenericError(xmlGenericErrorContext,
9903 "PP: internal error, state == ENTITY_VALUE\n");
9904 ctxt->instate = XML_PARSER_CONTENT;
9905#ifdef DEBUG_PUSH
9906 xmlGenericError(xmlGenericErrorContext,
9907 "PP: entering DTD\n");
9908#endif
9909 break;
9910 case XML_PARSER_ATTRIBUTE_VALUE:
9911 xmlGenericError(xmlGenericErrorContext,
9912 "PP: internal error, state == ATTRIBUTE_VALUE\n");
9913 ctxt->instate = XML_PARSER_START_TAG;
9914#ifdef DEBUG_PUSH
9915 xmlGenericError(xmlGenericErrorContext,
9916 "PP: entering START_TAG\n");
9917#endif
9918 break;
9919 case XML_PARSER_SYSTEM_LITERAL:
9920 xmlGenericError(xmlGenericErrorContext,
9921 "PP: internal error, state == SYSTEM_LITERAL\n");
9922 ctxt->instate = XML_PARSER_START_TAG;
9923#ifdef DEBUG_PUSH
9924 xmlGenericError(xmlGenericErrorContext,
9925 "PP: entering START_TAG\n");
9926#endif
9927 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00009928 case XML_PARSER_PUBLIC_LITERAL:
9929 xmlGenericError(xmlGenericErrorContext,
9930 "PP: internal error, state == PUBLIC_LITERAL\n");
9931 ctxt->instate = XML_PARSER_START_TAG;
9932#ifdef DEBUG_PUSH
9933 xmlGenericError(xmlGenericErrorContext,
9934 "PP: entering START_TAG\n");
9935#endif
9936 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009937 }
9938 }
9939done:
9940#ifdef DEBUG_PUSH
9941 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
9942#endif
9943 return(ret);
9944}
9945
9946/**
Owen Taylor3473f882001-02-23 17:55:21 +00009947 * xmlParseChunk:
9948 * @ctxt: an XML parser context
9949 * @chunk: an char array
9950 * @size: the size in byte of the chunk
9951 * @terminate: last chunk indicator
9952 *
9953 * Parse a Chunk of memory
9954 *
9955 * Returns zero if no error, the xmlParserErrors otherwise.
9956 */
9957int
9958xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9959 int terminate) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009960 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9961 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +00009962 if (ctxt->instate == XML_PARSER_START)
9963 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009964 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9965 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9966 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9967 int cur = ctxt->input->cur - ctxt->input->base;
9968
9969 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9970 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9971 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009972 ctxt->input->end =
9973 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009974#ifdef DEBUG_PUSH
9975 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9976#endif
9977
Owen Taylor3473f882001-02-23 17:55:21 +00009978 } else if (ctxt->instate != XML_PARSER_EOF) {
9979 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
9980 xmlParserInputBufferPtr in = ctxt->input->buf;
9981 if ((in->encoder != NULL) && (in->buffer != NULL) &&
9982 (in->raw != NULL)) {
9983 int nbchars;
9984
9985 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
9986 if (nbchars < 0) {
9987 xmlGenericError(xmlGenericErrorContext,
9988 "xmlParseChunk: encoder error\n");
9989 return(XML_ERR_INVALID_ENCODING);
9990 }
9991 }
9992 }
9993 }
9994 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009995 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9996 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +00009997 if (terminate) {
9998 /*
9999 * Check for termination
10000 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010001 int avail = 0;
10002 if (ctxt->input->buf == NULL)
10003 avail = ctxt->input->length -
10004 (ctxt->input->cur - ctxt->input->base);
10005 else
10006 avail = ctxt->input->buf->buffer->use -
10007 (ctxt->input->cur - ctxt->input->base);
10008
Owen Taylor3473f882001-02-23 17:55:21 +000010009 if ((ctxt->instate != XML_PARSER_EOF) &&
10010 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010011 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010012 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010013 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010014 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010015 }
Owen Taylor3473f882001-02-23 17:55:21 +000010016 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010017 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010018 ctxt->sax->endDocument(ctxt->userData);
10019 }
10020 ctxt->instate = XML_PARSER_EOF;
10021 }
10022 return((xmlParserErrors) ctxt->errNo);
10023}
10024
10025/************************************************************************
10026 * *
10027 * I/O front end functions to the parser *
10028 * *
10029 ************************************************************************/
10030
10031/**
10032 * xmlStopParser:
10033 * @ctxt: an XML parser context
10034 *
10035 * Blocks further parser processing
10036 */
10037void
10038xmlStopParser(xmlParserCtxtPtr ctxt) {
10039 ctxt->instate = XML_PARSER_EOF;
10040 if (ctxt->input != NULL)
10041 ctxt->input->cur = BAD_CAST"";
10042}
10043
10044/**
10045 * xmlCreatePushParserCtxt:
10046 * @sax: a SAX handler
10047 * @user_data: The user data returned on SAX callbacks
10048 * @chunk: a pointer to an array of chars
10049 * @size: number of chars in the array
10050 * @filename: an optional file name or URI
10051 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010052 * Create a parser context for using the XML parser in push mode.
10053 * If @buffer and @size are non-NULL, the data is used to detect
10054 * the encoding. The remaining characters will be parsed so they
10055 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010056 * To allow content encoding detection, @size should be >= 4
10057 * The value of @filename is used for fetching external entities
10058 * and error/warning reports.
10059 *
10060 * Returns the new parser context or NULL
10061 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010062
Owen Taylor3473f882001-02-23 17:55:21 +000010063xmlParserCtxtPtr
10064xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10065 const char *chunk, int size, const char *filename) {
10066 xmlParserCtxtPtr ctxt;
10067 xmlParserInputPtr inputStream;
10068 xmlParserInputBufferPtr buf;
10069 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10070
10071 /*
10072 * plug some encoding conversion routines
10073 */
10074 if ((chunk != NULL) && (size >= 4))
10075 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10076
10077 buf = xmlAllocParserInputBuffer(enc);
10078 if (buf == NULL) return(NULL);
10079
10080 ctxt = xmlNewParserCtxt();
10081 if (ctxt == NULL) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010082 xmlGenericError(xmlGenericErrorContext,
10083 "xml parser: out of memory\n");
10084 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010085 return(NULL);
10086 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010087 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10088 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010089 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010090 xmlFreeParserInputBuffer(buf);
10091 xmlFreeParserCtxt(ctxt);
10092 return(NULL);
10093 }
Owen Taylor3473f882001-02-23 17:55:21 +000010094 if (sax != NULL) {
10095 if (ctxt->sax != &xmlDefaultSAXHandler)
10096 xmlFree(ctxt->sax);
10097 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10098 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010099 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010100 xmlFreeParserInputBuffer(buf);
10101 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010102 return(NULL);
10103 }
10104 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10105 if (user_data != NULL)
10106 ctxt->userData = user_data;
10107 }
10108 if (filename == NULL) {
10109 ctxt->directory = NULL;
10110 } else {
10111 ctxt->directory = xmlParserGetDirectory(filename);
10112 }
10113
10114 inputStream = xmlNewInputStream(ctxt);
10115 if (inputStream == NULL) {
10116 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010117 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010118 return(NULL);
10119 }
10120
10121 if (filename == NULL)
10122 inputStream->filename = NULL;
10123 else
Daniel Veillardf4862f02002-09-10 11:13:43 +000010124 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010125 xmlCanonicPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +000010126 inputStream->buf = buf;
10127 inputStream->base = inputStream->buf->buffer->content;
10128 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010129 inputStream->end =
10130 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010131
10132 inputPush(ctxt, inputStream);
10133
10134 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10135 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010136 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10137 int cur = ctxt->input->cur - ctxt->input->base;
10138
Owen Taylor3473f882001-02-23 17:55:21 +000010139 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010140
10141 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10142 ctxt->input->cur = ctxt->input->base + cur;
10143 ctxt->input->end =
10144 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010145#ifdef DEBUG_PUSH
10146 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10147#endif
10148 }
10149
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010150 if (enc != XML_CHAR_ENCODING_NONE) {
10151 xmlSwitchEncoding(ctxt, enc);
10152 }
10153
Owen Taylor3473f882001-02-23 17:55:21 +000010154 return(ctxt);
10155}
10156
10157/**
10158 * xmlCreateIOParserCtxt:
10159 * @sax: a SAX handler
10160 * @user_data: The user data returned on SAX callbacks
10161 * @ioread: an I/O read function
10162 * @ioclose: an I/O close function
10163 * @ioctx: an I/O handler
10164 * @enc: the charset encoding if known
10165 *
10166 * Create a parser context for using the XML parser with an existing
10167 * I/O stream
10168 *
10169 * Returns the new parser context or NULL
10170 */
10171xmlParserCtxtPtr
10172xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10173 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10174 void *ioctx, xmlCharEncoding enc) {
10175 xmlParserCtxtPtr ctxt;
10176 xmlParserInputPtr inputStream;
10177 xmlParserInputBufferPtr buf;
10178
10179 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10180 if (buf == NULL) return(NULL);
10181
10182 ctxt = xmlNewParserCtxt();
10183 if (ctxt == NULL) {
10184 xmlFree(buf);
10185 return(NULL);
10186 }
10187 if (sax != NULL) {
10188 if (ctxt->sax != &xmlDefaultSAXHandler)
10189 xmlFree(ctxt->sax);
10190 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10191 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010192 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010193 xmlFree(ctxt);
10194 return(NULL);
10195 }
10196 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10197 if (user_data != NULL)
10198 ctxt->userData = user_data;
10199 }
10200
10201 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10202 if (inputStream == NULL) {
10203 xmlFreeParserCtxt(ctxt);
10204 return(NULL);
10205 }
10206 inputPush(ctxt, inputStream);
10207
10208 return(ctxt);
10209}
10210
10211/************************************************************************
10212 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010213 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010214 * *
10215 ************************************************************************/
10216
10217/**
10218 * xmlIOParseDTD:
10219 * @sax: the SAX handler block or NULL
10220 * @input: an Input Buffer
10221 * @enc: the charset encoding if known
10222 *
10223 * Load and parse a DTD
10224 *
10225 * Returns the resulting xmlDtdPtr or NULL in case of error.
10226 * @input will be freed at parsing end.
10227 */
10228
10229xmlDtdPtr
10230xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10231 xmlCharEncoding enc) {
10232 xmlDtdPtr ret = NULL;
10233 xmlParserCtxtPtr ctxt;
10234 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010235 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010236
10237 if (input == NULL)
10238 return(NULL);
10239
10240 ctxt = xmlNewParserCtxt();
10241 if (ctxt == NULL) {
10242 return(NULL);
10243 }
10244
10245 /*
10246 * Set-up the SAX context
10247 */
10248 if (sax != NULL) {
10249 if (ctxt->sax != NULL)
10250 xmlFree(ctxt->sax);
10251 ctxt->sax = sax;
10252 ctxt->userData = NULL;
10253 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010254 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010255
10256 /*
10257 * generate a parser input from the I/O handler
10258 */
10259
10260 pinput = xmlNewIOInputStream(ctxt, input, enc);
10261 if (pinput == NULL) {
10262 if (sax != NULL) ctxt->sax = NULL;
10263 xmlFreeParserCtxt(ctxt);
10264 return(NULL);
10265 }
10266
10267 /*
10268 * plug some encoding conversion routines here.
10269 */
10270 xmlPushInput(ctxt, pinput);
10271
10272 pinput->filename = NULL;
10273 pinput->line = 1;
10274 pinput->col = 1;
10275 pinput->base = ctxt->input->cur;
10276 pinput->cur = ctxt->input->cur;
10277 pinput->free = NULL;
10278
10279 /*
10280 * let's parse that entity knowing it's an external subset.
10281 */
10282 ctxt->inSubset = 2;
10283 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10284 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10285 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010286
10287 if (enc == XML_CHAR_ENCODING_NONE) {
10288 /*
10289 * Get the 4 first bytes and decode the charset
10290 * if enc != XML_CHAR_ENCODING_NONE
10291 * plug some encoding conversion routines.
10292 */
10293 start[0] = RAW;
10294 start[1] = NXT(1);
10295 start[2] = NXT(2);
10296 start[3] = NXT(3);
10297 enc = xmlDetectCharEncoding(start, 4);
10298 if (enc != XML_CHAR_ENCODING_NONE) {
10299 xmlSwitchEncoding(ctxt, enc);
10300 }
10301 }
10302
Owen Taylor3473f882001-02-23 17:55:21 +000010303 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10304
10305 if (ctxt->myDoc != NULL) {
10306 if (ctxt->wellFormed) {
10307 ret = ctxt->myDoc->extSubset;
10308 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010309 if (ret != NULL) {
10310 xmlNodePtr tmp;
10311
10312 ret->doc = NULL;
10313 tmp = ret->children;
10314 while (tmp != NULL) {
10315 tmp->doc = NULL;
10316 tmp = tmp->next;
10317 }
10318 }
Owen Taylor3473f882001-02-23 17:55:21 +000010319 } else {
10320 ret = NULL;
10321 }
10322 xmlFreeDoc(ctxt->myDoc);
10323 ctxt->myDoc = NULL;
10324 }
10325 if (sax != NULL) ctxt->sax = NULL;
10326 xmlFreeParserCtxt(ctxt);
10327
10328 return(ret);
10329}
10330
10331/**
10332 * xmlSAXParseDTD:
10333 * @sax: the SAX handler block
10334 * @ExternalID: a NAME* containing the External ID of the DTD
10335 * @SystemID: a NAME* containing the URL to the DTD
10336 *
10337 * Load and parse an external subset.
10338 *
10339 * Returns the resulting xmlDtdPtr or NULL in case of error.
10340 */
10341
10342xmlDtdPtr
10343xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10344 const xmlChar *SystemID) {
10345 xmlDtdPtr ret = NULL;
10346 xmlParserCtxtPtr ctxt;
10347 xmlParserInputPtr input = NULL;
10348 xmlCharEncoding enc;
10349
10350 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10351
10352 ctxt = xmlNewParserCtxt();
10353 if (ctxt == NULL) {
10354 return(NULL);
10355 }
10356
10357 /*
10358 * Set-up the SAX context
10359 */
10360 if (sax != NULL) {
10361 if (ctxt->sax != NULL)
10362 xmlFree(ctxt->sax);
10363 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010364 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010365 }
10366
10367 /*
10368 * Ask the Entity resolver to load the damn thing
10369 */
10370
10371 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillardc6abc3d2003-04-26 13:27:30 +000010372 input = ctxt->sax->resolveEntity(ctxt, ExternalID, SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010373 if (input == NULL) {
10374 if (sax != NULL) ctxt->sax = NULL;
10375 xmlFreeParserCtxt(ctxt);
10376 return(NULL);
10377 }
10378
10379 /*
10380 * plug some encoding conversion routines here.
10381 */
10382 xmlPushInput(ctxt, input);
10383 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10384 xmlSwitchEncoding(ctxt, enc);
10385
10386 if (input->filename == NULL)
Daniel Veillard85095e22003-04-23 13:56:44 +000010387 input->filename = (char *) xmlCanonicPath(SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010388 input->line = 1;
10389 input->col = 1;
10390 input->base = ctxt->input->cur;
10391 input->cur = ctxt->input->cur;
10392 input->free = NULL;
10393
10394 /*
10395 * let's parse that entity knowing it's an external subset.
10396 */
10397 ctxt->inSubset = 2;
10398 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10399 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10400 ExternalID, SystemID);
10401 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10402
10403 if (ctxt->myDoc != NULL) {
10404 if (ctxt->wellFormed) {
10405 ret = ctxt->myDoc->extSubset;
10406 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010407 if (ret != NULL) {
10408 xmlNodePtr tmp;
10409
10410 ret->doc = NULL;
10411 tmp = ret->children;
10412 while (tmp != NULL) {
10413 tmp->doc = NULL;
10414 tmp = tmp->next;
10415 }
10416 }
Owen Taylor3473f882001-02-23 17:55:21 +000010417 } else {
10418 ret = NULL;
10419 }
10420 xmlFreeDoc(ctxt->myDoc);
10421 ctxt->myDoc = NULL;
10422 }
10423 if (sax != NULL) ctxt->sax = NULL;
10424 xmlFreeParserCtxt(ctxt);
10425
10426 return(ret);
10427}
10428
10429/**
10430 * xmlParseDTD:
10431 * @ExternalID: a NAME* containing the External ID of the DTD
10432 * @SystemID: a NAME* containing the URL to the DTD
10433 *
10434 * Load and parse an external subset.
10435 *
10436 * Returns the resulting xmlDtdPtr or NULL in case of error.
10437 */
10438
10439xmlDtdPtr
10440xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10441 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10442}
10443
10444/************************************************************************
10445 * *
10446 * Front ends when parsing an Entity *
10447 * *
10448 ************************************************************************/
10449
10450/**
Owen Taylor3473f882001-02-23 17:55:21 +000010451 * xmlParseCtxtExternalEntity:
10452 * @ctx: the existing parsing context
10453 * @URL: the URL for the entity to load
10454 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010455 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010456 *
10457 * Parse an external general entity within an existing parsing context
10458 * An external general parsed entity is well-formed if it matches the
10459 * production labeled extParsedEnt.
10460 *
10461 * [78] extParsedEnt ::= TextDecl? content
10462 *
10463 * Returns 0 if the entity is well formed, -1 in case of args problem and
10464 * the parser error code otherwise
10465 */
10466
10467int
10468xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010469 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010470 xmlParserCtxtPtr ctxt;
10471 xmlDocPtr newDoc;
10472 xmlSAXHandlerPtr oldsax = NULL;
10473 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010474 xmlChar start[4];
10475 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010476
10477 if (ctx->depth > 40) {
10478 return(XML_ERR_ENTITY_LOOP);
10479 }
10480
Daniel Veillardcda96922001-08-21 10:56:31 +000010481 if (lst != NULL)
10482 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010483 if ((URL == NULL) && (ID == NULL))
10484 return(-1);
10485 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10486 return(-1);
10487
10488
10489 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10490 if (ctxt == NULL) return(-1);
10491 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010492 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010493 oldsax = ctxt->sax;
10494 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010495 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010496 newDoc = xmlNewDoc(BAD_CAST "1.0");
10497 if (newDoc == NULL) {
10498 xmlFreeParserCtxt(ctxt);
10499 return(-1);
10500 }
10501 if (ctx->myDoc != NULL) {
10502 newDoc->intSubset = ctx->myDoc->intSubset;
10503 newDoc->extSubset = ctx->myDoc->extSubset;
10504 }
10505 if (ctx->myDoc->URL != NULL) {
10506 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10507 }
10508 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10509 if (newDoc->children == NULL) {
10510 ctxt->sax = oldsax;
10511 xmlFreeParserCtxt(ctxt);
10512 newDoc->intSubset = NULL;
10513 newDoc->extSubset = NULL;
10514 xmlFreeDoc(newDoc);
10515 return(-1);
10516 }
10517 nodePush(ctxt, newDoc->children);
10518 if (ctx->myDoc == NULL) {
10519 ctxt->myDoc = newDoc;
10520 } else {
10521 ctxt->myDoc = ctx->myDoc;
10522 newDoc->children->doc = ctx->myDoc;
10523 }
10524
Daniel Veillard87a764e2001-06-20 17:41:10 +000010525 /*
10526 * Get the 4 first bytes and decode the charset
10527 * if enc != XML_CHAR_ENCODING_NONE
10528 * plug some encoding conversion routines.
10529 */
10530 GROW
10531 start[0] = RAW;
10532 start[1] = NXT(1);
10533 start[2] = NXT(2);
10534 start[3] = NXT(3);
10535 enc = xmlDetectCharEncoding(start, 4);
10536 if (enc != XML_CHAR_ENCODING_NONE) {
10537 xmlSwitchEncoding(ctxt, enc);
10538 }
10539
Owen Taylor3473f882001-02-23 17:55:21 +000010540 /*
10541 * Parse a possible text declaration first
10542 */
Owen Taylor3473f882001-02-23 17:55:21 +000010543 if ((RAW == '<') && (NXT(1) == '?') &&
10544 (NXT(2) == 'x') && (NXT(3) == 'm') &&
10545 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
10546 xmlParseTextDecl(ctxt);
10547 }
10548
10549 /*
10550 * Doing validity checking on chunk doesn't make sense
10551 */
10552 ctxt->instate = XML_PARSER_CONTENT;
10553 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010554 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010555 ctxt->loadsubset = ctx->loadsubset;
10556 ctxt->depth = ctx->depth + 1;
10557 ctxt->replaceEntities = ctx->replaceEntities;
10558 if (ctxt->validate) {
10559 ctxt->vctxt.error = ctx->vctxt.error;
10560 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010561 } else {
10562 ctxt->vctxt.error = NULL;
10563 ctxt->vctxt.warning = NULL;
10564 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010565 ctxt->vctxt.nodeTab = NULL;
10566 ctxt->vctxt.nodeNr = 0;
10567 ctxt->vctxt.nodeMax = 0;
10568 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010569
10570 xmlParseContent(ctxt);
10571
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010572 ctx->validate = ctxt->validate;
10573 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010574 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010575 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010576 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010577 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010578 }
10579 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010580 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010581 }
10582
10583 if (!ctxt->wellFormed) {
10584 if (ctxt->errNo == 0)
10585 ret = 1;
10586 else
10587 ret = ctxt->errNo;
10588 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010589 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010590 xmlNodePtr cur;
10591
10592 /*
10593 * Return the newly created nodeset after unlinking it from
10594 * they pseudo parent.
10595 */
10596 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010597 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010598 while (cur != NULL) {
10599 cur->parent = NULL;
10600 cur = cur->next;
10601 }
10602 newDoc->children->children = NULL;
10603 }
10604 ret = 0;
10605 }
10606 ctxt->sax = oldsax;
10607 xmlFreeParserCtxt(ctxt);
10608 newDoc->intSubset = NULL;
10609 newDoc->extSubset = NULL;
10610 xmlFreeDoc(newDoc);
10611
10612 return(ret);
10613}
10614
10615/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010616 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010617 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010618 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010619 * @sax: the SAX handler bloc (possibly NULL)
10620 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10621 * @depth: Used for loop detection, use 0
10622 * @URL: the URL for the entity to load
10623 * @ID: the System ID for the entity to load
10624 * @list: the return value for the set of parsed nodes
10625 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010626 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010627 *
10628 * Returns 0 if the entity is well formed, -1 in case of args problem and
10629 * the parser error code otherwise
10630 */
10631
Daniel Veillard257d9102001-05-08 10:41:44 +000010632static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010633xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10634 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010635 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010636 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010637 xmlParserCtxtPtr ctxt;
10638 xmlDocPtr newDoc;
10639 xmlSAXHandlerPtr oldsax = NULL;
10640 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010641 xmlChar start[4];
10642 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010643
10644 if (depth > 40) {
10645 return(XML_ERR_ENTITY_LOOP);
10646 }
10647
10648
10649
10650 if (list != NULL)
10651 *list = NULL;
10652 if ((URL == NULL) && (ID == NULL))
10653 return(-1);
10654 if (doc == NULL) /* @@ relax but check for dereferences */
10655 return(-1);
10656
10657
10658 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10659 if (ctxt == NULL) return(-1);
10660 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010661 if (oldctxt != NULL) {
10662 ctxt->_private = oldctxt->_private;
10663 ctxt->loadsubset = oldctxt->loadsubset;
10664 ctxt->validate = oldctxt->validate;
10665 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010666 ctxt->record_info = oldctxt->record_info;
10667 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10668 ctxt->node_seq.length = oldctxt->node_seq.length;
10669 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010670 } else {
10671 /*
10672 * Doing validity checking on chunk without context
10673 * doesn't make sense
10674 */
10675 ctxt->_private = NULL;
10676 ctxt->validate = 0;
10677 ctxt->external = 2;
10678 ctxt->loadsubset = 0;
10679 }
Owen Taylor3473f882001-02-23 17:55:21 +000010680 if (sax != NULL) {
10681 oldsax = ctxt->sax;
10682 ctxt->sax = sax;
10683 if (user_data != NULL)
10684 ctxt->userData = user_data;
10685 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010686 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010687 newDoc = xmlNewDoc(BAD_CAST "1.0");
10688 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010689 ctxt->node_seq.maximum = 0;
10690 ctxt->node_seq.length = 0;
10691 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010692 xmlFreeParserCtxt(ctxt);
10693 return(-1);
10694 }
10695 if (doc != NULL) {
10696 newDoc->intSubset = doc->intSubset;
10697 newDoc->extSubset = doc->extSubset;
10698 }
10699 if (doc->URL != NULL) {
10700 newDoc->URL = xmlStrdup(doc->URL);
10701 }
10702 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10703 if (newDoc->children == NULL) {
10704 if (sax != NULL)
10705 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010706 ctxt->node_seq.maximum = 0;
10707 ctxt->node_seq.length = 0;
10708 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010709 xmlFreeParserCtxt(ctxt);
10710 newDoc->intSubset = NULL;
10711 newDoc->extSubset = NULL;
10712 xmlFreeDoc(newDoc);
10713 return(-1);
10714 }
10715 nodePush(ctxt, newDoc->children);
10716 if (doc == NULL) {
10717 ctxt->myDoc = newDoc;
10718 } else {
10719 ctxt->myDoc = doc;
10720 newDoc->children->doc = doc;
10721 }
10722
Daniel Veillard87a764e2001-06-20 17:41:10 +000010723 /*
10724 * Get the 4 first bytes and decode the charset
10725 * if enc != XML_CHAR_ENCODING_NONE
10726 * plug some encoding conversion routines.
10727 */
10728 GROW;
10729 start[0] = RAW;
10730 start[1] = NXT(1);
10731 start[2] = NXT(2);
10732 start[3] = NXT(3);
10733 enc = xmlDetectCharEncoding(start, 4);
10734 if (enc != XML_CHAR_ENCODING_NONE) {
10735 xmlSwitchEncoding(ctxt, enc);
10736 }
10737
Owen Taylor3473f882001-02-23 17:55:21 +000010738 /*
10739 * Parse a possible text declaration first
10740 */
Owen Taylor3473f882001-02-23 17:55:21 +000010741 if ((RAW == '<') && (NXT(1) == '?') &&
10742 (NXT(2) == 'x') && (NXT(3) == 'm') &&
10743 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
10744 xmlParseTextDecl(ctxt);
10745 }
10746
Owen Taylor3473f882001-02-23 17:55:21 +000010747 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000010748 ctxt->depth = depth;
10749
10750 xmlParseContent(ctxt);
10751
Daniel Veillard561b7f82002-03-20 21:55:57 +000010752 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010753 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000010754 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010755 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010756 }
10757 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010758 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010759 }
10760
10761 if (!ctxt->wellFormed) {
10762 if (ctxt->errNo == 0)
10763 ret = 1;
10764 else
10765 ret = ctxt->errNo;
10766 } else {
10767 if (list != NULL) {
10768 xmlNodePtr cur;
10769
10770 /*
10771 * Return the newly created nodeset after unlinking it from
10772 * they pseudo parent.
10773 */
10774 cur = newDoc->children->children;
10775 *list = cur;
10776 while (cur != NULL) {
10777 cur->parent = NULL;
10778 cur = cur->next;
10779 }
10780 newDoc->children->children = NULL;
10781 }
10782 ret = 0;
10783 }
10784 if (sax != NULL)
10785 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010786 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10787 oldctxt->node_seq.length = ctxt->node_seq.length;
10788 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010789 ctxt->node_seq.maximum = 0;
10790 ctxt->node_seq.length = 0;
10791 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010792 xmlFreeParserCtxt(ctxt);
10793 newDoc->intSubset = NULL;
10794 newDoc->extSubset = NULL;
10795 xmlFreeDoc(newDoc);
10796
10797 return(ret);
10798}
10799
10800/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010801 * xmlParseExternalEntity:
10802 * @doc: the document the chunk pertains to
10803 * @sax: the SAX handler bloc (possibly NULL)
10804 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10805 * @depth: Used for loop detection, use 0
10806 * @URL: the URL for the entity to load
10807 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010808 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010809 *
10810 * Parse an external general entity
10811 * An external general parsed entity is well-formed if it matches the
10812 * production labeled extParsedEnt.
10813 *
10814 * [78] extParsedEnt ::= TextDecl? content
10815 *
10816 * Returns 0 if the entity is well formed, -1 in case of args problem and
10817 * the parser error code otherwise
10818 */
10819
10820int
10821xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010822 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010823 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010824 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010825}
10826
10827/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010828 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010829 * @doc: the document the chunk pertains to
10830 * @sax: the SAX handler bloc (possibly NULL)
10831 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10832 * @depth: Used for loop detection, use 0
10833 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010834 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010835 *
10836 * Parse a well-balanced chunk of an XML document
10837 * called by the parser
10838 * The allowed sequence for the Well Balanced Chunk is the one defined by
10839 * the content production in the XML grammar:
10840 *
10841 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10842 *
10843 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10844 * the parser error code otherwise
10845 */
10846
10847int
10848xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010849 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010850 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10851 depth, string, lst, 0 );
10852}
10853
10854/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000010855 * xmlParseBalancedChunkMemoryInternal:
10856 * @oldctxt: the existing parsing context
10857 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10858 * @user_data: the user data field for the parser context
10859 * @lst: the return value for the set of parsed nodes
10860 *
10861 *
10862 * Parse a well-balanced chunk of an XML document
10863 * called by the parser
10864 * The allowed sequence for the Well Balanced Chunk is the one defined by
10865 * the content production in the XML grammar:
10866 *
10867 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10868 *
10869 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10870 * the parser error code otherwise
10871 *
10872 * In case recover is set to 1, the nodelist will not be empty even if
10873 * the parsed chunk is not well balanced.
10874 */
10875static int
10876xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
10877 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
10878 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010879 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010880 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010881 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010882 int size;
10883 int ret = 0;
10884
10885 if (oldctxt->depth > 40) {
10886 return(XML_ERR_ENTITY_LOOP);
10887 }
10888
10889
10890 if (lst != NULL)
10891 *lst = NULL;
10892 if (string == NULL)
10893 return(-1);
10894
10895 size = xmlStrlen(string);
10896
10897 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10898 if (ctxt == NULL) return(-1);
10899 if (user_data != NULL)
10900 ctxt->userData = user_data;
10901 else
10902 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010903 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10904 ctxt->dict = oldctxt->dict;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010905
10906 oldsax = ctxt->sax;
10907 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010908 xmlDetectSAX2(ctxt);
10909
Daniel Veillarde1ca5032002-12-09 14:13:43 +000010910 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010911 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010912 newDoc = xmlNewDoc(BAD_CAST "1.0");
10913 if (newDoc == NULL) {
10914 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010915 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010916 xmlFreeParserCtxt(ctxt);
10917 return(-1);
10918 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010919 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010920 } else {
10921 ctxt->myDoc = oldctxt->myDoc;
10922 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010923 }
Daniel Veillard9bc53102002-11-25 13:20:04 +000010924 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +000010925 BAD_CAST "pseudoroot", NULL);
10926 if (ctxt->myDoc->children == NULL) {
10927 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010928 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010929 xmlFreeParserCtxt(ctxt);
10930 if (newDoc != NULL)
10931 xmlFreeDoc(newDoc);
10932 return(-1);
10933 }
10934 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010935 ctxt->instate = XML_PARSER_CONTENT;
10936 ctxt->depth = oldctxt->depth + 1;
10937
Daniel Veillard328f48c2002-11-15 15:24:34 +000010938 ctxt->validate = 0;
10939 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000010940 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
10941 /*
10942 * ID/IDREF registration will be done in xmlValidateElement below
10943 */
10944 ctxt->loadsubset |= XML_SKIP_IDS;
10945 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010946 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010947
Daniel Veillard68e9e742002-11-16 15:35:11 +000010948 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010949 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010950 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010951 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010952 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010953 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010954 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010955 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010956 }
10957
10958 if (!ctxt->wellFormed) {
10959 if (ctxt->errNo == 0)
10960 ret = 1;
10961 else
10962 ret = ctxt->errNo;
10963 } else {
10964 ret = 0;
10965 }
10966
10967 if ((lst != NULL) && (ret == 0)) {
10968 xmlNodePtr cur;
10969
10970 /*
10971 * Return the newly created nodeset after unlinking it from
10972 * they pseudo parent.
10973 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000010974 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010975 *lst = cur;
10976 while (cur != NULL) {
Daniel Veillard8d589042003-02-04 15:07:21 +000010977 if (oldctxt->validate && oldctxt->wellFormed &&
10978 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
10979 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
10980 oldctxt->myDoc, cur);
10981 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010982 cur->parent = NULL;
10983 cur = cur->next;
10984 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010985 ctxt->myDoc->children->children = NULL;
10986 }
10987 if (ctxt->myDoc != NULL) {
10988 xmlFreeNode(ctxt->myDoc->children);
10989 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010990 }
10991
10992 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010993 ctxt->dict = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010994 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010995 if (newDoc != NULL)
10996 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010997
10998 return(ret);
10999}
11000
11001/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011002 * xmlParseBalancedChunkMemoryRecover:
11003 * @doc: the document the chunk pertains to
11004 * @sax: the SAX handler bloc (possibly NULL)
11005 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11006 * @depth: Used for loop detection, use 0
11007 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11008 * @lst: the return value for the set of parsed nodes
11009 * @recover: return nodes even if the data is broken (use 0)
11010 *
11011 *
11012 * Parse a well-balanced chunk of an XML document
11013 * called by the parser
11014 * The allowed sequence for the Well Balanced Chunk is the one defined by
11015 * the content production in the XML grammar:
11016 *
11017 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11018 *
11019 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11020 * the parser error code otherwise
11021 *
11022 * In case recover is set to 1, the nodelist will not be empty even if
11023 * the parsed chunk is not well balanced.
11024 */
11025int
11026xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11027 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11028 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011029 xmlParserCtxtPtr ctxt;
11030 xmlDocPtr newDoc;
11031 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000011032 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000011033 int size;
11034 int ret = 0;
11035
11036 if (depth > 40) {
11037 return(XML_ERR_ENTITY_LOOP);
11038 }
11039
11040
Daniel Veillardcda96922001-08-21 10:56:31 +000011041 if (lst != NULL)
11042 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011043 if (string == NULL)
11044 return(-1);
11045
11046 size = xmlStrlen(string);
11047
11048 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11049 if (ctxt == NULL) return(-1);
11050 ctxt->userData = ctxt;
11051 if (sax != NULL) {
11052 oldsax = ctxt->sax;
11053 ctxt->sax = sax;
11054 if (user_data != NULL)
11055 ctxt->userData = user_data;
11056 }
11057 newDoc = xmlNewDoc(BAD_CAST "1.0");
11058 if (newDoc == NULL) {
11059 xmlFreeParserCtxt(ctxt);
11060 return(-1);
11061 }
11062 if (doc != NULL) {
11063 newDoc->intSubset = doc->intSubset;
11064 newDoc->extSubset = doc->extSubset;
11065 }
11066 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11067 if (newDoc->children == NULL) {
11068 if (sax != NULL)
11069 ctxt->sax = oldsax;
11070 xmlFreeParserCtxt(ctxt);
11071 newDoc->intSubset = NULL;
11072 newDoc->extSubset = NULL;
11073 xmlFreeDoc(newDoc);
11074 return(-1);
11075 }
11076 nodePush(ctxt, newDoc->children);
11077 if (doc == NULL) {
11078 ctxt->myDoc = newDoc;
11079 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011080 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011081 newDoc->children->doc = doc;
11082 }
11083 ctxt->instate = XML_PARSER_CONTENT;
11084 ctxt->depth = depth;
11085
11086 /*
11087 * Doing validity checking on chunk doesn't make sense
11088 */
11089 ctxt->validate = 0;
11090 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011091 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011092
Daniel Veillardb39bc392002-10-26 19:29:51 +000011093 if ( doc != NULL ){
11094 content = doc->children;
11095 doc->children = NULL;
11096 xmlParseContent(ctxt);
11097 doc->children = content;
11098 }
11099 else {
11100 xmlParseContent(ctxt);
11101 }
Owen Taylor3473f882001-02-23 17:55:21 +000011102 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011103 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011104 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011105 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011106 }
11107 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011108 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011109 }
11110
11111 if (!ctxt->wellFormed) {
11112 if (ctxt->errNo == 0)
11113 ret = 1;
11114 else
11115 ret = ctxt->errNo;
11116 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011117 ret = 0;
11118 }
11119
11120 if (lst != NULL && (ret == 0 || recover == 1)) {
11121 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011122
11123 /*
11124 * Return the newly created nodeset after unlinking it from
11125 * they pseudo parent.
11126 */
11127 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011128 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011129 while (cur != NULL) {
11130 cur->parent = NULL;
11131 cur = cur->next;
11132 }
11133 newDoc->children->children = NULL;
11134 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011135
Owen Taylor3473f882001-02-23 17:55:21 +000011136 if (sax != NULL)
11137 ctxt->sax = oldsax;
11138 xmlFreeParserCtxt(ctxt);
11139 newDoc->intSubset = NULL;
11140 newDoc->extSubset = NULL;
11141 xmlFreeDoc(newDoc);
11142
11143 return(ret);
11144}
11145
11146/**
11147 * xmlSAXParseEntity:
11148 * @sax: the SAX handler block
11149 * @filename: the filename
11150 *
11151 * parse an XML external entity out of context and build a tree.
11152 * It use the given SAX function block to handle the parsing callback.
11153 * If sax is NULL, fallback to the default DOM tree building routines.
11154 *
11155 * [78] extParsedEnt ::= TextDecl? content
11156 *
11157 * This correspond to a "Well Balanced" chunk
11158 *
11159 * Returns the resulting document tree
11160 */
11161
11162xmlDocPtr
11163xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11164 xmlDocPtr ret;
11165 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011166
11167 ctxt = xmlCreateFileParserCtxt(filename);
11168 if (ctxt == NULL) {
11169 return(NULL);
11170 }
11171 if (sax != NULL) {
11172 if (ctxt->sax != NULL)
11173 xmlFree(ctxt->sax);
11174 ctxt->sax = sax;
11175 ctxt->userData = NULL;
11176 }
11177
Owen Taylor3473f882001-02-23 17:55:21 +000011178 xmlParseExtParsedEnt(ctxt);
11179
11180 if (ctxt->wellFormed)
11181 ret = ctxt->myDoc;
11182 else {
11183 ret = NULL;
11184 xmlFreeDoc(ctxt->myDoc);
11185 ctxt->myDoc = NULL;
11186 }
11187 if (sax != NULL)
11188 ctxt->sax = NULL;
11189 xmlFreeParserCtxt(ctxt);
11190
11191 return(ret);
11192}
11193
11194/**
11195 * xmlParseEntity:
11196 * @filename: the filename
11197 *
11198 * parse an XML external entity out of context and build a tree.
11199 *
11200 * [78] extParsedEnt ::= TextDecl? content
11201 *
11202 * This correspond to a "Well Balanced" chunk
11203 *
11204 * Returns the resulting document tree
11205 */
11206
11207xmlDocPtr
11208xmlParseEntity(const char *filename) {
11209 return(xmlSAXParseEntity(NULL, filename));
11210}
11211
11212/**
11213 * xmlCreateEntityParserCtxt:
11214 * @URL: the entity URL
11215 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011216 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000011217 *
11218 * Create a parser context for an external entity
11219 * Automatic support for ZLIB/Compress compressed document is provided
11220 * by default if found at compile-time.
11221 *
11222 * Returns the new parser context or NULL
11223 */
11224xmlParserCtxtPtr
11225xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
11226 const xmlChar *base) {
11227 xmlParserCtxtPtr ctxt;
11228 xmlParserInputPtr inputStream;
11229 char *directory = NULL;
11230 xmlChar *uri;
11231
11232 ctxt = xmlNewParserCtxt();
11233 if (ctxt == NULL) {
11234 return(NULL);
11235 }
11236
11237 uri = xmlBuildURI(URL, base);
11238
11239 if (uri == NULL) {
11240 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11241 if (inputStream == NULL) {
11242 xmlFreeParserCtxt(ctxt);
11243 return(NULL);
11244 }
11245
11246 inputPush(ctxt, inputStream);
11247
11248 if ((ctxt->directory == NULL) && (directory == NULL))
11249 directory = xmlParserGetDirectory((char *)URL);
11250 if ((ctxt->directory == NULL) && (directory != NULL))
11251 ctxt->directory = directory;
11252 } else {
11253 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
11254 if (inputStream == NULL) {
11255 xmlFree(uri);
11256 xmlFreeParserCtxt(ctxt);
11257 return(NULL);
11258 }
11259
11260 inputPush(ctxt, inputStream);
11261
11262 if ((ctxt->directory == NULL) && (directory == NULL))
11263 directory = xmlParserGetDirectory((char *)uri);
11264 if ((ctxt->directory == NULL) && (directory != NULL))
11265 ctxt->directory = directory;
11266 xmlFree(uri);
11267 }
Owen Taylor3473f882001-02-23 17:55:21 +000011268 return(ctxt);
11269}
11270
11271/************************************************************************
11272 * *
11273 * Front ends when parsing from a file *
11274 * *
11275 ************************************************************************/
11276
11277/**
11278 * xmlCreateFileParserCtxt:
11279 * @filename: the filename
11280 *
11281 * Create a parser context for a file content.
11282 * Automatic support for ZLIB/Compress compressed document is provided
11283 * by default if found at compile-time.
11284 *
11285 * Returns the new parser context or NULL
11286 */
11287xmlParserCtxtPtr
11288xmlCreateFileParserCtxt(const char *filename)
11289{
11290 xmlParserCtxtPtr ctxt;
11291 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000011292 char *directory = NULL;
11293
Owen Taylor3473f882001-02-23 17:55:21 +000011294 ctxt = xmlNewParserCtxt();
11295 if (ctxt == NULL) {
11296 if (xmlDefaultSAXHandler.error != NULL) {
11297 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
11298 }
11299 return(NULL);
11300 }
11301
Igor Zlatkovicce076162003-02-23 13:39:39 +000011302
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000011303 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011304 if (inputStream == NULL) {
11305 xmlFreeParserCtxt(ctxt);
11306 return(NULL);
11307 }
11308
Owen Taylor3473f882001-02-23 17:55:21 +000011309 inputPush(ctxt, inputStream);
11310 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011311 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011312 if ((ctxt->directory == NULL) && (directory != NULL))
11313 ctxt->directory = directory;
11314
11315 return(ctxt);
11316}
11317
11318/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011319 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000011320 * @sax: the SAX handler block
11321 * @filename: the filename
11322 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11323 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000011324 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000011325 *
11326 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11327 * compressed document is provided by default if found at compile-time.
11328 * It use the given SAX function block to handle the parsing callback.
11329 * If sax is NULL, fallback to the default DOM tree building routines.
11330 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011331 * User data (void *) is stored within the parser context in the
11332 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011333 *
Owen Taylor3473f882001-02-23 17:55:21 +000011334 * Returns the resulting document tree
11335 */
11336
11337xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011338xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11339 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011340 xmlDocPtr ret;
11341 xmlParserCtxtPtr ctxt;
11342 char *directory = NULL;
11343
Daniel Veillard635ef722001-10-29 11:48:19 +000011344 xmlInitParser();
11345
Owen Taylor3473f882001-02-23 17:55:21 +000011346 ctxt = xmlCreateFileParserCtxt(filename);
11347 if (ctxt == NULL) {
11348 return(NULL);
11349 }
11350 if (sax != NULL) {
11351 if (ctxt->sax != NULL)
11352 xmlFree(ctxt->sax);
11353 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011354 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011355 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011356 if (data!=NULL) {
11357 ctxt->_private=data;
11358 }
Owen Taylor3473f882001-02-23 17:55:21 +000011359
11360 if ((ctxt->directory == NULL) && (directory == NULL))
11361 directory = xmlParserGetDirectory(filename);
11362 if ((ctxt->directory == NULL) && (directory != NULL))
11363 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11364
Daniel Veillarddad3f682002-11-17 16:47:27 +000011365 ctxt->recovery = recovery;
11366
Owen Taylor3473f882001-02-23 17:55:21 +000011367 xmlParseDocument(ctxt);
11368
William M. Brackc07329e2003-09-08 01:57:30 +000011369 if ((ctxt->wellFormed) || recovery) {
11370 ret = ctxt->myDoc;
11371 if (ctxt->input->buf->compressed > 0)
11372 ret->compression = 9;
11373 else
11374 ret->compression = ctxt->input->buf->compressed;
11375 }
Owen Taylor3473f882001-02-23 17:55:21 +000011376 else {
11377 ret = NULL;
11378 xmlFreeDoc(ctxt->myDoc);
11379 ctxt->myDoc = NULL;
11380 }
11381 if (sax != NULL)
11382 ctxt->sax = NULL;
11383 xmlFreeParserCtxt(ctxt);
11384
11385 return(ret);
11386}
11387
11388/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011389 * xmlSAXParseFile:
11390 * @sax: the SAX handler block
11391 * @filename: the filename
11392 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11393 * documents
11394 *
11395 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11396 * compressed document is provided by default if found at compile-time.
11397 * It use the given SAX function block to handle the parsing callback.
11398 * If sax is NULL, fallback to the default DOM tree building routines.
11399 *
11400 * Returns the resulting document tree
11401 */
11402
11403xmlDocPtr
11404xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11405 int recovery) {
11406 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11407}
11408
11409/**
Owen Taylor3473f882001-02-23 17:55:21 +000011410 * xmlRecoverDoc:
11411 * @cur: a pointer to an array of xmlChar
11412 *
11413 * parse an XML in-memory document and build a tree.
11414 * In the case the document is not Well Formed, a tree is built anyway
11415 *
11416 * Returns the resulting document tree
11417 */
11418
11419xmlDocPtr
11420xmlRecoverDoc(xmlChar *cur) {
11421 return(xmlSAXParseDoc(NULL, cur, 1));
11422}
11423
11424/**
11425 * xmlParseFile:
11426 * @filename: the filename
11427 *
11428 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11429 * compressed document is provided by default if found at compile-time.
11430 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000011431 * Returns the resulting document tree if the file was wellformed,
11432 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000011433 */
11434
11435xmlDocPtr
11436xmlParseFile(const char *filename) {
11437 return(xmlSAXParseFile(NULL, filename, 0));
11438}
11439
11440/**
11441 * xmlRecoverFile:
11442 * @filename: the filename
11443 *
11444 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11445 * compressed document is provided by default if found at compile-time.
11446 * In the case the document is not Well Formed, a tree is built anyway
11447 *
11448 * Returns the resulting document tree
11449 */
11450
11451xmlDocPtr
11452xmlRecoverFile(const char *filename) {
11453 return(xmlSAXParseFile(NULL, filename, 1));
11454}
11455
11456
11457/**
11458 * xmlSetupParserForBuffer:
11459 * @ctxt: an XML parser context
11460 * @buffer: a xmlChar * buffer
11461 * @filename: a file name
11462 *
11463 * Setup the parser context to parse a new buffer; Clears any prior
11464 * contents from the parser context. The buffer parameter must not be
11465 * NULL, but the filename parameter can be
11466 */
11467void
11468xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
11469 const char* filename)
11470{
11471 xmlParserInputPtr input;
11472
11473 input = xmlNewInputStream(ctxt);
11474 if (input == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +000011475 xmlGenericError(xmlGenericErrorContext,
11476 "malloc");
Owen Taylor3473f882001-02-23 17:55:21 +000011477 xmlFree(ctxt);
11478 return;
11479 }
11480
11481 xmlClearParserCtxt(ctxt);
11482 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000011483 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011484 input->base = buffer;
11485 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011486 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000011487 inputPush(ctxt, input);
11488}
11489
11490/**
11491 * xmlSAXUserParseFile:
11492 * @sax: a SAX handler
11493 * @user_data: The user data returned on SAX callbacks
11494 * @filename: a file name
11495 *
11496 * parse an XML file and call the given SAX handler routines.
11497 * Automatic support for ZLIB/Compress compressed document is provided
11498 *
11499 * Returns 0 in case of success or a error number otherwise
11500 */
11501int
11502xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11503 const char *filename) {
11504 int ret = 0;
11505 xmlParserCtxtPtr ctxt;
11506
11507 ctxt = xmlCreateFileParserCtxt(filename);
11508 if (ctxt == NULL) return -1;
11509 if (ctxt->sax != &xmlDefaultSAXHandler)
11510 xmlFree(ctxt->sax);
11511 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011512 xmlDetectSAX2(ctxt);
11513
Owen Taylor3473f882001-02-23 17:55:21 +000011514 if (user_data != NULL)
11515 ctxt->userData = user_data;
11516
11517 xmlParseDocument(ctxt);
11518
11519 if (ctxt->wellFormed)
11520 ret = 0;
11521 else {
11522 if (ctxt->errNo != 0)
11523 ret = ctxt->errNo;
11524 else
11525 ret = -1;
11526 }
11527 if (sax != NULL)
11528 ctxt->sax = NULL;
11529 xmlFreeParserCtxt(ctxt);
11530
11531 return ret;
11532}
11533
11534/************************************************************************
11535 * *
11536 * Front ends when parsing from memory *
11537 * *
11538 ************************************************************************/
11539
11540/**
11541 * xmlCreateMemoryParserCtxt:
11542 * @buffer: a pointer to a char array
11543 * @size: the size of the array
11544 *
11545 * Create a parser context for an XML in-memory document.
11546 *
11547 * Returns the new parser context or NULL
11548 */
11549xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011550xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011551 xmlParserCtxtPtr ctxt;
11552 xmlParserInputPtr input;
11553 xmlParserInputBufferPtr buf;
11554
11555 if (buffer == NULL)
11556 return(NULL);
11557 if (size <= 0)
11558 return(NULL);
11559
11560 ctxt = xmlNewParserCtxt();
11561 if (ctxt == NULL)
11562 return(NULL);
11563
Daniel Veillard53350552003-09-18 13:35:51 +000011564 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000011565 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011566 if (buf == NULL) {
11567 xmlFreeParserCtxt(ctxt);
11568 return(NULL);
11569 }
Owen Taylor3473f882001-02-23 17:55:21 +000011570
11571 input = xmlNewInputStream(ctxt);
11572 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011573 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011574 xmlFreeParserCtxt(ctxt);
11575 return(NULL);
11576 }
11577
11578 input->filename = NULL;
11579 input->buf = buf;
11580 input->base = input->buf->buffer->content;
11581 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011582 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011583
11584 inputPush(ctxt, input);
11585 return(ctxt);
11586}
11587
11588/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011589 * xmlSAXParseMemoryWithData:
11590 * @sax: the SAX handler block
11591 * @buffer: an pointer to a char array
11592 * @size: the size of the array
11593 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11594 * documents
11595 * @data: the userdata
11596 *
11597 * parse an XML in-memory block and use the given SAX function block
11598 * to handle the parsing callback. If sax is NULL, fallback to the default
11599 * DOM tree building routines.
11600 *
11601 * User data (void *) is stored within the parser context in the
11602 * context's _private member, so it is available nearly everywhere in libxml
11603 *
11604 * Returns the resulting document tree
11605 */
11606
11607xmlDocPtr
11608xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
11609 int size, int recovery, void *data) {
11610 xmlDocPtr ret;
11611 xmlParserCtxtPtr ctxt;
11612
11613 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11614 if (ctxt == NULL) return(NULL);
11615 if (sax != NULL) {
11616 if (ctxt->sax != NULL)
11617 xmlFree(ctxt->sax);
11618 ctxt->sax = sax;
11619 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011620 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011621 if (data!=NULL) {
11622 ctxt->_private=data;
11623 }
11624
Daniel Veillardadba5f12003-04-04 16:09:01 +000011625 ctxt->recovery = recovery;
11626
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011627 xmlParseDocument(ctxt);
11628
11629 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11630 else {
11631 ret = NULL;
11632 xmlFreeDoc(ctxt->myDoc);
11633 ctxt->myDoc = NULL;
11634 }
11635 if (sax != NULL)
11636 ctxt->sax = NULL;
11637 xmlFreeParserCtxt(ctxt);
11638
11639 return(ret);
11640}
11641
11642/**
Owen Taylor3473f882001-02-23 17:55:21 +000011643 * xmlSAXParseMemory:
11644 * @sax: the SAX handler block
11645 * @buffer: an pointer to a char array
11646 * @size: the size of the array
11647 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
11648 * documents
11649 *
11650 * parse an XML in-memory block and use the given SAX function block
11651 * to handle the parsing callback. If sax is NULL, fallback to the default
11652 * DOM tree building routines.
11653 *
11654 * Returns the resulting document tree
11655 */
11656xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000011657xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
11658 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011659 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011660}
11661
11662/**
11663 * xmlParseMemory:
11664 * @buffer: an pointer to a char array
11665 * @size: the size of the array
11666 *
11667 * parse an XML in-memory block and build a tree.
11668 *
11669 * Returns the resulting document tree
11670 */
11671
Daniel Veillard50822cb2001-07-26 20:05:51 +000011672xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011673 return(xmlSAXParseMemory(NULL, buffer, size, 0));
11674}
11675
11676/**
11677 * xmlRecoverMemory:
11678 * @buffer: an pointer to a char array
11679 * @size: the size of the array
11680 *
11681 * parse an XML in-memory block and build a tree.
11682 * In the case the document is not Well Formed, a tree is built anyway
11683 *
11684 * Returns the resulting document tree
11685 */
11686
Daniel Veillard50822cb2001-07-26 20:05:51 +000011687xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011688 return(xmlSAXParseMemory(NULL, buffer, size, 1));
11689}
11690
11691/**
11692 * xmlSAXUserParseMemory:
11693 * @sax: a SAX handler
11694 * @user_data: The user data returned on SAX callbacks
11695 * @buffer: an in-memory XML document input
11696 * @size: the length of the XML document in bytes
11697 *
11698 * A better SAX parsing routine.
11699 * parse an XML in-memory buffer and call the given SAX handler routines.
11700 *
11701 * Returns 0 in case of success or a error number otherwise
11702 */
11703int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011704 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011705 int ret = 0;
11706 xmlParserCtxtPtr ctxt;
11707 xmlSAXHandlerPtr oldsax = NULL;
11708
Daniel Veillard9e923512002-08-14 08:48:52 +000011709 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000011710 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11711 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000011712 oldsax = ctxt->sax;
11713 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011714 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000011715 if (user_data != NULL)
11716 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000011717
11718 xmlParseDocument(ctxt);
11719
11720 if (ctxt->wellFormed)
11721 ret = 0;
11722 else {
11723 if (ctxt->errNo != 0)
11724 ret = ctxt->errNo;
11725 else
11726 ret = -1;
11727 }
Daniel Veillard9e923512002-08-14 08:48:52 +000011728 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000011729 xmlFreeParserCtxt(ctxt);
11730
11731 return ret;
11732}
11733
11734/**
11735 * xmlCreateDocParserCtxt:
11736 * @cur: a pointer to an array of xmlChar
11737 *
11738 * Creates a parser context for an XML in-memory document.
11739 *
11740 * Returns the new parser context or NULL
11741 */
11742xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011743xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000011744 int len;
11745
11746 if (cur == NULL)
11747 return(NULL);
11748 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011749 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000011750}
11751
11752/**
11753 * xmlSAXParseDoc:
11754 * @sax: the SAX handler block
11755 * @cur: a pointer to an array of xmlChar
11756 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11757 * documents
11758 *
11759 * parse an XML in-memory document and build a tree.
11760 * It use the given SAX function block to handle the parsing callback.
11761 * If sax is NULL, fallback to the default DOM tree building routines.
11762 *
11763 * Returns the resulting document tree
11764 */
11765
11766xmlDocPtr
11767xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
11768 xmlDocPtr ret;
11769 xmlParserCtxtPtr ctxt;
11770
11771 if (cur == NULL) return(NULL);
11772
11773
11774 ctxt = xmlCreateDocParserCtxt(cur);
11775 if (ctxt == NULL) return(NULL);
11776 if (sax != NULL) {
11777 ctxt->sax = sax;
11778 ctxt->userData = NULL;
11779 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011780 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011781
11782 xmlParseDocument(ctxt);
11783 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11784 else {
11785 ret = NULL;
11786 xmlFreeDoc(ctxt->myDoc);
11787 ctxt->myDoc = NULL;
11788 }
11789 if (sax != NULL)
11790 ctxt->sax = NULL;
11791 xmlFreeParserCtxt(ctxt);
11792
11793 return(ret);
11794}
11795
11796/**
11797 * xmlParseDoc:
11798 * @cur: a pointer to an array of xmlChar
11799 *
11800 * parse an XML in-memory document and build a tree.
11801 *
11802 * Returns the resulting document tree
11803 */
11804
11805xmlDocPtr
11806xmlParseDoc(xmlChar *cur) {
11807 return(xmlSAXParseDoc(NULL, cur, 0));
11808}
11809
Daniel Veillard8107a222002-01-13 14:10:10 +000011810/************************************************************************
11811 * *
11812 * Specific function to keep track of entities references *
11813 * and used by the XSLT debugger *
11814 * *
11815 ************************************************************************/
11816
11817static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
11818
11819/**
11820 * xmlAddEntityReference:
11821 * @ent : A valid entity
11822 * @firstNode : A valid first node for children of entity
11823 * @lastNode : A valid last node of children entity
11824 *
11825 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
11826 */
11827static void
11828xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
11829 xmlNodePtr lastNode)
11830{
11831 if (xmlEntityRefFunc != NULL) {
11832 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
11833 }
11834}
11835
11836
11837/**
11838 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000011839 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000011840 *
11841 * Set the function to call call back when a xml reference has been made
11842 */
11843void
11844xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
11845{
11846 xmlEntityRefFunc = func;
11847}
Owen Taylor3473f882001-02-23 17:55:21 +000011848
11849/************************************************************************
11850 * *
11851 * Miscellaneous *
11852 * *
11853 ************************************************************************/
11854
11855#ifdef LIBXML_XPATH_ENABLED
11856#include <libxml/xpath.h>
11857#endif
11858
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011859extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000011860static int xmlParserInitialized = 0;
11861
11862/**
11863 * xmlInitParser:
11864 *
11865 * Initialization function for the XML parser.
11866 * This is not reentrant. Call once before processing in case of
11867 * use in multithreaded programs.
11868 */
11869
11870void
11871xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000011872 if (xmlParserInitialized != 0)
11873 return;
Owen Taylor3473f882001-02-23 17:55:21 +000011874
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011875 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
11876 (xmlGenericError == NULL))
11877 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011878 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000011879 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000011880 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000011881 xmlInitCharEncodingHandlers();
11882 xmlInitializePredefinedEntities();
11883 xmlDefaultSAXHandlerInit();
11884 xmlRegisterDefaultInputCallbacks();
11885 xmlRegisterDefaultOutputCallbacks();
11886#ifdef LIBXML_HTML_ENABLED
11887 htmlInitAutoClose();
11888 htmlDefaultSAXHandlerInit();
11889#endif
11890#ifdef LIBXML_XPATH_ENABLED
11891 xmlXPathInit();
11892#endif
11893 xmlParserInitialized = 1;
11894}
11895
11896/**
11897 * xmlCleanupParser:
11898 *
11899 * Cleanup function for the XML parser. It tries to reclaim all
11900 * parsing related global memory allocated for the parser processing.
11901 * It doesn't deallocate any document related memory. Calling this
11902 * function should not prevent reusing the parser.
Daniel Veillard7424eb62003-01-24 14:14:52 +000011903 * One should call xmlCleanupParser() only when the process has
11904 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000011905 */
11906
11907void
11908xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000011909 if (!xmlParserInitialized)
11910 return;
11911
Owen Taylor3473f882001-02-23 17:55:21 +000011912 xmlCleanupCharEncodingHandlers();
11913 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000011914#ifdef LIBXML_CATALOG_ENABLED
11915 xmlCatalogCleanup();
11916#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000011917 xmlCleanupThreads();
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011918 xmlCleanupGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000011919 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011920}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011921
11922/************************************************************************
11923 * *
11924 * New set (2.6.0) of simpler and more flexible APIs *
11925 * *
11926 ************************************************************************/
11927
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored; when "dict" is NULL the string
 * is always freed.
 *
 * Wrapped in do { } while (0) so that the macro behaves as a single
 * statement and can be used safely inside if/else constructs. It must be
 * followed by a semicolon.
 */
#define DICT_FREE(str)                                                  \
    do {                                                                \
        if ((str) && ((!dict) ||                                        \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));                                     \
    } while (0)
11939
11940/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011941 * xmlCtxtReset:
11942 * @ctxt: an XML parser context
11943 *
11944 * Reset a parser context
11945 */
11946void
11947xmlCtxtReset(xmlParserCtxtPtr ctxt)
11948{
11949 xmlParserInputPtr input;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011950 xmlDictPtr dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011951
11952 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
11953 xmlFreeInputStream(input);
11954 }
11955 ctxt->inputNr = 0;
11956 ctxt->input = NULL;
11957
11958 ctxt->spaceNr = 0;
11959 ctxt->spaceTab[0] = -1;
11960 ctxt->space = &ctxt->spaceTab[0];
11961
11962
11963 ctxt->nodeNr = 0;
11964 ctxt->node = NULL;
11965
11966 ctxt->nameNr = 0;
11967 ctxt->name = NULL;
11968
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011969 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011970 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011971 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011972 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011973 DICT_FREE(ctxt->directory);
11974 ctxt->directory = NULL;
11975 DICT_FREE(ctxt->extSubURI);
11976 ctxt->extSubURI = NULL;
11977 DICT_FREE(ctxt->extSubSystem);
11978 ctxt->extSubSystem = NULL;
11979 if (ctxt->myDoc != NULL)
11980 xmlFreeDoc(ctxt->myDoc);
11981 ctxt->myDoc = NULL;
11982
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011983 ctxt->standalone = -1;
11984 ctxt->hasExternalSubset = 0;
11985 ctxt->hasPErefs = 0;
11986 ctxt->html = 0;
11987 ctxt->external = 0;
11988 ctxt->instate = XML_PARSER_START;
11989 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011990
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011991 ctxt->wellFormed = 1;
11992 ctxt->nsWellFormed = 1;
11993 ctxt->valid = 1;
11994 ctxt->vctxt.userData = ctxt;
11995 ctxt->vctxt.error = xmlParserValidityError;
11996 ctxt->vctxt.warning = xmlParserValidityWarning;
11997 ctxt->record_info = 0;
11998 ctxt->nbChars = 0;
11999 ctxt->checkIndex = 0;
12000 ctxt->inSubset = 0;
12001 ctxt->errNo = XML_ERR_OK;
12002 ctxt->depth = 0;
12003 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12004 ctxt->catalogs = NULL;
12005 xmlInitNodeInfoSeq(&ctxt->node_seq);
12006
12007 if (ctxt->attsDefault != NULL) {
12008 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12009 ctxt->attsDefault = NULL;
12010 }
12011 if (ctxt->attsSpecial != NULL) {
12012 xmlHashFree(ctxt->attsSpecial, NULL);
12013 ctxt->attsSpecial = NULL;
12014 }
12015
12016 if (ctxt->catalogs != NULL)
12017 xmlCatalogFreeLocal(ctxt->catalogs);
12018}
12019
12020/**
12021 * xmlCtxtUseOptions:
12022 * @ctxt: an XML parser context
12023 * @options: a combination of xmlParserOption(s)
12024 *
12025 * Applies the options to the parser context
12026 *
12027 * Returns 0 in case of success, the set of unknown or unimplemented options
12028 * in case of error.
12029 */
12030int
12031xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
12032{
12033 if (options & XML_PARSE_RECOVER) {
12034 ctxt->recovery = 1;
12035 options -= XML_PARSE_RECOVER;
12036 } else
12037 ctxt->recovery = 0;
12038 if (options & XML_PARSE_DTDLOAD) {
12039 ctxt->loadsubset = XML_DETECT_IDS;
12040 options -= XML_PARSE_DTDLOAD;
12041 } else
12042 ctxt->loadsubset = 0;
12043 if (options & XML_PARSE_DTDATTR) {
12044 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
12045 options -= XML_PARSE_DTDATTR;
12046 }
12047 if (options & XML_PARSE_NOENT) {
12048 ctxt->replaceEntities = 1;
12049 /* ctxt->loadsubset |= XML_DETECT_IDS; */
12050 options -= XML_PARSE_NOENT;
12051 } else
12052 ctxt->replaceEntities = 0;
12053 if (options & XML_PARSE_NOWARNING) {
12054 ctxt->sax->warning = NULL;
12055 options -= XML_PARSE_NOWARNING;
12056 }
12057 if (options & XML_PARSE_NOERROR) {
12058 ctxt->sax->error = NULL;
12059 ctxt->sax->fatalError = NULL;
12060 options -= XML_PARSE_NOERROR;
12061 }
12062 if (options & XML_PARSE_PEDANTIC) {
12063 ctxt->pedantic = 1;
12064 options -= XML_PARSE_PEDANTIC;
12065 } else
12066 ctxt->pedantic = 0;
12067 if (options & XML_PARSE_NOBLANKS) {
12068 ctxt->keepBlanks = 0;
12069 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
12070 options -= XML_PARSE_NOBLANKS;
12071 } else
12072 ctxt->keepBlanks = 1;
12073 if (options & XML_PARSE_DTDVALID) {
12074 ctxt->validate = 1;
12075 if (options & XML_PARSE_NOWARNING)
12076 ctxt->vctxt.warning = NULL;
12077 if (options & XML_PARSE_NOERROR)
12078 ctxt->vctxt.error = NULL;
12079 options -= XML_PARSE_DTDVALID;
12080 } else
12081 ctxt->validate = 0;
12082 if (options & XML_PARSE_SAX1) {
12083 ctxt->sax->startElement = xmlSAX2StartElement;
12084 ctxt->sax->endElement = xmlSAX2EndElement;
12085 ctxt->sax->startElementNs = NULL;
12086 ctxt->sax->endElementNs = NULL;
12087 ctxt->sax->initialized = 1;
12088 options -= XML_PARSE_SAX1;
12089 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012090 if (options & XML_PARSE_NODICT) {
12091 ctxt->dictNames = 0;
12092 options -= XML_PARSE_NODICT;
12093 } else {
12094 ctxt->dictNames = 1;
12095 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012096 return (options);
12097}
12098
12099/**
12100 * xmlDoRead:
12101 * @ctxt: an XML parser context
12102 * @encoding: the document encoding, or NULL
12103 * @options: a combination of xmlParserOption(s)
12104 * @reuse: keep the context for reuse
12105 *
12106 * Common front-end for the xmlRead functions
12107 *
12108 * Returns the resulting document tree or NULL
12109 */
12110static xmlDocPtr
12111xmlDoRead(xmlParserCtxtPtr ctxt, const char *encoding, int options, int reuse)
12112{
12113 xmlDocPtr ret;
12114
12115 xmlCtxtUseOptions(ctxt, options);
12116 if (encoding != NULL) {
12117 xmlCharEncodingHandlerPtr hdlr;
12118
12119 hdlr = xmlFindCharEncodingHandler(encoding);
12120 if (hdlr != NULL)
12121 xmlSwitchToEncoding(ctxt, hdlr);
12122 }
12123 xmlParseDocument(ctxt);
12124 if ((ctxt->wellFormed) || ctxt->recovery)
12125 ret = ctxt->myDoc;
12126 else {
12127 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012128 if (ctxt->myDoc != NULL) {
12129 ctxt->myDoc->dict = NULL;
12130 xmlFreeDoc(ctxt->myDoc);
12131 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012132 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012133 ctxt->myDoc = NULL;
12134 if (!reuse) {
12135 if ((ctxt->dictNames) &&
12136 (ret != NULL) &&
12137 (ret->dict == ctxt->dict))
12138 ctxt->dict = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012139 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012140 } else {
12141 /* Must duplicate the reference to the dictionary */
12142 if ((ctxt->dictNames) &&
12143 (ret != NULL) &&
12144 (ret->dict == ctxt->dict))
12145 xmlDictReference(ctxt->dict);
12146 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012147
12148 return (ret);
12149}
12150
12151/**
12152 * xmlReadDoc:
12153 * @cur: a pointer to a zero terminated string
12154 * @encoding: the document encoding, or NULL
12155 * @options: a combination of xmlParserOption(s)
12156 *
12157 * parse an XML in-memory document and build a tree.
12158 *
12159 * Returns the resulting document tree
12160 */
12161xmlDocPtr
12162xmlReadDoc(const xmlChar * cur, const char *encoding, int options)
12163{
12164 xmlParserCtxtPtr ctxt;
12165
12166 if (cur == NULL)
12167 return (NULL);
12168
12169 ctxt = xmlCreateDocParserCtxt(cur);
12170 if (ctxt == NULL)
12171 return (NULL);
12172 return (xmlDoRead(ctxt, encoding, options, 0));
12173}
12174
12175/**
12176 * xmlReadFile:
12177 * @filename: a file or URL
12178 * @encoding: the document encoding, or NULL
12179 * @options: a combination of xmlParserOption(s)
12180 *
12181 * parse an XML file from the filesystem or the network.
12182 *
12183 * Returns the resulting document tree
12184 */
12185xmlDocPtr
12186xmlReadFile(const char *filename, const char *encoding, int options)
12187{
12188 xmlParserCtxtPtr ctxt;
12189
12190 ctxt = xmlCreateFileParserCtxt(filename);
12191 if (ctxt == NULL)
12192 return (NULL);
12193 return (xmlDoRead(ctxt, encoding, options, 0));
12194}
12195
12196/**
12197 * xmlReadMemory:
12198 * @buffer: a pointer to a char array
12199 * @size: the size of the array
12200 * @encoding: the document encoding, or NULL
12201 * @options: a combination of xmlParserOption(s)
12202 *
12203 * parse an XML in-memory document and build a tree.
12204 *
12205 * Returns the resulting document tree
12206 */
12207xmlDocPtr
12208xmlReadMemory(const char *buffer, int size, const char *encoding, int options)
12209{
12210 xmlParserCtxtPtr ctxt;
12211
12212 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12213 if (ctxt == NULL)
12214 return (NULL);
12215 return (xmlDoRead(ctxt, encoding, options, 0));
12216}
12217
12218/**
12219 * xmlReadFd:
12220 * @fd: an open file descriptor
12221 * @encoding: the document encoding, or NULL
12222 * @options: a combination of xmlParserOption(s)
12223 *
12224 * parse an XML from a file descriptor and build a tree.
12225 *
12226 * Returns the resulting document tree
12227 */
12228xmlDocPtr
12229xmlReadFd(int fd, const char *encoding, int options)
12230{
12231 xmlParserCtxtPtr ctxt;
12232 xmlParserInputBufferPtr input;
12233 xmlParserInputPtr stream;
12234
12235 if (fd < 0)
12236 return (NULL);
12237
12238 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12239 if (input == NULL)
12240 return (NULL);
12241 ctxt = xmlNewParserCtxt();
12242 if (ctxt == NULL) {
12243 xmlFreeParserInputBuffer(input);
12244 return (NULL);
12245 }
12246 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12247 if (stream == NULL) {
12248 xmlFreeParserInputBuffer(input);
12249 xmlFreeParserCtxt(ctxt);
12250 return (NULL);
12251 }
12252 inputPush(ctxt, stream);
12253 return (xmlDoRead(ctxt, encoding, options, 0));
12254}
12255
12256/**
12257 * xmlReadIO:
12258 * @ioread: an I/O read function
12259 * @ioclose: an I/O close function
12260 * @ioctx: an I/O handler
12261 * @encoding: the document encoding, or NULL
12262 * @options: a combination of xmlParserOption(s)
12263 *
12264 * parse an XML document from I/O functions and source and build a tree.
12265 *
12266 * Returns the resulting document tree
12267 */
12268xmlDocPtr
12269xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12270 void *ioctx, const char *encoding, int options)
12271{
12272 xmlParserCtxtPtr ctxt;
12273 xmlParserInputBufferPtr input;
12274 xmlParserInputPtr stream;
12275
12276 if (ioread == NULL)
12277 return (NULL);
12278
12279 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12280 XML_CHAR_ENCODING_NONE);
12281 if (input == NULL)
12282 return (NULL);
12283 ctxt = xmlNewParserCtxt();
12284 if (ctxt == NULL) {
12285 xmlFreeParserInputBuffer(input);
12286 return (NULL);
12287 }
12288 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12289 if (stream == NULL) {
12290 xmlFreeParserInputBuffer(input);
12291 xmlFreeParserCtxt(ctxt);
12292 return (NULL);
12293 }
12294 inputPush(ctxt, stream);
12295 return (xmlDoRead(ctxt, encoding, options, 0));
12296}
12297
12298/**
12299 * xmlCtxtReadDoc:
12300 * @ctxt: an XML parser context
12301 * @cur: a pointer to a zero terminated string
12302 * @encoding: the document encoding, or NULL
12303 * @options: a combination of xmlParserOption(s)
12304 *
12305 * parse an XML in-memory document and build a tree.
12306 * This reuses the existing @ctxt parser context
12307 *
12308 * Returns the resulting document tree
12309 */
12310xmlDocPtr
12311xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
12312 const char *encoding, int options)
12313{
12314 xmlParserInputPtr stream;
12315
12316 if (cur == NULL)
12317 return (NULL);
12318 if (ctxt == NULL)
12319 return (NULL);
12320
12321 xmlCtxtReset(ctxt);
12322
12323 stream = xmlNewStringInputStream(ctxt, cur);
12324 if (stream == NULL) {
12325 return (NULL);
12326 }
12327 inputPush(ctxt, stream);
12328 return (xmlDoRead(ctxt, encoding, options, 1));
12329}
12330
12331/**
12332 * xmlCtxtReadFile:
12333 * @ctxt: an XML parser context
12334 * @filename: a file or URL
12335 * @encoding: the document encoding, or NULL
12336 * @options: a combination of xmlParserOption(s)
12337 *
12338 * parse an XML file from the filesystem or the network.
12339 * This reuses the existing @ctxt parser context
12340 *
12341 * Returns the resulting document tree
12342 */
12343xmlDocPtr
12344xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
12345 const char *encoding, int options)
12346{
12347 xmlParserInputPtr stream;
12348
12349 if (filename == NULL)
12350 return (NULL);
12351 if (ctxt == NULL)
12352 return (NULL);
12353
12354 xmlCtxtReset(ctxt);
12355
12356 stream = xmlNewInputFromFile(ctxt, filename);
12357 if (stream == NULL) {
12358 return (NULL);
12359 }
12360 inputPush(ctxt, stream);
12361 return (xmlDoRead(ctxt, encoding, options, 1));
12362}
12363
12364/**
12365 * xmlCtxtReadMemory:
12366 * @ctxt: an XML parser context
12367 * @buffer: a pointer to a char array
12368 * @size: the size of the array
12369 * @encoding: the document encoding, or NULL
12370 * @options: a combination of xmlParserOption(s)
12371 *
12372 * parse an XML in-memory document and build a tree.
12373 * This reuses the existing @ctxt parser context
12374 *
12375 * Returns the resulting document tree
12376 */
12377xmlDocPtr
12378xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
12379 const char *encoding, int options)
12380{
12381 xmlParserInputBufferPtr input;
12382 xmlParserInputPtr stream;
12383
12384 if (ctxt == NULL)
12385 return (NULL);
12386 if (buffer == NULL)
12387 return (NULL);
12388
12389 xmlCtxtReset(ctxt);
12390
12391 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12392 if (input == NULL) {
12393 return(NULL);
12394 }
12395
12396 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12397 if (stream == NULL) {
12398 xmlFreeParserInputBuffer(input);
12399 return(NULL);
12400 }
12401
12402 inputPush(ctxt, stream);
12403 return (xmlDoRead(ctxt, encoding, options, 1));
12404}
12405
12406/**
12407 * xmlCtxtReadFd:
12408 * @ctxt: an XML parser context
12409 * @fd: an open file descriptor
12410 * @encoding: the document encoding, or NULL
12411 * @options: a combination of xmlParserOption(s)
12412 *
12413 * parse an XML from a file descriptor and build a tree.
12414 * This reuses the existing @ctxt parser context
12415 *
12416 * Returns the resulting document tree
12417 */
12418xmlDocPtr
12419xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, const char *encoding,
12420 int options)
12421{
12422 xmlParserInputBufferPtr input;
12423 xmlParserInputPtr stream;
12424
12425 if (fd < 0)
12426 return (NULL);
12427 if (ctxt == NULL)
12428 return (NULL);
12429
12430 xmlCtxtReset(ctxt);
12431
12432
12433 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12434 if (input == NULL)
12435 return (NULL);
12436 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12437 if (stream == NULL) {
12438 xmlFreeParserInputBuffer(input);
12439 return (NULL);
12440 }
12441 inputPush(ctxt, stream);
12442 return (xmlDoRead(ctxt, encoding, options, 1));
12443}
12444
12445/**
12446 * xmlCtxtReadIO:
12447 * @ctxt: an XML parser context
12448 * @ioread: an I/O read function
12449 * @ioclose: an I/O close function
12450 * @ioctx: an I/O handler
12451 * @encoding: the document encoding, or NULL
12452 * @options: a combination of xmlParserOption(s)
12453 *
12454 * parse an XML document from I/O functions and source and build a tree.
12455 * This reuses the existing @ctxt parser context
12456 *
12457 * Returns the resulting document tree
12458 */
12459xmlDocPtr
12460xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
12461 xmlInputCloseCallback ioclose, void *ioctx,
12462 const char *encoding, int options)
12463{
12464 xmlParserInputBufferPtr input;
12465 xmlParserInputPtr stream;
12466
12467 if (ioread == NULL)
12468 return (NULL);
12469 if (ctxt == NULL)
12470 return (NULL);
12471
12472 xmlCtxtReset(ctxt);
12473
12474 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12475 XML_CHAR_ENCODING_NONE);
12476 if (input == NULL)
12477 return (NULL);
12478 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12479 if (stream == NULL) {
12480 xmlFreeParserInputBuffer(input);
12481 return (NULL);
12482 }
12483 inputPush(ctxt, stream);
12484 return (xmlDoRead(ctxt, encoding, options, 1));
12485}