blob: ff04816c1e1c54fad3083c2a4adc7e38bc92ac2a [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their related
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
Owen Taylor3473f882001-02-23 17:55:21 +000060
61#ifdef HAVE_CTYPE_H
62#include <ctype.h>
63#endif
64#ifdef HAVE_STDLIB_H
65#include <stdlib.h>
66#endif
67#ifdef HAVE_SYS_STAT_H
68#include <sys/stat.h>
69#endif
70#ifdef HAVE_FCNTL_H
71#include <fcntl.h>
72#endif
73#ifdef HAVE_UNISTD_H
74#include <unistd.h>
75#endif
76#ifdef HAVE_ZLIB_H
77#include <zlib.h>
78#endif
79
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the depth of the XML documents that we accept to
 * process. This is a safety boundary, not a limitation of the parser
 * itself.
 */
unsigned int xmlParserMaxDepth = 1024U;
Owen Taylor3473f882001-02-23 17:55:21 +000088
Daniel Veillard0fb18932003-09-07 09:14:37 +000089#define SAX2 1
90
Daniel Veillard21a0f912001-02-25 19:54:14 +000091#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000092#define XML_PARSER_BUFFER_SIZE 100
93
Daniel Veillard5997aca2002-03-18 18:36:20 +000094#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
95
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by the W3C specifications.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
104
Daniel Veillarda07050d2003-10-19 14:46:32 +0000105
Owen Taylor3473f882001-02-23 17:55:21 +0000106/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000107xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
108 const xmlChar **str);
109
Daniel Veillard7d515752003-09-26 19:12:37 +0000110static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000111xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
112 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000113 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000115
Daniel Veillard81273902003-09-30 00:43:48 +0000116#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000117static void
118xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
119 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000120#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000121
Daniel Veillard7d515752003-09-26 19:12:37 +0000122static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000123xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
124 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000125
126/************************************************************************
127 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000128 * Some factorized error routines *
129 * *
130 ************************************************************************/
131
132/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000133 * xmlErrAttributeDup:
134 * @ctxt: an XML parser context
135 * @prefix: the attribute prefix
136 * @localname: the attribute localname
137 *
138 * Handle a redefinition of attribute error
139 */
140static void
141xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
142 const xmlChar * localname)
143{
Daniel Veillard157fee02003-10-31 10:36:03 +0000144 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
145 (ctxt->instate == XML_PARSER_EOF))
146 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000147 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000148 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000149 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000150 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
151 (const char *) localname, NULL, NULL, 0, 0,
152 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000153 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000154 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000155 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
156 (const char *) prefix, (const char *) localname,
157 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
158 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000159 ctxt->wellFormed = 0;
160 if (ctxt->recovery == 0)
161 ctxt->disableSAX = 1;
162}
163
164/**
165 * xmlFatalErr:
166 * @ctxt: an XML parser context
167 * @error: the error number
168 * @extra: extra information string
169 *
170 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
171 */
172static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000173xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000174{
175 const char *errmsg;
176
Daniel Veillard157fee02003-10-31 10:36:03 +0000177 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
178 (ctxt->instate == XML_PARSER_EOF))
179 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000180 switch (error) {
181 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000182 errmsg = "CharRef: invalid hexadecimal value\n";
183 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000184 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185 errmsg = "CharRef: invalid decimal value\n";
186 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "internal error";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "PEReference at end of document\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "PEReference in prolog\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference in epilog\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference: no name\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference: expecting ';'\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "Detected an entity reference loop\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "EntityValue: \" or ' expected\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "PEReferences forbidden in internal subset\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "EntityValue: \" or ' expected\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "AttValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "Unescaped '<' not allowed in attributes values\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "SystemLiteral \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "Unfinished System or Public ID \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Sequence ']]>' not allowed in content\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "PUBLIC, the Public Identifier is missing\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "Comment must not contain '--' (double-hyphen)\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "xmlParsePI : no target name\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "Invalid PI name\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "NOTATION: Name expected here\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "'>' required to close NOTATION declaration\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "Entity value required\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "Fragment not allowed";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "'(' required to start ATTLIST enumeration\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "NmToken expected in ATTLIST enumeration\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "')' required to finish ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "ContentDecl : Name or '(' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg =
285 "PEReference: forbidden within markup decl in internal subset\n";
286 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000287 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000288 errmsg = "expected '>'\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "XML conditional section '[' expected\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "Content error in the external subset\n";
295 break;
296 case XML_ERR_CONDSEC_INVALID_KEYWORD:
297 errmsg =
298 "conditional section INCLUDE or IGNORE keyword expected\n";
299 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000300 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000301 errmsg = "XML conditional section not closed\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "Text declaration '<?xml' required\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "parsing XML declaration: '?>' expected\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "external parsed entities cannot be standalone\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "EntityRef: expecting ';'\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "DOCTYPE improperly terminated\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "EndTag: '</' not found\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "expected '='\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "String not closed expecting \" or '\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "String not started expecting ' or \"\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "Invalid XML encoding name\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "standalone accepts only 'yes' or 'no'\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Document is empty\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Extra content at the end of the document\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "chunk is not well balanced\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "extra content at the end of well balanced chunk\n";
347 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000348 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "Malformed declaration expecting version\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 case:
353 errmsg = "\n";
354 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000355#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000356 default:
357 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358 }
359 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000360 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
362 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000363 ctxt->wellFormed = 0;
364 if (ctxt->recovery == 0)
365 ctxt->disableSAX = 1;
366}
367
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000368/**
369 * xmlFatalErrMsg:
370 * @ctxt: an XML parser context
371 * @error: the error number
372 * @msg: the error message
373 *
374 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
375 */
376static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000377xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
378 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000379{
Daniel Veillard157fee02003-10-31 10:36:03 +0000380 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
381 (ctxt->instate == XML_PARSER_EOF))
382 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000383 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000384 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000385 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000386 ctxt->wellFormed = 0;
387 if (ctxt->recovery == 0)
388 ctxt->disableSAX = 1;
389}
390
391/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000392 * xmlWarningMsg:
393 * @ctxt: an XML parser context
394 * @error: the error number
395 * @msg: the error message
396 * @str1: extra data
397 * @str2: extra data
398 *
399 * Handle a warning.
400 */
401static void
402xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403 const char *msg, const xmlChar *str1, const xmlChar *str2)
404{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000405 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000406
Daniel Veillard157fee02003-10-31 10:36:03 +0000407 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
408 (ctxt->instate == XML_PARSER_EOF))
409 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000410 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000411 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000412 schannel = ctxt->sax->serror;
413 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000414 (ctxt->sax) ? ctxt->sax->warning : NULL,
415 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000416 ctxt, NULL, XML_FROM_PARSER, error,
417 XML_ERR_WARNING, NULL, 0,
418 (const char *) str1, (const char *) str2, NULL, 0, 0,
419 msg, (const char *) str1, (const char *) str2);
420}
421
422/**
423 * xmlValidityError:
424 * @ctxt: an XML parser context
425 * @error: the error number
426 * @msg: the error message
427 * @str1: extra data
428 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000429 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000430 */
431static void
432xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
433 const char *msg, const xmlChar *str1)
434{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000435 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000436
437 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
438 (ctxt->instate == XML_PARSER_EOF))
439 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000440 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000441 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000442 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000443 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000444 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000445 ctxt, NULL, XML_FROM_DTD, error,
446 XML_ERR_ERROR, NULL, 0, (const char *) str1,
447 NULL, NULL, 0, 0,
448 msg, (const char *) str1);
449 ctxt->valid = 0;
450}
451
452/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000453 * xmlFatalErrMsgInt:
454 * @ctxt: an XML parser context
455 * @error: the error number
456 * @msg: the error message
457 * @val: an integer value
458 *
459 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
460 */
461static void
462xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000463 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000464{
Daniel Veillard157fee02003-10-31 10:36:03 +0000465 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
466 (ctxt->instate == XML_PARSER_EOF))
467 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000468 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000469 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
471 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000472 ctxt->wellFormed = 0;
473 if (ctxt->recovery == 0)
474 ctxt->disableSAX = 1;
475}
476
477/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000478 * xmlFatalErrMsgStrIntStr:
479 * @ctxt: an XML parser context
480 * @error: the error number
481 * @msg: the error message
482 * @str1: an string info
483 * @val: an integer value
484 * @str2: an string info
485 *
486 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
487 */
488static void
489xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
490 const char *msg, const xmlChar *str1, int val,
491 const xmlChar *str2)
492{
Daniel Veillard157fee02003-10-31 10:36:03 +0000493 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
494 (ctxt->instate == XML_PARSER_EOF))
495 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000496 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000497 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000498 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
499 NULL, 0, (const char *) str1, (const char *) str2,
500 NULL, val, 0, msg, str1, val, str2);
501 ctxt->wellFormed = 0;
502 if (ctxt->recovery == 0)
503 ctxt->disableSAX = 1;
504}
505
506/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000507 * xmlFatalErrMsgStr:
508 * @ctxt: an XML parser context
509 * @error: the error number
510 * @msg: the error message
511 * @val: a string value
512 *
513 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
514 */
515static void
516xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000517 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000518{
Daniel Veillard157fee02003-10-31 10:36:03 +0000519 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
520 (ctxt->instate == XML_PARSER_EOF))
521 return;
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000522 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000523 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000524 XML_FROM_PARSER, error, XML_ERR_FATAL,
525 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
526 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000527 ctxt->wellFormed = 0;
528 if (ctxt->recovery == 0)
529 ctxt->disableSAX = 1;
530}
531
532/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000533 * xmlErrMsgStr:
534 * @ctxt: an XML parser context
535 * @error: the error number
536 * @msg: the error message
537 * @val: a string value
538 *
539 * Handle a non fatal parser error
540 */
541static void
542xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
543 const char *msg, const xmlChar * val)
544{
Daniel Veillard157fee02003-10-31 10:36:03 +0000545 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
546 (ctxt->instate == XML_PARSER_EOF))
547 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000548 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000549 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000550 XML_FROM_PARSER, error, XML_ERR_ERROR,
551 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
552 val);
553}
554
555/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000556 * xmlNsErr:
557 * @ctxt: an XML parser context
558 * @error: the error number
559 * @msg: the message
560 * @info1: extra information string
561 * @info2: extra information string
562 *
563 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
564 */
565static void
566xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
567 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000568 const xmlChar * info1, const xmlChar * info2,
569 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000570{
Daniel Veillard157fee02003-10-31 10:36:03 +0000571 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
572 (ctxt->instate == XML_PARSER_EOF))
573 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000574 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000576 XML_ERR_ERROR, NULL, 0, (const char *) info1,
577 (const char *) info2, (const char *) info3, 0, 0, msg,
578 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000579 ctxt->nsWellFormed = 0;
580}
581
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000582/************************************************************************
583 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000584 * SAX2 defaulted attributes handling *
585 * *
586 ************************************************************************/
587
588/**
589 * xmlDetectSAX2:
590 * @ctxt: an XML parser context
591 *
592 * Do the SAX2 detection and specific intialization
593 */
594static void
595xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
596 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000597#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000598 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
599 ((ctxt->sax->startElementNs != NULL) ||
600 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000601#else
602 ctxt->sax2 = 1;
603#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000604
605 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
606 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
607 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
608}
609
Daniel Veillarde57ec792003-09-10 10:50:59 +0000610typedef struct _xmlDefAttrs xmlDefAttrs;
611typedef xmlDefAttrs *xmlDefAttrsPtr;
612struct _xmlDefAttrs {
613 int nbAttrs; /* number of defaulted attributes on that element */
614 int maxAttrs; /* the size of the array */
615 const xmlChar *values[4]; /* array of localname/prefix/values */
616};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000617
618/**
619 * xmlAddDefAttrs:
620 * @ctxt: an XML parser context
621 * @fullname: the element fullname
622 * @fullattr: the attribute fullname
623 * @value: the attribute value
624 *
625 * Add a defaulted attribute for an element
626 */
627static void
628xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
629 const xmlChar *fullname,
630 const xmlChar *fullattr,
631 const xmlChar *value) {
632 xmlDefAttrsPtr defaults;
633 int len;
634 const xmlChar *name;
635 const xmlChar *prefix;
636
637 if (ctxt->attsDefault == NULL) {
638 ctxt->attsDefault = xmlHashCreate(10);
639 if (ctxt->attsDefault == NULL)
640 goto mem_error;
641 }
642
643 /*
644 * plit the element name into prefix:localname , the string found
645 * are within the DTD and hen not associated to namespace names.
646 */
647 name = xmlSplitQName3(fullname, &len);
648 if (name == NULL) {
649 name = xmlDictLookup(ctxt->dict, fullname, -1);
650 prefix = NULL;
651 } else {
652 name = xmlDictLookup(ctxt->dict, name, -1);
653 prefix = xmlDictLookup(ctxt->dict, fullname, len);
654 }
655
656 /*
657 * make sure there is some storage
658 */
659 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
660 if (defaults == NULL) {
661 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
662 12 * sizeof(const xmlChar *));
663 if (defaults == NULL)
664 goto mem_error;
665 defaults->maxAttrs = 4;
666 defaults->nbAttrs = 0;
667 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
668 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
669 defaults = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
670 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
671 if (defaults == NULL)
672 goto mem_error;
673 defaults->maxAttrs *= 2;
674 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
675 }
676
677 /*
678 * plit the element name into prefix:localname , the string found
679 * are within the DTD and hen not associated to namespace names.
680 */
681 name = xmlSplitQName3(fullattr, &len);
682 if (name == NULL) {
683 name = xmlDictLookup(ctxt->dict, fullattr, -1);
684 prefix = NULL;
685 } else {
686 name = xmlDictLookup(ctxt->dict, name, -1);
687 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
688 }
689
690 defaults->values[4 * defaults->nbAttrs] = name;
691 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
692 /* intern the string and precompute the end */
693 len = xmlStrlen(value);
694 value = xmlDictLookup(ctxt->dict, value, len);
695 defaults->values[4 * defaults->nbAttrs + 2] = value;
696 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
697 defaults->nbAttrs++;
698
699 return;
700
701mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000702 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000703 return;
704}
705
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000706/**
707 * xmlAddSpecialAttr:
708 * @ctxt: an XML parser context
709 * @fullname: the element fullname
710 * @fullattr: the attribute fullname
711 * @type: the attribute type
712 *
713 * Register that this attribute is not CDATA
714 */
715static void
716xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
717 const xmlChar *fullname,
718 const xmlChar *fullattr,
719 int type)
720{
721 if (ctxt->attsSpecial == NULL) {
722 ctxt->attsSpecial = xmlHashCreate(10);
723 if (ctxt->attsSpecial == NULL)
724 goto mem_error;
725 }
726
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000727 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
728 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000729 return;
730
731mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000732 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000733 return;
734}
735
Daniel Veillard4432df22003-09-28 18:58:27 +0000736/**
737 * xmlCheckLanguageID:
738 * @lang: pointer to the string value
739 *
740 * Checks that the value conforms to the LanguageID production:
741 *
742 * NOTE: this is somewhat deprecated, those productions were removed from
743 * the XML Second edition.
744 *
745 * [33] LanguageID ::= Langcode ('-' Subcode)*
746 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
747 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
748 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
749 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
750 * [38] Subcode ::= ([a-z] | [A-Z])+
751 *
752 * Returns 1 if correct 0 otherwise
753 **/
754int
755xmlCheckLanguageID(const xmlChar * lang)
756{
757 const xmlChar *cur = lang;
758
759 if (cur == NULL)
760 return (0);
761 if (((cur[0] == 'i') && (cur[1] == '-')) ||
762 ((cur[0] == 'I') && (cur[1] == '-'))) {
763 /*
764 * IANA code
765 */
766 cur += 2;
767 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
768 ((cur[0] >= 'a') && (cur[0] <= 'z')))
769 cur++;
770 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
771 ((cur[0] == 'X') && (cur[1] == '-'))) {
772 /*
773 * User code
774 */
775 cur += 2;
776 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
777 ((cur[0] >= 'a') && (cur[0] <= 'z')))
778 cur++;
779 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
780 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
781 /*
782 * ISO639
783 */
784 cur++;
785 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
786 ((cur[0] >= 'a') && (cur[0] <= 'z')))
787 cur++;
788 else
789 return (0);
790 } else
791 return (0);
792 while (cur[0] != 0) { /* non input consuming */
793 if (cur[0] != '-')
794 return (0);
795 cur++;
796 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
797 ((cur[0] >= 'a') && (cur[0] <= 'z')))
798 cur++;
799 else
800 return (0);
801 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
802 ((cur[0] >= 'a') && (cur[0] <= 'z')))
803 cur++;
804 }
805 return (1);
806}
807
Owen Taylor3473f882001-02-23 17:55:21 +0000808/************************************************************************
809 * *
810 * Parser stacks related functions and macros *
811 * *
812 ************************************************************************/
813
814xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
815 const xmlChar ** str);
816
Daniel Veillard0fb18932003-09-07 09:14:37 +0000817#ifdef SAX2
818/**
819 * nsPush:
820 * @ctxt: an XML parser context
821 * @prefix: the namespace prefix or NULL
822 * @URL: the namespace name
823 *
824 * Pushes a new parser namespace on top of the ns stack
825 *
William M. Brack7b9154b2003-09-27 19:23:50 +0000826 * Returns -1 in case of error, -2 if the namespace should be discarded
827 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +0000828 */
829static int
830nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
831{
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000832 if (ctxt->options & XML_PARSE_NSCLEAN) {
833 int i;
834 for (i = 0;i < ctxt->nsNr;i += 2) {
835 if (ctxt->nsTab[i] == prefix) {
836 /* in scope */
837 if (ctxt->nsTab[i + 1] == URL)
838 return(-2);
839 /* out of scope keep it */
840 break;
841 }
842 }
843 }
Daniel Veillard0fb18932003-09-07 09:14:37 +0000844 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
845 ctxt->nsMax = 10;
846 ctxt->nsNr = 0;
847 ctxt->nsTab = (const xmlChar **)
848 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
849 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000850 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000851 ctxt->nsMax = 0;
852 return (-1);
853 }
854 } else if (ctxt->nsNr >= ctxt->nsMax) {
855 ctxt->nsMax *= 2;
856 ctxt->nsTab = (const xmlChar **)
857 xmlRealloc(ctxt->nsTab,
858 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
859 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000860 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000861 ctxt->nsMax /= 2;
862 return (-1);
863 }
864 }
865 ctxt->nsTab[ctxt->nsNr++] = prefix;
866 ctxt->nsTab[ctxt->nsNr++] = URL;
867 return (ctxt->nsNr);
868}
869/**
870 * nsPop:
871 * @ctxt: an XML parser context
872 * @nr: the number to pop
873 *
874 * Pops the top @nr parser prefix/namespace from the ns stack
875 *
876 * Returns the number of namespaces removed
877 */
878static int
879nsPop(xmlParserCtxtPtr ctxt, int nr)
880{
881 int i;
882
883 if (ctxt->nsTab == NULL) return(0);
884 if (ctxt->nsNr < nr) {
885 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
886 nr = ctxt->nsNr;
887 }
888 if (ctxt->nsNr <= 0)
889 return (0);
890
891 for (i = 0;i < nr;i++) {
892 ctxt->nsNr--;
893 ctxt->nsTab[ctxt->nsNr] = NULL;
894 }
895 return(nr);
896}
897#endif
898
899static int
900xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
901 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000902 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000903 int maxatts;
904
905 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000906 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000907 atts = (const xmlChar **)
908 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000909 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000910 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000911 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
912 if (attallocs == NULL) goto mem_error;
913 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000914 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000915 } else if (nr + 5 > ctxt->maxatts) {
916 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000917 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
918 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000919 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000920 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000921 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
922 (maxatts / 5) * sizeof(int));
923 if (attallocs == NULL) goto mem_error;
924 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000925 ctxt->maxatts = maxatts;
926 }
927 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000928mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000929 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000930 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000931}
932
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000933/**
934 * inputPush:
935 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000936 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000937 *
938 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000939 *
940 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000941 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000942extern int
943inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
944{
945 if (ctxt->inputNr >= ctxt->inputMax) {
946 ctxt->inputMax *= 2;
947 ctxt->inputTab =
948 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
949 ctxt->inputMax *
950 sizeof(ctxt->inputTab[0]));
951 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000952 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000953 return (0);
954 }
955 }
956 ctxt->inputTab[ctxt->inputNr] = value;
957 ctxt->input = value;
958 return (ctxt->inputNr++);
959}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000960/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000961 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000962 * @ctxt: an XML parser context
963 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000964 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000965 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000966 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000967 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000968extern xmlParserInputPtr
969inputPop(xmlParserCtxtPtr ctxt)
970{
971 xmlParserInputPtr ret;
972
973 if (ctxt->inputNr <= 0)
974 return (0);
975 ctxt->inputNr--;
976 if (ctxt->inputNr > 0)
977 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
978 else
979 ctxt->input = NULL;
980 ret = ctxt->inputTab[ctxt->inputNr];
981 ctxt->inputTab[ctxt->inputNr] = 0;
982 return (ret);
983}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000984/**
985 * nodePush:
986 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000987 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000988 *
989 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000990 *
991 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000992 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000993extern int
994nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
995{
996 if (ctxt->nodeNr >= ctxt->nodeMax) {
997 ctxt->nodeMax *= 2;
998 ctxt->nodeTab =
999 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1000 ctxt->nodeMax *
1001 sizeof(ctxt->nodeTab[0]));
1002 if (ctxt->nodeTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001003 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001004 return (0);
1005 }
1006 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001007 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001008 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001009 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1010 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001011 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001012 return(0);
1013 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001014 ctxt->nodeTab[ctxt->nodeNr] = value;
1015 ctxt->node = value;
1016 return (ctxt->nodeNr++);
1017}
1018/**
1019 * nodePop:
1020 * @ctxt: an XML parser context
1021 *
1022 * Pops the top element node from the node stack
1023 *
1024 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001025 */
Daniel Veillard1c732d22002-11-30 11:22:59 +00001026extern xmlNodePtr
1027nodePop(xmlParserCtxtPtr ctxt)
1028{
1029 xmlNodePtr ret;
1030
1031 if (ctxt->nodeNr <= 0)
1032 return (0);
1033 ctxt->nodeNr--;
1034 if (ctxt->nodeNr > 0)
1035 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1036 else
1037 ctxt->node = NULL;
1038 ret = ctxt->nodeTab[ctxt->nodeNr];
1039 ctxt->nodeTab[ctxt->nodeNr] = 0;
1040 return (ret);
1041}
1042/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001043 * nameNsPush:
1044 * @ctxt: an XML parser context
1045 * @value: the element name
1046 * @prefix: the element prefix
1047 * @URI: the element namespace name
1048 *
1049 * Pushes a new element name/prefix/URL on top of the name stack
1050 *
1051 * Returns -1 in case of error, the index in the stack otherwise
1052 */
1053static int
1054nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1055 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1056{
1057 if (ctxt->nameNr >= ctxt->nameMax) {
1058 const xmlChar * *tmp;
1059 void **tmp2;
1060 ctxt->nameMax *= 2;
1061 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1062 ctxt->nameMax *
1063 sizeof(ctxt->nameTab[0]));
1064 if (tmp == NULL) {
1065 ctxt->nameMax /= 2;
1066 goto mem_error;
1067 }
1068 ctxt->nameTab = tmp;
1069 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1070 ctxt->nameMax * 3 *
1071 sizeof(ctxt->pushTab[0]));
1072 if (tmp2 == NULL) {
1073 ctxt->nameMax /= 2;
1074 goto mem_error;
1075 }
1076 ctxt->pushTab = tmp2;
1077 }
1078 ctxt->nameTab[ctxt->nameNr] = value;
1079 ctxt->name = value;
1080 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1081 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001082 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001083 return (ctxt->nameNr++);
1084mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001085 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001086 return (-1);
1087}
1088/**
1089 * nameNsPop:
1090 * @ctxt: an XML parser context
1091 *
1092 * Pops the top element/prefix/URI name from the name stack
1093 *
1094 * Returns the name just removed
1095 */
1096static const xmlChar *
1097nameNsPop(xmlParserCtxtPtr ctxt)
1098{
1099 const xmlChar *ret;
1100
1101 if (ctxt->nameNr <= 0)
1102 return (0);
1103 ctxt->nameNr--;
1104 if (ctxt->nameNr > 0)
1105 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1106 else
1107 ctxt->name = NULL;
1108 ret = ctxt->nameTab[ctxt->nameNr];
1109 ctxt->nameTab[ctxt->nameNr] = NULL;
1110 return (ret);
1111}
1112
1113/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001114 * namePush:
1115 * @ctxt: an XML parser context
1116 * @value: the element name
1117 *
1118 * Pushes a new element name on top of the name stack
1119 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001120 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001121 */
1122extern int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001123namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001124{
1125 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001126 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001127 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001128 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001129 ctxt->nameMax *
1130 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001131 if (tmp == NULL) {
1132 ctxt->nameMax /= 2;
1133 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001134 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001135 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001136 }
1137 ctxt->nameTab[ctxt->nameNr] = value;
1138 ctxt->name = value;
1139 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001140mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001141 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001142 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001143}
1144/**
1145 * namePop:
1146 * @ctxt: an XML parser context
1147 *
1148 * Pops the top element name from the name stack
1149 *
1150 * Returns the name just removed
1151 */
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001152extern const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001153namePop(xmlParserCtxtPtr ctxt)
1154{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001155 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001156
1157 if (ctxt->nameNr <= 0)
1158 return (0);
1159 ctxt->nameNr--;
1160 if (ctxt->nameNr > 0)
1161 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1162 else
1163 ctxt->name = NULL;
1164 ret = ctxt->nameTab[ctxt->nameNr];
1165 ctxt->nameTab[ctxt->nameNr] = 0;
1166 return (ret);
1167}
Owen Taylor3473f882001-02-23 17:55:21 +00001168
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001169static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001170 if (ctxt->spaceNr >= ctxt->spaceMax) {
1171 ctxt->spaceMax *= 2;
1172 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1173 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1174 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001175 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001176 return(0);
1177 }
1178 }
1179 ctxt->spaceTab[ctxt->spaceNr] = val;
1180 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1181 return(ctxt->spaceNr++);
1182}
1183
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001184static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001185 int ret;
1186 if (ctxt->spaceNr <= 0) return(0);
1187 ctxt->spaceNr--;
1188 if (ctxt->spaceNr > 0)
1189 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1190 else
1191 ctxt->space = NULL;
1192 ret = ctxt->spaceTab[ctxt->spaceNr];
1193 ctxt->spaceTab[ctxt->spaceNr] = -1;
1194 return(ret);
1195}
1196
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1231
/*
 * Raw accessors on the current input buffer. They all expect a variable
 * named ctxt to be in scope at the point of use.
 */
/* current byte, read directly from the input buffer */
#define RAW (*(ctxt->input->cur))
/* current byte; expands to the same expression as RAW */
#define CUR (*(ctxt->input->cur))
/* byte located val positions after the current one */
#define NXT(val) (ctxt->input->cur[(val)])
/* the current position pointer itself, usable as an lvalue */
#define CUR_PTR (ctxt->input->cur)
1236
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes found at s are
 * c1 ... cn. The bytes are compared as unsigned char and the comparison
 * stops at the first mismatch.
 *
 * Every argument is parenthesised so that any expression can be passed
 * (for instance a conditional expression as a character), and the cast
 * keeps the const qualifier of the buffer. s is evaluated once per byte
 * compared, so it must not have side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1254
/*
 * SKIP(val): advance the current input by val xmlChars, updating the
 * character counter and the column. Afterwards a '%' at the new position
 * is handed to xmlParserHandlePEReference(), and if the buffer is
 * exhausted and cannot be refilled the input is popped.
 * val is evaluated three times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
1262
Daniel Veillarda880b122003-04-21 21:36:41 +00001263#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001264 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1265 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001266 xmlSHRINK (ctxt);
1267
1268static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1269 xmlParserInputShrink(ctxt->input);
1270 if ((*ctxt->input->cur == 0) &&
1271 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1272 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001273 }
Owen Taylor3473f882001-02-23 17:55:21 +00001274
Daniel Veillarda880b122003-04-21 21:36:41 +00001275#define GROW if ((ctxt->progressive == 0) && \
1276 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001277 xmlGROW (ctxt);
1278
1279static void xmlGROW (xmlParserCtxtPtr ctxt) {
1280 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1281 if ((*ctxt->input->cur == 0) &&
1282 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1283 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001284}
Owen Taylor3473f882001-02-23 17:55:21 +00001285
/* Skip the blanks at the current position, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Move to the next character through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, updating the column and the
 * character counter, and try to read more input when a zero byte is
 * reached. Lines are not counted here.
 * NOTE(review): expands to a bare brace block, not do { } while (0).
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l xmlChars long.
 * A newline increments the line and resets the column to 1, any other
 * character increments the column by one. A '%' found at the new
 * position is handed to xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Current character of the parser input; l receives its length */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
/* Same, reading from the string s instead of the parser input */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append the character v to buffer b at index i and
 * advance i; a character of length 1 is stored directly, otherwise
 * xmlCopyCharMultiByte() writes it and returns how far to advance.
 * NOTE(review): expands to an unbraced if/else and its arguments are not
 * parenthesised.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +00001312
1313/**
1314 * xmlSkipBlankChars:
1315 * @ctxt: the XML parser context
1316 *
1317 * skip all blanks character found at that point in the input streams.
1318 * It pops up finished entities in the process if allowable at that point.
1319 *
1320 * Returns the number of space chars skipped
1321 */
1322
1323int
1324xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001325 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001326
1327 /*
1328 * It's Okay to use CUR/NEXT here since all the blanks are on
1329 * the ASCII range.
1330 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001331 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1332 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001333 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001334 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001335 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001336 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001337 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001338 if (*cur == '\n') {
1339 ctxt->input->line++; ctxt->input->col = 1;
1340 }
1341 cur++;
1342 res++;
1343 if (*cur == 0) {
1344 ctxt->input->cur = cur;
1345 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1346 cur = ctxt->input->cur;
1347 }
1348 }
1349 ctxt->input->cur = cur;
1350 } else {
1351 int cur;
1352 do {
1353 cur = CUR;
1354 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
1355 NEXT;
1356 cur = CUR;
1357 res++;
1358 }
1359 while ((cur == 0) && (ctxt->inputNr > 1) &&
1360 (ctxt->instate != XML_PARSER_COMMENT)) {
1361 xmlPopInput(ctxt);
1362 cur = CUR;
1363 }
1364 /*
1365 * Need to handle support of entities branching here
1366 */
1367 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1368 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1369 }
Owen Taylor3473f882001-02-23 17:55:21 +00001370 return(res);
1371}
1372
1373/************************************************************************
1374 * *
1375 * Commodity functions to handle entities *
1376 * *
1377 ************************************************************************/
1378
1379/**
1380 * xmlPopInput:
1381 * @ctxt: an XML parser context
1382 *
1383 * xmlPopInput: the current input pointed by ctxt->input came to an end
1384 * pop it and return the next char.
1385 *
1386 * Returns the current xmlChar in the parser context
1387 */
1388xmlChar
1389xmlPopInput(xmlParserCtxtPtr ctxt) {
1390 if (ctxt->inputNr == 1) return(0); /* End of main Input */
1391 if (xmlParserDebugEntities)
1392 xmlGenericError(xmlGenericErrorContext,
1393 "Popping input %d\n", ctxt->inputNr);
1394 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001395 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001396 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1397 return(xmlPopInput(ctxt));
1398 return(CUR);
1399}
1400
1401/**
1402 * xmlPushInput:
1403 * @ctxt: an XML parser context
1404 * @input: an XML parser input fragment (entity, XML fragment ...).
1405 *
1406 * xmlPushInput: switch to a new input stream which is stacked on top
1407 * of the previous one(s).
1408 */
1409void
1410xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1411 if (input == NULL) return;
1412
1413 if (xmlParserDebugEntities) {
1414 if ((ctxt->input != NULL) && (ctxt->input->filename))
1415 xmlGenericError(xmlGenericErrorContext,
1416 "%s(%d): ", ctxt->input->filename,
1417 ctxt->input->line);
1418 xmlGenericError(xmlGenericErrorContext,
1419 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1420 }
1421 inputPush(ctxt, input);
1422 GROW;
1423}
1424
1425/**
1426 * xmlParseCharRef:
1427 * @ctxt: an XML parser context
1428 *
1429 * parse Reference declarations
1430 *
1431 * [66] CharRef ::= '&#' [0-9]+ ';' |
1432 * '&#x' [0-9a-fA-F]+ ';'
1433 *
1434 * [ WFC: Legal Character ]
1435 * Characters referred to using character references must match the
1436 * production for Char.
1437 *
1438 * Returns the value parsed (as an int), 0 in case of error
1439 */
1440int
1441xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001442 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001443 int count = 0;
1444
Owen Taylor3473f882001-02-23 17:55:21 +00001445 /*
1446 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1447 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001448 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001449 (NXT(2) == 'x')) {
1450 SKIP(3);
1451 GROW;
1452 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001453 if (count++ > 20) {
1454 count = 0;
1455 GROW;
1456 }
1457 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001458 val = val * 16 + (CUR - '0');
1459 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1460 val = val * 16 + (CUR - 'a') + 10;
1461 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1462 val = val * 16 + (CUR - 'A') + 10;
1463 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001464 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001465 val = 0;
1466 break;
1467 }
1468 NEXT;
1469 count++;
1470 }
1471 if (RAW == ';') {
1472 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001473 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001474 ctxt->nbChars ++;
1475 ctxt->input->cur++;
1476 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001477 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001478 SKIP(2);
1479 GROW;
1480 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001481 if (count++ > 20) {
1482 count = 0;
1483 GROW;
1484 }
1485 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001486 val = val * 10 + (CUR - '0');
1487 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001488 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001489 val = 0;
1490 break;
1491 }
1492 NEXT;
1493 count++;
1494 }
1495 if (RAW == ';') {
1496 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001497 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001498 ctxt->nbChars ++;
1499 ctxt->input->cur++;
1500 }
1501 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001502 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001503 }
1504
1505 /*
1506 * [ WFC: Legal Character ]
1507 * Characters referred to using character references must match the
1508 * production for Char.
1509 */
William M. Brack871611b2003-10-18 04:53:14 +00001510 if (IS_CHAR(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001511 return(val);
1512 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001513 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1514 "xmlParseCharRef: invalid xmlChar value %d\n",
1515 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001516 }
1517 return(0);
1518}
1519
1520/**
1521 * xmlParseStringCharRef:
1522 * @ctxt: an XML parser context
1523 * @str: a pointer to an index in the string
1524 *
1525 * parse Reference declarations, variant parsing from a string rather
1526 * than an an input flow.
1527 *
1528 * [66] CharRef ::= '&#' [0-9]+ ';' |
1529 * '&#x' [0-9a-fA-F]+ ';'
1530 *
1531 * [ WFC: Legal Character ]
1532 * Characters referred to using character references must match the
1533 * production for Char.
1534 *
1535 * Returns the value parsed (as an int), 0 in case of error, str will be
1536 * updated to the current value of the index
1537 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001538static int
Owen Taylor3473f882001-02-23 17:55:21 +00001539xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1540 const xmlChar *ptr;
1541 xmlChar cur;
1542 int val = 0;
1543
1544 if ((str == NULL) || (*str == NULL)) return(0);
1545 ptr = *str;
1546 cur = *ptr;
1547 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1548 ptr += 3;
1549 cur = *ptr;
1550 while (cur != ';') { /* Non input consuming loop */
1551 if ((cur >= '0') && (cur <= '9'))
1552 val = val * 16 + (cur - '0');
1553 else if ((cur >= 'a') && (cur <= 'f'))
1554 val = val * 16 + (cur - 'a') + 10;
1555 else if ((cur >= 'A') && (cur <= 'F'))
1556 val = val * 16 + (cur - 'A') + 10;
1557 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001558 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001559 val = 0;
1560 break;
1561 }
1562 ptr++;
1563 cur = *ptr;
1564 }
1565 if (cur == ';')
1566 ptr++;
1567 } else if ((cur == '&') && (ptr[1] == '#')){
1568 ptr += 2;
1569 cur = *ptr;
1570 while (cur != ';') { /* Non input consuming loops */
1571 if ((cur >= '0') && (cur <= '9'))
1572 val = val * 10 + (cur - '0');
1573 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001574 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001575 val = 0;
1576 break;
1577 }
1578 ptr++;
1579 cur = *ptr;
1580 }
1581 if (cur == ';')
1582 ptr++;
1583 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001584 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001585 return(0);
1586 }
1587 *str = ptr;
1588
1589 /*
1590 * [ WFC: Legal Character ]
1591 * Characters referred to using character references must match the
1592 * production for Char.
1593 */
William M. Brack871611b2003-10-18 04:53:14 +00001594 if (IS_CHAR(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001595 return(val);
1596 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001597 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1598 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1599 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001600 }
1601 return(0);
1602}
1603
1604/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001605 * xmlNewBlanksWrapperInputStream:
1606 * @ctxt: an XML parser context
1607 * @entity: an Entity pointer
1608 *
1609 * Create a new input stream for wrapping
1610 * blanks around a PEReference
1611 *
1612 * Returns the new input stream or NULL
1613 */
1614
1615static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1616
Daniel Veillardf4862f02002-09-10 11:13:43 +00001617static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001618xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1619 xmlParserInputPtr input;
1620 xmlChar *buffer;
1621 size_t length;
1622 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001623 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1624 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001625 return(NULL);
1626 }
1627 if (xmlParserDebugEntities)
1628 xmlGenericError(xmlGenericErrorContext,
1629 "new blanks wrapper for entity: %s\n", entity->name);
1630 input = xmlNewInputStream(ctxt);
1631 if (input == NULL) {
1632 return(NULL);
1633 }
1634 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001635 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001636 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001637 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001638 return(NULL);
1639 }
1640 buffer [0] = ' ';
1641 buffer [1] = '%';
1642 buffer [length-3] = ';';
1643 buffer [length-2] = ' ';
1644 buffer [length-1] = 0;
1645 memcpy(buffer + 2, entity->name, length - 5);
1646 input->free = deallocblankswrapper;
1647 input->base = buffer;
1648 input->cur = buffer;
1649 input->length = length;
1650 input->end = &buffer[length];
1651 return(input);
1652}
1653
1654/**
Owen Taylor3473f882001-02-23 17:55:21 +00001655 * xmlParserHandlePEReference:
1656 * @ctxt: the parser context
1657 *
1658 * [69] PEReference ::= '%' Name ';'
1659 *
1660 * [ WFC: No Recursion ]
1661 * A parsed entity must not contain a recursive
1662 * reference to itself, either directly or indirectly.
1663 *
1664 * [ WFC: Entity Declared ]
1665 * In a document without any DTD, a document with only an internal DTD
1666 * subset which contains no parameter entity references, or a document
1667 * with "standalone='yes'", ... ... The declaration of a parameter
1668 * entity must precede any reference to it...
1669 *
1670 * [ VC: Entity Declared ]
1671 * In a document with an external subset or external parameter entities
1672 * with "standalone='no'", ... ... The declaration of a parameter entity
1673 * must precede any reference to it...
1674 *
1675 * [ WFC: In DTD ]
1676 * Parameter-entity references may only appear in the DTD.
1677 * NOTE: misleading but this is handled.
1678 *
1679 * A PEReference may have been detected in the current input stream
1680 * the handling is done accordingly to
1681 * http://www.w3.org/TR/REC-xml#entproc
1682 * i.e.
1683 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001684 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001685 */
1686void
1687xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001688 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001689 xmlEntityPtr entity = NULL;
1690 xmlParserInputPtr input;
1691
Owen Taylor3473f882001-02-23 17:55:21 +00001692 if (RAW != '%') return;
1693 switch(ctxt->instate) {
1694 case XML_PARSER_CDATA_SECTION:
1695 return;
1696 case XML_PARSER_COMMENT:
1697 return;
1698 case XML_PARSER_START_TAG:
1699 return;
1700 case XML_PARSER_END_TAG:
1701 return;
1702 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001703 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001704 return;
1705 case XML_PARSER_PROLOG:
1706 case XML_PARSER_START:
1707 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001708 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001709 return;
1710 case XML_PARSER_ENTITY_DECL:
1711 case XML_PARSER_CONTENT:
1712 case XML_PARSER_ATTRIBUTE_VALUE:
1713 case XML_PARSER_PI:
1714 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001715 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001716 /* we just ignore it there */
1717 return;
1718 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001719 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001720 return;
1721 case XML_PARSER_ENTITY_VALUE:
1722 /*
1723 * NOTE: in the case of entity values, we don't do the
1724 * substitution here since we need the literal
1725 * entity value to be able to save the internal
1726 * subset of the document.
1727 * This will be handled by xmlStringDecodeEntities
1728 */
1729 return;
1730 case XML_PARSER_DTD:
1731 /*
1732 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1733 * In the internal DTD subset, parameter-entity references
1734 * can occur only where markup declarations can occur, not
1735 * within markup declarations.
1736 * In that case this is handled in xmlParseMarkupDecl
1737 */
1738 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1739 return;
William M. Brack76e95df2003-10-18 16:20:14 +00001740 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00001741 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001742 break;
1743 case XML_PARSER_IGNORE:
1744 return;
1745 }
1746
1747 NEXT;
1748 name = xmlParseName(ctxt);
1749 if (xmlParserDebugEntities)
1750 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001751 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001752 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001753 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001754 } else {
1755 if (RAW == ';') {
1756 NEXT;
1757 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1758 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1759 if (entity == NULL) {
1760
1761 /*
1762 * [ WFC: Entity Declared ]
1763 * In a document without any DTD, a document with only an
1764 * internal DTD subset which contains no parameter entity
1765 * references, or a document with "standalone='yes'", ...
1766 * ... The declaration of a parameter entity must precede
1767 * any reference to it...
1768 */
1769 if ((ctxt->standalone == 1) ||
1770 ((ctxt->hasExternalSubset == 0) &&
1771 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001772 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00001773 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001774 } else {
1775 /*
1776 * [ VC: Entity Declared ]
1777 * In a document with an external subset or external
1778 * parameter entities with "standalone='no'", ...
1779 * ... The declaration of a parameter entity must precede
1780 * any reference to it...
1781 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00001782 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1783 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
1784 "PEReference: %%%s; not found\n",
1785 name);
1786 } else
1787 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
1788 "PEReference: %%%s; not found\n",
1789 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001790 ctxt->valid = 0;
1791 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00001792 } else if (ctxt->input->free != deallocblankswrapper) {
1793 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
1794 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001795 } else {
1796 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1797 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001798 xmlChar start[4];
1799 xmlCharEncoding enc;
1800
Owen Taylor3473f882001-02-23 17:55:21 +00001801 /*
1802 * handle the extra spaces added before and after
1803 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001804 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00001805 */
1806 input = xmlNewEntityInputStream(ctxt, entity);
1807 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00001808
1809 /*
1810 * Get the 4 first bytes and decode the charset
1811 * if enc != XML_CHAR_ENCODING_NONE
1812 * plug some encoding conversion routines.
1813 */
1814 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +00001815 if (entity->length >= 4) {
1816 start[0] = RAW;
1817 start[1] = NXT(1);
1818 start[2] = NXT(2);
1819 start[3] = NXT(3);
1820 enc = xmlDetectCharEncoding(start, 4);
1821 if (enc != XML_CHAR_ENCODING_NONE) {
1822 xmlSwitchEncoding(ctxt, enc);
1823 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001824 }
1825
Owen Taylor3473f882001-02-23 17:55:21 +00001826 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00001827 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
1828 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001829 xmlParseTextDecl(ctxt);
1830 }
Owen Taylor3473f882001-02-23 17:55:21 +00001831 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001832 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
1833 "PEReference: %s is not a parameter entity\n",
1834 name);
Owen Taylor3473f882001-02-23 17:55:21 +00001835 }
1836 }
1837 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001838 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001839 }
Owen Taylor3473f882001-02-23 17:55:21 +00001840 }
1841}
1842
/*
 * Macro used to grow the current buffer.
 *
 * Doubles <buffer>_size and reallocates <buffer> accordingly.  The
 * enclosing function must provide an int named <buffer>_size and a
 * mem_error label.  On allocation failure the old block is released,
 * <buffer> is set to NULL and control jumps to mem_error.
 */
#define growBuffer(buffer) {						\
    xmlChar *growBuffer_tmp;						\
    buffer##_size *= 2;							\
    growBuffer_tmp = (xmlChar *)					\
        xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));		\
    if (growBuffer_tmp == NULL) {					\
        /* realloc failure leaves the old block allocated */		\
        xmlFree(buffer);						\
        buffer = NULL;							\
        goto mem_error;							\
    }									\
    buffer = growBuffer_tmp;						\
}
1852
1853/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00001854 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00001855 * @ctxt: the parser context
1856 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001857 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001858 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1859 * @end: an end marker xmlChar, 0 if none
1860 * @end2: an end marker xmlChar, 0 if none
1861 * @end3: an end marker xmlChar, 0 if none
1862 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001863 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001864 *
1865 * [67] Reference ::= EntityRef | CharRef
1866 *
1867 * [69] PEReference ::= '%' Name ';'
1868 *
1869 * Returns A newly allocated string with the substitution done. The caller
1870 * must deallocate it !
1871 */
1872xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001873xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1874 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001875 xmlChar *buffer = NULL;
1876 int buffer_size = 0;
1877
1878 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001879 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001880 xmlEntityPtr ent;
1881 int c,l;
1882 int nbchars = 0;
1883
Daniel Veillarde57ec792003-09-10 10:50:59 +00001884 if ((str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001885 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001886 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001887
1888 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001889 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001890 return(NULL);
1891 }
1892
1893 /*
1894 * allocate a translation buffer.
1895 */
1896 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001897 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001898 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001899
1900 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001901 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001902 * we are operating on already parsed values.
1903 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001904 if (str < last)
1905 c = CUR_SCHAR(str, l);
1906 else
1907 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001908 while ((c != 0) && (c != end) && /* non input consuming loop */
1909 (c != end2) && (c != end3)) {
1910
1911 if (c == 0) break;
1912 if ((c == '&') && (str[1] == '#')) {
1913 int val = xmlParseStringCharRef(ctxt, &str);
1914 if (val != 0) {
1915 COPY_BUF(0,buffer,nbchars,val);
1916 }
1917 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1918 if (xmlParserDebugEntities)
1919 xmlGenericError(xmlGenericErrorContext,
1920 "String decoding Entity Reference: %.30s\n",
1921 str);
1922 ent = xmlParseStringEntityRef(ctxt, &str);
1923 if ((ent != NULL) &&
1924 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1925 if (ent->content != NULL) {
1926 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1927 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00001928 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
1929 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001930 }
1931 } else if ((ent != NULL) && (ent->content != NULL)) {
1932 xmlChar *rep;
1933
1934 ctxt->depth++;
1935 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1936 0, 0, 0);
1937 ctxt->depth--;
1938 if (rep != NULL) {
1939 current = rep;
1940 while (*current != 0) { /* non input consuming loop */
1941 buffer[nbchars++] = *current++;
1942 if (nbchars >
1943 buffer_size - XML_PARSER_BUFFER_SIZE) {
1944 growBuffer(buffer);
1945 }
1946 }
1947 xmlFree(rep);
1948 }
1949 } else if (ent != NULL) {
1950 int i = xmlStrlen(ent->name);
1951 const xmlChar *cur = ent->name;
1952
1953 buffer[nbchars++] = '&';
1954 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1955 growBuffer(buffer);
1956 }
1957 for (;i > 0;i--)
1958 buffer[nbchars++] = *cur++;
1959 buffer[nbchars++] = ';';
1960 }
1961 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1962 if (xmlParserDebugEntities)
1963 xmlGenericError(xmlGenericErrorContext,
1964 "String decoding PE Reference: %.30s\n", str);
1965 ent = xmlParseStringPEReference(ctxt, &str);
1966 if (ent != NULL) {
1967 xmlChar *rep;
1968
1969 ctxt->depth++;
1970 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1971 0, 0, 0);
1972 ctxt->depth--;
1973 if (rep != NULL) {
1974 current = rep;
1975 while (*current != 0) { /* non input consuming loop */
1976 buffer[nbchars++] = *current++;
1977 if (nbchars >
1978 buffer_size - XML_PARSER_BUFFER_SIZE) {
1979 growBuffer(buffer);
1980 }
1981 }
1982 xmlFree(rep);
1983 }
1984 }
1985 } else {
1986 COPY_BUF(l,buffer,nbchars,c);
1987 str += l;
1988 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1989 growBuffer(buffer);
1990 }
1991 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001992 if (str < last)
1993 c = CUR_SCHAR(str, l);
1994 else
1995 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001996 }
1997 buffer[nbchars++] = 0;
1998 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001999
2000mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002001 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002002 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002003}
2004
Daniel Veillarde57ec792003-09-10 10:50:59 +00002005/**
2006 * xmlStringDecodeEntities:
2007 * @ctxt: the parser context
2008 * @str: the input string
2009 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2010 * @end: an end marker xmlChar, 0 if none
2011 * @end2: an end marker xmlChar, 0 if none
2012 * @end3: an end marker xmlChar, 0 if none
2013 *
2014 * Takes a entity string content and process to do the adequate substitutions.
2015 *
2016 * [67] Reference ::= EntityRef | CharRef
2017 *
2018 * [69] PEReference ::= '%' Name ';'
2019 *
2020 * Returns A newly allocated string with the substitution done. The caller
2021 * must deallocate it !
2022 */
2023xmlChar *
2024xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2025 xmlChar end, xmlChar end2, xmlChar end3) {
2026 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2027 end, end2, end3));
2028}
Owen Taylor3473f882001-02-23 17:55:21 +00002029
2030/************************************************************************
2031 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002032 * Commodity functions, cleanup needed ? *
2033 * *
2034 ************************************************************************/
2035
2036/**
2037 * areBlanks:
2038 * @ctxt: an XML parser context
2039 * @str: a xmlChar *
2040 * @len: the size of @str
2041 *
2042 * Is this a sequence of blank chars that one can ignore ?
2043 *
2044 * Returns 1 if ignorable 0 otherwise.
2045 */
2046
2047static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
2048 int i, ret;
2049 xmlNodePtr lastChild;
2050
Daniel Veillard05c13a22001-09-09 08:38:09 +00002051 /*
2052 * Don't spend time trying to differentiate them, the same callback is
2053 * used !
2054 */
2055 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002056 return(0);
2057
Owen Taylor3473f882001-02-23 17:55:21 +00002058 /*
2059 * Check for xml:space value.
2060 */
2061 if (*(ctxt->space) == 1)
2062 return(0);
2063
2064 /*
2065 * Check that the string is made of blanks
2066 */
2067 for (i = 0;i < len;i++)
William M. Brack76e95df2003-10-18 16:20:14 +00002068 if (!(IS_BLANK_CH(str[i]))) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002069
2070 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002071 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002072 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002073 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002074 if (ctxt->myDoc != NULL) {
2075 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2076 if (ret == 0) return(1);
2077 if (ret == 1) return(0);
2078 }
2079
2080 /*
2081 * Otherwise, heuristic :-\
2082 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002083 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002084 if ((ctxt->node->children == NULL) &&
2085 (RAW == '<') && (NXT(1) == '/')) return(0);
2086
2087 lastChild = xmlGetLastChild(ctxt->node);
2088 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002089 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2090 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002091 } else if (xmlNodeIsText(lastChild))
2092 return(0);
2093 else if ((ctxt->node->children != NULL) &&
2094 (xmlNodeIsText(ctxt->node->children)))
2095 return(0);
2096 return(1);
2097}
2098
Owen Taylor3473f882001-02-23 17:55:21 +00002099/************************************************************************
2100 * *
2101 * Extra stuff for namespace support *
2102 * Relates to http://www.w3.org/TR/WD-xml-names *
2103 * *
2104 ************************************************************************/
2105
2106/**
2107 * xmlSplitQName:
2108 * @ctxt: an XML parser context
2109 * @name: an XML parser context
2110 * @prefix: a xmlChar **
2111 *
2112 * parse an UTF8 encoded XML qualified name string
2113 *
2114 * [NS 5] QName ::= (Prefix ':')? LocalPart
2115 *
2116 * [NS 6] Prefix ::= NCName
2117 *
2118 * [NS 7] LocalPart ::= NCName
2119 *
2120 * Returns the local part, and prefix is updated
2121 * to get the Prefix if any.
2122 */
2123
2124xmlChar *
2125xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2126 xmlChar buf[XML_MAX_NAMELEN + 5];
2127 xmlChar *buffer = NULL;
2128 int len = 0;
2129 int max = XML_MAX_NAMELEN;
2130 xmlChar *ret = NULL;
2131 const xmlChar *cur = name;
2132 int c;
2133
2134 *prefix = NULL;
2135
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002136 if (cur == NULL) return(NULL);
2137
Owen Taylor3473f882001-02-23 17:55:21 +00002138#ifndef XML_XML_NAMESPACE
2139 /* xml: prefix is not really a namespace */
2140 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2141 (cur[2] == 'l') && (cur[3] == ':'))
2142 return(xmlStrdup(name));
2143#endif
2144
Daniel Veillard597bc482003-07-24 16:08:28 +00002145 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002146 if (cur[0] == ':')
2147 return(xmlStrdup(name));
2148
2149 c = *cur++;
2150 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2151 buf[len++] = c;
2152 c = *cur++;
2153 }
2154 if (len >= max) {
2155 /*
2156 * Okay someone managed to make a huge name, so he's ready to pay
2157 * for the processing speed.
2158 */
2159 max = len * 2;
2160
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002161 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002162 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002163 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002164 return(NULL);
2165 }
2166 memcpy(buffer, buf, len);
2167 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2168 if (len + 10 > max) {
2169 max *= 2;
2170 buffer = (xmlChar *) xmlRealloc(buffer,
2171 max * sizeof(xmlChar));
2172 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002173 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002174 return(NULL);
2175 }
2176 }
2177 buffer[len++] = c;
2178 c = *cur++;
2179 }
2180 buffer[len] = 0;
2181 }
2182
Daniel Veillard597bc482003-07-24 16:08:28 +00002183 /* nasty but well=formed
2184 if ((c == ':') && (*cur == 0)) {
2185 return(xmlStrdup(name));
2186 } */
2187
Owen Taylor3473f882001-02-23 17:55:21 +00002188 if (buffer == NULL)
2189 ret = xmlStrndup(buf, len);
2190 else {
2191 ret = buffer;
2192 buffer = NULL;
2193 max = XML_MAX_NAMELEN;
2194 }
2195
2196
2197 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002198 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002199 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002200 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002201 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002202 }
Owen Taylor3473f882001-02-23 17:55:21 +00002203 len = 0;
2204
Daniel Veillardbb284f42002-10-16 18:02:47 +00002205 /*
2206 * Check that the first character is proper to start
2207 * a new name
2208 */
2209 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2210 ((c >= 0x41) && (c <= 0x5A)) ||
2211 (c == '_') || (c == ':'))) {
2212 int l;
2213 int first = CUR_SCHAR(cur, l);
2214
2215 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002216 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002217 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002218 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002219 }
2220 }
2221 cur++;
2222
Owen Taylor3473f882001-02-23 17:55:21 +00002223 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2224 buf[len++] = c;
2225 c = *cur++;
2226 }
2227 if (len >= max) {
2228 /*
2229 * Okay someone managed to make a huge name, so he's ready to pay
2230 * for the processing speed.
2231 */
2232 max = len * 2;
2233
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002234 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002235 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002236 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002237 return(NULL);
2238 }
2239 memcpy(buffer, buf, len);
2240 while (c != 0) { /* tested bigname2.xml */
2241 if (len + 10 > max) {
2242 max *= 2;
2243 buffer = (xmlChar *) xmlRealloc(buffer,
2244 max * sizeof(xmlChar));
2245 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002246 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002247 return(NULL);
2248 }
2249 }
2250 buffer[len++] = c;
2251 c = *cur++;
2252 }
2253 buffer[len] = 0;
2254 }
2255
2256 if (buffer == NULL)
2257 ret = xmlStrndup(buf, len);
2258 else {
2259 ret = buffer;
2260 }
2261 }
2262
2263 return(ret);
2264}
2265
2266/************************************************************************
2267 * *
2268 * The parser itself *
2269 * Relates to http://www.w3.org/TR/REC-xml *
2270 * *
2271 ************************************************************************/
2272
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002273static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002274static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002275 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002276
Owen Taylor3473f882001-02-23 17:55:21 +00002277/**
2278 * xmlParseName:
2279 * @ctxt: an XML parser context
2280 *
2281 * parse an XML name.
2282 *
2283 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2284 * CombiningChar | Extender
2285 *
2286 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2287 *
2288 * [6] Names ::= Name (S Name)*
2289 *
2290 * Returns the Name parsed or NULL
2291 */
2292
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002293const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002294xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002295 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002296 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002297 int count = 0;
2298
2299 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002300
2301 /*
2302 * Accelerator for simple ASCII names
2303 */
2304 in = ctxt->input->cur;
2305 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2306 ((*in >= 0x41) && (*in <= 0x5A)) ||
2307 (*in == '_') || (*in == ':')) {
2308 in++;
2309 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2310 ((*in >= 0x41) && (*in <= 0x5A)) ||
2311 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002312 (*in == '_') || (*in == '-') ||
2313 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002314 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002315 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002316 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002317 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002318 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002319 ctxt->nbChars += count;
2320 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002321 if (ret == NULL)
2322 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002323 return(ret);
2324 }
2325 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002326 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002327}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002328
Daniel Veillard46de64e2002-05-29 08:21:33 +00002329/**
2330 * xmlParseNameAndCompare:
2331 * @ctxt: an XML parser context
2332 *
2333 * parse an XML name and compares for match
2334 * (specialized for endtag parsing)
2335 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002336 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2337 * and the name for mismatch
2338 */
2339
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002340static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002341xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
2342 const xmlChar *cmp = other;
2343 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002344 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002345
2346 GROW;
2347
2348 in = ctxt->input->cur;
2349 while (*in != 0 && *in == *cmp) {
2350 ++in;
2351 ++cmp;
2352 }
William M. Brack76e95df2003-10-18 16:20:14 +00002353 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002354 /* success */
2355 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002356 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002357 }
2358 /* failure (or end of input buffer), check with full function */
2359 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002360 /* strings coming from the dictionnary direct compare possible */
2361 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002362 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002363 }
2364 return ret;
2365}
2366
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002367static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002368xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002369 int len = 0, l;
2370 int c;
2371 int count = 0;
2372
2373 /*
2374 * Handler for more complex cases
2375 */
2376 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002377 c = CUR_CHAR(l);
2378 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2379 (!IS_LETTER(c) && (c != '_') &&
2380 (c != ':'))) {
2381 return(NULL);
2382 }
2383
2384 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002385 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002386 (c == '.') || (c == '-') ||
2387 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002388 (IS_COMBINING(c)) ||
2389 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002390 if (count++ > 100) {
2391 count = 0;
2392 GROW;
2393 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002394 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002395 NEXTL(l);
2396 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002397 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002398 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002399}
2400
2401/**
2402 * xmlParseStringName:
2403 * @ctxt: an XML parser context
2404 * @str: a pointer to the string pointer (IN/OUT)
2405 *
2406 * parse an XML name.
2407 *
2408 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2409 * CombiningChar | Extender
2410 *
2411 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2412 *
2413 * [6] Names ::= Name (S Name)*
2414 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002415 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002416 * is updated to the current location in the string.
2417 */
2418
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002419static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002420xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2421 xmlChar buf[XML_MAX_NAMELEN + 5];
2422 const xmlChar *cur = *str;
2423 int len = 0, l;
2424 int c;
2425
2426 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002427 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002428 (c != ':')) {
2429 return(NULL);
2430 }
2431
William M. Brack871611b2003-10-18 04:53:14 +00002432 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002433 (c == '.') || (c == '-') ||
2434 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002435 (IS_COMBINING(c)) ||
2436 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002437 COPY_BUF(l,buf,len,c);
2438 cur += l;
2439 c = CUR_SCHAR(cur, l);
2440 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2441 /*
2442 * Okay someone managed to make a huge name, so he's ready to pay
2443 * for the processing speed.
2444 */
2445 xmlChar *buffer;
2446 int max = len * 2;
2447
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002448 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002449 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002450 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002451 return(NULL);
2452 }
2453 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002454 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002455 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002456 (c == '.') || (c == '-') ||
2457 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002458 (IS_COMBINING(c)) ||
2459 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002460 if (len + 10 > max) {
2461 max *= 2;
2462 buffer = (xmlChar *) xmlRealloc(buffer,
2463 max * sizeof(xmlChar));
2464 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002465 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002466 return(NULL);
2467 }
2468 }
2469 COPY_BUF(l,buffer,len,c);
2470 cur += l;
2471 c = CUR_SCHAR(cur, l);
2472 }
2473 buffer[len] = 0;
2474 *str = cur;
2475 return(buffer);
2476 }
2477 }
2478 *str = cur;
2479 return(xmlStrndup(buf, len));
2480}
2481
2482/**
2483 * xmlParseNmtoken:
2484 * @ctxt: an XML parser context
2485 *
2486 * parse an XML Nmtoken.
2487 *
2488 * [7] Nmtoken ::= (NameChar)+
2489 *
2490 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2491 *
2492 * Returns the Nmtoken parsed or NULL
2493 */
2494
2495xmlChar *
2496xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2497 xmlChar buf[XML_MAX_NAMELEN + 5];
2498 int len = 0, l;
2499 int c;
2500 int count = 0;
2501
2502 GROW;
2503 c = CUR_CHAR(l);
2504
William M. Brack871611b2003-10-18 04:53:14 +00002505 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002506 (c == '.') || (c == '-') ||
2507 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002508 (IS_COMBINING(c)) ||
2509 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002510 if (count++ > 100) {
2511 count = 0;
2512 GROW;
2513 }
2514 COPY_BUF(l,buf,len,c);
2515 NEXTL(l);
2516 c = CUR_CHAR(l);
2517 if (len >= XML_MAX_NAMELEN) {
2518 /*
2519 * Okay someone managed to make a huge token, so he's ready to pay
2520 * for the processing speed.
2521 */
2522 xmlChar *buffer;
2523 int max = len * 2;
2524
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002525 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002526 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002527 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002528 return(NULL);
2529 }
2530 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002531 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002532 (c == '.') || (c == '-') ||
2533 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002534 (IS_COMBINING(c)) ||
2535 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002536 if (count++ > 100) {
2537 count = 0;
2538 GROW;
2539 }
2540 if (len + 10 > max) {
2541 max *= 2;
2542 buffer = (xmlChar *) xmlRealloc(buffer,
2543 max * sizeof(xmlChar));
2544 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002545 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002546 return(NULL);
2547 }
2548 }
2549 COPY_BUF(l,buffer,len,c);
2550 NEXTL(l);
2551 c = CUR_CHAR(l);
2552 }
2553 buffer[len] = 0;
2554 return(buffer);
2555 }
2556 }
2557 if (len == 0)
2558 return(NULL);
2559 return(xmlStrndup(buf, len));
2560}
2561
2562/**
2563 * xmlParseEntityValue:
2564 * @ctxt: an XML parser context
2565 * @orig: if non-NULL store a copy of the original entity value
2566 *
2567 * parse a value for ENTITY declarations
2568 *
2569 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2570 * "'" ([^%&'] | PEReference | Reference)* "'"
2571 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002572 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002573 */
2574
2575xmlChar *
2576xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2577 xmlChar *buf = NULL;
2578 int len = 0;
2579 int size = XML_PARSER_BUFFER_SIZE;
2580 int c, l;
2581 xmlChar stop;
2582 xmlChar *ret = NULL;
2583 const xmlChar *cur = NULL;
2584 xmlParserInputPtr input;
2585
2586 if (RAW == '"') stop = '"';
2587 else if (RAW == '\'') stop = '\'';
2588 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002589 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002590 return(NULL);
2591 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002592 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002593 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002594 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002595 return(NULL);
2596 }
2597
2598 /*
2599 * The content of the entity definition is copied in a buffer.
2600 */
2601
2602 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2603 input = ctxt->input;
2604 GROW;
2605 NEXT;
2606 c = CUR_CHAR(l);
2607 /*
2608 * NOTE: 4.4.5 Included in Literal
2609 * When a parameter entity reference appears in a literal entity
2610 * value, ... a single or double quote character in the replacement
2611 * text is always treated as a normal data character and will not
2612 * terminate the literal.
2613 * In practice it means we stop the loop only when back at parsing
2614 * the initial entity and the quote is found
2615 */
William M. Brack871611b2003-10-18 04:53:14 +00002616 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002617 (ctxt->input != input))) {
2618 if (len + 5 >= size) {
2619 size *= 2;
2620 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2621 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002622 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002623 return(NULL);
2624 }
2625 }
2626 COPY_BUF(l,buf,len,c);
2627 NEXTL(l);
2628 /*
2629 * Pop-up of finished entities.
2630 */
2631 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2632 xmlPopInput(ctxt);
2633
2634 GROW;
2635 c = CUR_CHAR(l);
2636 if (c == 0) {
2637 GROW;
2638 c = CUR_CHAR(l);
2639 }
2640 }
2641 buf[len] = 0;
2642
2643 /*
2644 * Raise problem w.r.t. '&' and '%' being used in non-entities
2645 * reference constructs. Note Charref will be handled in
2646 * xmlStringDecodeEntities()
2647 */
2648 cur = buf;
2649 while (*cur != 0) { /* non input consuming */
2650 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2651 xmlChar *name;
2652 xmlChar tmp = *cur;
2653
2654 cur++;
2655 name = xmlParseStringName(ctxt, &cur);
2656 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002657 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002658 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002659 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002660 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002661 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2662 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002663 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002664 }
2665 if (name != NULL)
2666 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002667 if (*cur == 0)
2668 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002669 }
2670 cur++;
2671 }
2672
2673 /*
2674 * Then PEReference entities are substituted.
2675 */
2676 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002677 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002678 xmlFree(buf);
2679 } else {
2680 NEXT;
2681 /*
2682 * NOTE: 4.4.7 Bypassed
2683 * When a general entity reference appears in the EntityValue in
2684 * an entity declaration, it is bypassed and left as is.
2685 * so XML_SUBSTITUTE_REF is not set here.
2686 */
2687 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2688 0, 0, 0);
2689 if (orig != NULL)
2690 *orig = buf;
2691 else
2692 xmlFree(buf);
2693 }
2694
2695 return(ret);
2696}
2697
2698/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00002699 * xmlParseAttValueComplex:
2700 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00002701 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002702 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00002703 *
2704 * parse a value for an attribute, this is the fallback function
2705 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002706 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00002707 *
2708 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2709 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00002710static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002711xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00002712 xmlChar limit = 0;
2713 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002714 int len = 0;
2715 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002716 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002717 xmlChar *current = NULL;
2718 xmlEntityPtr ent;
2719
Owen Taylor3473f882001-02-23 17:55:21 +00002720 if (NXT(0) == '"') {
2721 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2722 limit = '"';
2723 NEXT;
2724 } else if (NXT(0) == '\'') {
2725 limit = '\'';
2726 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2727 NEXT;
2728 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002729 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002730 return(NULL);
2731 }
2732
2733 /*
2734 * allocate a translation buffer.
2735 */
2736 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002737 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002738 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002739
2740 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002741 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002742 */
2743 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002744 while ((NXT(0) != limit) && /* checked */
2745 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002746 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002747 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00002748 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002749 if (NXT(1) == '#') {
2750 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002751
Owen Taylor3473f882001-02-23 17:55:21 +00002752 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002753 if (ctxt->replaceEntities) {
2754 if (len > buf_size - 10) {
2755 growBuffer(buf);
2756 }
2757 buf[len++] = '&';
2758 } else {
2759 /*
2760 * The reparsing will be done in xmlStringGetNodeList()
2761 * called by the attribute() function in SAX.c
2762 */
Daniel Veillard319a7422001-09-11 09:27:09 +00002763 if (len > buf_size - 10) {
2764 growBuffer(buf);
2765 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002766 buf[len++] = '&';
2767 buf[len++] = '#';
2768 buf[len++] = '3';
2769 buf[len++] = '8';
2770 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00002771 }
2772 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002773 if (len > buf_size - 10) {
2774 growBuffer(buf);
2775 }
Owen Taylor3473f882001-02-23 17:55:21 +00002776 len += xmlCopyChar(0, &buf[len], val);
2777 }
2778 } else {
2779 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002780 if ((ent != NULL) &&
2781 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2782 if (len > buf_size - 10) {
2783 growBuffer(buf);
2784 }
2785 if ((ctxt->replaceEntities == 0) &&
2786 (ent->content[0] == '&')) {
2787 buf[len++] = '&';
2788 buf[len++] = '#';
2789 buf[len++] = '3';
2790 buf[len++] = '8';
2791 buf[len++] = ';';
2792 } else {
2793 buf[len++] = ent->content[0];
2794 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002795 } else if ((ent != NULL) &&
2796 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002797 xmlChar *rep;
2798
2799 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2800 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002801 XML_SUBSTITUTE_REF,
2802 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00002803 if (rep != NULL) {
2804 current = rep;
2805 while (*current != 0) { /* non input consuming */
2806 buf[len++] = *current++;
2807 if (len > buf_size - 10) {
2808 growBuffer(buf);
2809 }
2810 }
2811 xmlFree(rep);
2812 }
2813 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002814 if (len > buf_size - 10) {
2815 growBuffer(buf);
2816 }
Owen Taylor3473f882001-02-23 17:55:21 +00002817 if (ent->content != NULL)
2818 buf[len++] = ent->content[0];
2819 }
2820 } else if (ent != NULL) {
2821 int i = xmlStrlen(ent->name);
2822 const xmlChar *cur = ent->name;
2823
2824 /*
2825 * This may look absurd but is needed to detect
2826 * entities problems
2827 */
2828 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2829 (ent->content != NULL)) {
2830 xmlChar *rep;
2831 rep = xmlStringDecodeEntities(ctxt, ent->content,
2832 XML_SUBSTITUTE_REF, 0, 0, 0);
2833 if (rep != NULL)
2834 xmlFree(rep);
2835 }
2836
2837 /*
2838 * Just output the reference
2839 */
2840 buf[len++] = '&';
2841 if (len > buf_size - i - 10) {
2842 growBuffer(buf);
2843 }
2844 for (;i > 0;i--)
2845 buf[len++] = *cur++;
2846 buf[len++] = ';';
2847 }
2848 }
2849 } else {
2850 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002851 if ((len != 0) || (!normalize)) {
2852 if ((!normalize) || (!in_space)) {
2853 COPY_BUF(l,buf,len,0x20);
2854 if (len > buf_size - 10) {
2855 growBuffer(buf);
2856 }
2857 }
2858 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002859 }
2860 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002861 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002862 COPY_BUF(l,buf,len,c);
2863 if (len > buf_size - 10) {
2864 growBuffer(buf);
2865 }
2866 }
2867 NEXTL(l);
2868 }
2869 GROW;
2870 c = CUR_CHAR(l);
2871 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002872 if ((in_space) && (normalize)) {
2873 while (buf[len - 1] == 0x20) len--;
2874 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002875 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002876 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002877 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002878 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002879 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2880 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002881 } else
2882 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00002883 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00002884 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002885
2886mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002887 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002888 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002889}
2890
2891/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00002892 * xmlParseAttValue:
2893 * @ctxt: an XML parser context
2894 *
2895 * parse a value for an attribute
2896 * Note: the parser won't do substitution of entities here, this
2897 * will be handled later in xmlStringGetNodeList
2898 *
2899 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2900 * "'" ([^<&'] | Reference)* "'"
2901 *
2902 * 3.3.3 Attribute-Value Normalization:
2903 * Before the value of an attribute is passed to the application or
2904 * checked for validity, the XML processor must normalize it as follows:
2905 * - a character reference is processed by appending the referenced
2906 * character to the attribute value
2907 * - an entity reference is processed by recursively processing the
2908 * replacement text of the entity
2909 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2910 * appending #x20 to the normalized value, except that only a single
2911 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2912 * parsed entity or the literal entity value of an internal parsed entity
2913 * - other characters are processed by appending them to the normalized value
2914 * If the declared value is not CDATA, then the XML processor must further
2915 * process the normalized attribute value by discarding any leading and
2916 * trailing space (#x20) characters, and by replacing sequences of space
2917 * (#x20) characters by a single space (#x20) character.
2918 * All attributes for which no declaration has been read should be treated
2919 * by a non-validating parser as if declared CDATA.
2920 *
2921 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2922 */
2923
2924
2925xmlChar *
2926xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002927 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00002928}
2929
2930/**
Owen Taylor3473f882001-02-23 17:55:21 +00002931 * xmlParseSystemLiteral:
2932 * @ctxt: an XML parser context
2933 *
2934 * parse an XML Literal
2935 *
2936 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2937 *
2938 * Returns the SystemLiteral parsed or NULL
2939 */
2940
2941xmlChar *
2942xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2943 xmlChar *buf = NULL;
2944 int len = 0;
2945 int size = XML_PARSER_BUFFER_SIZE;
2946 int cur, l;
2947 xmlChar stop;
2948 int state = ctxt->instate;
2949 int count = 0;
2950
2951 SHRINK;
2952 if (RAW == '"') {
2953 NEXT;
2954 stop = '"';
2955 } else if (RAW == '\'') {
2956 NEXT;
2957 stop = '\'';
2958 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002959 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002960 return(NULL);
2961 }
2962
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002963 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002964 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002965 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002966 return(NULL);
2967 }
2968 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2969 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00002970 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002971 if (len + 5 >= size) {
2972 size *= 2;
2973 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2974 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002975 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002976 ctxt->instate = (xmlParserInputState) state;
2977 return(NULL);
2978 }
2979 }
2980 count++;
2981 if (count > 50) {
2982 GROW;
2983 count = 0;
2984 }
2985 COPY_BUF(l,buf,len,cur);
2986 NEXTL(l);
2987 cur = CUR_CHAR(l);
2988 if (cur == 0) {
2989 GROW;
2990 SHRINK;
2991 cur = CUR_CHAR(l);
2992 }
2993 }
2994 buf[len] = 0;
2995 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00002996 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002997 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002998 } else {
2999 NEXT;
3000 }
3001 return(buf);
3002}
3003
3004/**
3005 * xmlParsePubidLiteral:
3006 * @ctxt: an XML parser context
3007 *
3008 * parse an XML public literal
3009 *
3010 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3011 *
3012 * Returns the PubidLiteral parsed or NULL.
3013 */
3014
3015xmlChar *
3016xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3017 xmlChar *buf = NULL;
3018 int len = 0;
3019 int size = XML_PARSER_BUFFER_SIZE;
3020 xmlChar cur;
3021 xmlChar stop;
3022 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003023 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003024
3025 SHRINK;
3026 if (RAW == '"') {
3027 NEXT;
3028 stop = '"';
3029 } else if (RAW == '\'') {
3030 NEXT;
3031 stop = '\'';
3032 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003033 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003034 return(NULL);
3035 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003036 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003037 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003038 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003039 return(NULL);
3040 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003041 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003042 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003043 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003044 if (len + 1 >= size) {
3045 size *= 2;
3046 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3047 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003048 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003049 return(NULL);
3050 }
3051 }
3052 buf[len++] = cur;
3053 count++;
3054 if (count > 50) {
3055 GROW;
3056 count = 0;
3057 }
3058 NEXT;
3059 cur = CUR;
3060 if (cur == 0) {
3061 GROW;
3062 SHRINK;
3063 cur = CUR;
3064 }
3065 }
3066 buf[len] = 0;
3067 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003068 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003069 } else {
3070 NEXT;
3071 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003072 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003073 return(buf);
3074}
3075
Daniel Veillard48b2f892001-02-25 16:11:03 +00003076void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00003077/**
3078 * xmlParseCharData:
3079 * @ctxt: an XML parser context
3080 * @cdata: int indicating whether we are within a CDATA section
3081 *
3082 * parse a CharData section.
3083 * if we are within a CDATA section ']]>' marks an end of section.
3084 *
3085 * The right angle bracket (>) may be represented using the string "&gt;",
3086 * and must, for compatibility, be escaped using "&gt;" or a character
3087 * reference when it appears in the string "]]>" in content, when that
3088 * string is not marking the end of a CDATA section.
3089 *
3090 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3091 */
3092
3093void
3094xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003095 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003096 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003097 int line = ctxt->input->line;
3098 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003099
3100 SHRINK;
3101 GROW;
3102 /*
3103 * Accelerated common case where input don't need to be
3104 * modified before passing it to the handler.
3105 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003106 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003107 in = ctxt->input->cur;
3108 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003109get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00003110 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
3111 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003112 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003113 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003114 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003115 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003116 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003117 ctxt->input->line++;
3118 in++;
3119 }
3120 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003121 }
3122 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003123 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003124 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003125 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003126 return;
3127 }
3128 in++;
3129 goto get_more;
3130 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003131 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003132 if (nbchar > 0) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003133 if ((ctxt->sax->ignorableWhitespace !=
3134 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003135 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003136 const xmlChar *tmp = ctxt->input->cur;
3137 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003138
Daniel Veillarda7374592001-05-10 14:17:55 +00003139 if (areBlanks(ctxt, tmp, nbchar)) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003140 ctxt->sax->ignorableWhitespace(ctxt->userData,
3141 tmp, nbchar);
3142 } else if (ctxt->sax->characters != NULL)
3143 ctxt->sax->characters(ctxt->userData,
3144 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003145 line = ctxt->input->line;
3146 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003147 } else {
3148 if (ctxt->sax->characters != NULL)
3149 ctxt->sax->characters(ctxt->userData,
3150 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003151 line = ctxt->input->line;
3152 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003153 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003154 }
3155 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003156 if (*in == 0xD) {
3157 in++;
3158 if (*in == 0xA) {
3159 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003160 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003161 ctxt->input->line++;
3162 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003163 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003164 in--;
3165 }
3166 if (*in == '<') {
3167 return;
3168 }
3169 if (*in == '&') {
3170 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003171 }
3172 SHRINK;
3173 GROW;
3174 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003175 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003176 nbchar = 0;
3177 }
Daniel Veillard50582112001-03-26 22:52:16 +00003178 ctxt->input->line = line;
3179 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003180 xmlParseCharDataComplex(ctxt, cdata);
3181}
3182
Daniel Veillard01c13b52002-12-10 15:19:08 +00003183/**
3184 * xmlParseCharDataComplex:
3185 * @ctxt: an XML parser context
3186 * @cdata: int indicating whether we are within a CDATA section
3187 *
3188 * parse a CharData section.this is the fallback function
3189 * of xmlParseCharData() when the parsing requires handling
3190 * of non-ASCII characters.
3191 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003192void
3193xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003194 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3195 int nbchar = 0;
3196 int cur, l;
3197 int count = 0;
3198
3199 SHRINK;
3200 GROW;
3201 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003202 while ((cur != '<') && /* checked */
3203 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003204 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003205 if ((cur == ']') && (NXT(1) == ']') &&
3206 (NXT(2) == '>')) {
3207 if (cdata) break;
3208 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003209 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003210 }
3211 }
3212 COPY_BUF(l,buf,nbchar,cur);
3213 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003214 buf[nbchar] = 0;
3215
Owen Taylor3473f882001-02-23 17:55:21 +00003216 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003217 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003218 */
3219 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3220 if (areBlanks(ctxt, buf, nbchar)) {
3221 if (ctxt->sax->ignorableWhitespace != NULL)
3222 ctxt->sax->ignorableWhitespace(ctxt->userData,
3223 buf, nbchar);
3224 } else {
3225 if (ctxt->sax->characters != NULL)
3226 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3227 }
3228 }
3229 nbchar = 0;
3230 }
3231 count++;
3232 if (count > 50) {
3233 GROW;
3234 count = 0;
3235 }
3236 NEXTL(l);
3237 cur = CUR_CHAR(l);
3238 }
3239 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003240 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003241 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003242 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003243 */
3244 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3245 if (areBlanks(ctxt, buf, nbchar)) {
3246 if (ctxt->sax->ignorableWhitespace != NULL)
3247 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3248 } else {
3249 if (ctxt->sax->characters != NULL)
3250 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3251 }
3252 }
3253 }
3254}
3255
3256/**
3257 * xmlParseExternalID:
3258 * @ctxt: an XML parser context
3259 * @publicID: a xmlChar** receiving PubidLiteral
3260 * @strict: indicate whether we should restrict parsing to only
3261 * production [75], see NOTE below
3262 *
3263 * Parse an External ID or a Public ID
3264 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003265 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003266 * 'PUBLIC' S PubidLiteral S SystemLiteral
3267 *
3268 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3269 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3270 *
3271 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3272 *
3273 * Returns the function returns SystemLiteral and in the second
3274 * case publicID receives PubidLiteral, is strict is off
3275 * it is possible to return NULL and have publicID set.
3276 */
3277
3278xmlChar *
3279xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3280 xmlChar *URI = NULL;
3281
3282 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003283
3284 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003285 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003286 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003287 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003288 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3289 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003290 }
3291 SKIP_BLANKS;
3292 URI = xmlParseSystemLiteral(ctxt);
3293 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003294 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003295 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003296 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003297 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003298 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003299 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003300 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003301 }
3302 SKIP_BLANKS;
3303 *publicID = xmlParsePubidLiteral(ctxt);
3304 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003305 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003306 }
3307 if (strict) {
3308 /*
3309 * We don't handle [83] so "S SystemLiteral" is required.
3310 */
William M. Brack76e95df2003-10-18 16:20:14 +00003311 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003312 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003313 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003314 }
3315 } else {
3316 /*
3317 * We handle [83] so we return immediately, if
3318 * "S SystemLiteral" is not detected. From a purely parsing
3319 * point of view that's a nice mess.
3320 */
3321 const xmlChar *ptr;
3322 GROW;
3323
3324 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003325 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003326
William M. Brack76e95df2003-10-18 16:20:14 +00003327 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003328 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3329 }
3330 SKIP_BLANKS;
3331 URI = xmlParseSystemLiteral(ctxt);
3332 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003333 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003334 }
3335 }
3336 return(URI);
3337}
3338
3339/**
3340 * xmlParseComment:
3341 * @ctxt: an XML parser context
3342 *
3343 * Skip an XML (SGML) comment <!-- .... -->
3344 * The spec says that "For compatibility, the string "--" (double-hyphen)
3345 * must not occur within comments. "
3346 *
3347 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3348 */
3349void
3350xmlParseComment(xmlParserCtxtPtr ctxt) {
3351 xmlChar *buf = NULL;
3352 int len;
3353 int size = XML_PARSER_BUFFER_SIZE;
3354 int q, ql;
3355 int r, rl;
3356 int cur, l;
3357 xmlParserInputState state;
3358 xmlParserInputPtr input = ctxt->input;
3359 int count = 0;
3360
3361 /*
3362 * Check that there is a comment right here.
3363 */
3364 if ((RAW != '<') || (NXT(1) != '!') ||
3365 (NXT(2) != '-') || (NXT(3) != '-')) return;
3366
3367 state = ctxt->instate;
3368 ctxt->instate = XML_PARSER_COMMENT;
3369 SHRINK;
3370 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003371 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003372 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003373 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003374 ctxt->instate = state;
3375 return;
3376 }
3377 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003378 if (q == 0)
3379 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003380 NEXTL(ql);
3381 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003382 if (r == 0)
3383 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003384 NEXTL(rl);
3385 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003386 if (cur == 0)
3387 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003388 len = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003389 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003390 ((cur != '>') ||
3391 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003392 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003393 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003394 }
3395 if (len + 5 >= size) {
3396 size *= 2;
3397 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3398 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003399 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003400 ctxt->instate = state;
3401 return;
3402 }
3403 }
3404 COPY_BUF(ql,buf,len,q);
3405 q = r;
3406 ql = rl;
3407 r = cur;
3408 rl = l;
3409
3410 count++;
3411 if (count > 50) {
3412 GROW;
3413 count = 0;
3414 }
3415 NEXTL(l);
3416 cur = CUR_CHAR(l);
3417 if (cur == 0) {
3418 SHRINK;
3419 GROW;
3420 cur = CUR_CHAR(l);
3421 }
3422 }
3423 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003424 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003425 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003426 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003427 xmlFree(buf);
3428 } else {
3429 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003430 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3431 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003432 }
3433 NEXT;
3434 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3435 (!ctxt->disableSAX))
3436 ctxt->sax->comment(ctxt->userData, buf);
3437 xmlFree(buf);
3438 }
3439 ctxt->instate = state;
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003440 return;
3441not_terminated:
3442 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3443 "Comment not terminated\n", NULL);
3444 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003445}
3446
3447/**
3448 * xmlParsePITarget:
3449 * @ctxt: an XML parser context
3450 *
3451 * parse the name of a PI
3452 *
3453 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3454 *
3455 * Returns the PITarget name or NULL
3456 */
3457
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003458const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003459xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003460 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003461
3462 name = xmlParseName(ctxt);
3463 if ((name != NULL) &&
3464 ((name[0] == 'x') || (name[0] == 'X')) &&
3465 ((name[1] == 'm') || (name[1] == 'M')) &&
3466 ((name[2] == 'l') || (name[2] == 'L'))) {
3467 int i;
3468 if ((name[0] == 'x') && (name[1] == 'm') &&
3469 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003470 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003471 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003472 return(name);
3473 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003474 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003475 return(name);
3476 }
3477 for (i = 0;;i++) {
3478 if (xmlW3CPIs[i] == NULL) break;
3479 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3480 return(name);
3481 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00003482 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
3483 "xmlParsePITarget: invalid name prefix 'xml'\n",
3484 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003485 }
3486 return(name);
3487}
3488
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The expected content is: optional blanks, the word "catalog", optional
 * blanks, '=', optional blanks, a value quoted with ' or ", and optional
 * trailing blanks. A missing '=' makes the function return silently; any
 * other deviation raises an XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *uri = NULL;
    const xmlChar *cursor = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* pseudo-attribute name */
    for (; IS_BLANK_CH(*cursor); cursor++)
        ;
    if (xmlStrncmp(cursor, BAD_CAST"catalog", 7) != 0)
        goto error;
    cursor += 7;

    /* '=' sign, its absence is not reported */
    for (; IS_BLANK_CH(*cursor); cursor++)
        ;
    if (*cursor != '=')
        return;
    cursor++;

    /* opening quote */
    for (; IS_BLANK_CH(*cursor); cursor++)
        ;
    quote = *cursor;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    cursor++;

    /* value, up to the matching closing quote */
    start = cursor;
    for (; (*cursor != 0) && (*cursor != quote); cursor++)
        ;
    if (*cursor == 0)
        goto error;
    uri = xmlStrndup(start, cursor - start);
    cursor++;

    /* only blanks may follow the value */
    for (; IS_BLANK_CH(*cursor); cursor++)
        ;
    if (*cursor != 0)
        goto error;

    if (uri != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, uri);
        xmlFree(uri);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (uri != NULL)
        xmlFree(uri);
}
#endif
3550
Owen Taylor3473f882001-02-23 17:55:21 +00003551/**
3552 * xmlParsePI:
3553 * @ctxt: an XML parser context
3554 *
3555 * parse an XML Processing Instruction.
3556 *
3557 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3558 *
3559 * The processing is transfered to SAX once parsed.
3560 */
3561
3562void
3563xmlParsePI(xmlParserCtxtPtr ctxt) {
3564 xmlChar *buf = NULL;
3565 int len = 0;
3566 int size = XML_PARSER_BUFFER_SIZE;
3567 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003568 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00003569 xmlParserInputState state;
3570 int count = 0;
3571
3572 if ((RAW == '<') && (NXT(1) == '?')) {
3573 xmlParserInputPtr input = ctxt->input;
3574 state = ctxt->instate;
3575 ctxt->instate = XML_PARSER_PI;
3576 /*
3577 * this is a Processing Instruction.
3578 */
3579 SKIP(2);
3580 SHRINK;
3581
3582 /*
3583 * Parse the target name and check for special support like
3584 * namespace.
3585 */
3586 target = xmlParsePITarget(ctxt);
3587 if (target != NULL) {
3588 if ((RAW == '?') && (NXT(1) == '>')) {
3589 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003590 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3591 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003592 }
3593 SKIP(2);
3594
3595 /*
3596 * SAX: PI detected.
3597 */
3598 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3599 (ctxt->sax->processingInstruction != NULL))
3600 ctxt->sax->processingInstruction(ctxt->userData,
3601 target, NULL);
3602 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00003603 return;
3604 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003605 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003606 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003607 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003608 ctxt->instate = state;
3609 return;
3610 }
3611 cur = CUR;
3612 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003613 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
3614 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003615 }
3616 SKIP_BLANKS;
3617 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003618 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003619 ((cur != '?') || (NXT(1) != '>'))) {
3620 if (len + 5 >= size) {
3621 size *= 2;
3622 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3623 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003624 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003625 ctxt->instate = state;
3626 return;
3627 }
3628 }
3629 count++;
3630 if (count > 50) {
3631 GROW;
3632 count = 0;
3633 }
3634 COPY_BUF(l,buf,len,cur);
3635 NEXTL(l);
3636 cur = CUR_CHAR(l);
3637 if (cur == 0) {
3638 SHRINK;
3639 GROW;
3640 cur = CUR_CHAR(l);
3641 }
3642 }
3643 buf[len] = 0;
3644 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003645 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
3646 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003647 } else {
3648 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003649 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3650 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003651 }
3652 SKIP(2);
3653
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003654#ifdef LIBXML_CATALOG_ENABLED
3655 if (((state == XML_PARSER_MISC) ||
3656 (state == XML_PARSER_START)) &&
3657 (xmlStrEqual(target, XML_CATALOG_PI))) {
3658 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3659 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3660 (allow == XML_CATA_ALLOW_ALL))
3661 xmlParseCatalogPI(ctxt, buf);
3662 }
3663#endif
3664
3665
Owen Taylor3473f882001-02-23 17:55:21 +00003666 /*
3667 * SAX: PI detected.
3668 */
3669 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3670 (ctxt->sax->processingInstruction != NULL))
3671 ctxt->sax->processingInstruction(ctxt->userData,
3672 target, buf);
3673 }
3674 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003675 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003676 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003677 }
3678 ctxt->instate = state;
3679 }
3680}
3681
3682/**
3683 * xmlParseNotationDecl:
3684 * @ctxt: an XML parser context
3685 *
3686 * parse a notation declaration
3687 *
3688 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3689 *
3690 * Hence there is actually 3 choices:
3691 * 'PUBLIC' S PubidLiteral
3692 * 'PUBLIC' S PubidLiteral S SystemLiteral
3693 * and 'SYSTEM' S SystemLiteral
3694 *
3695 * See the NOTE on xmlParseExternalID().
3696 */
3697
3698void
3699xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003700 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003701 xmlChar *Pubid;
3702 xmlChar *Systemid;
3703
Daniel Veillarda07050d2003-10-19 14:46:32 +00003704 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003705 xmlParserInputPtr input = ctxt->input;
3706 SHRINK;
3707 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00003708 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003709 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3710 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003711 return;
3712 }
3713 SKIP_BLANKS;
3714
Daniel Veillard76d66f42001-05-16 21:05:17 +00003715 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003716 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003717 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003718 return;
3719 }
William M. Brack76e95df2003-10-18 16:20:14 +00003720 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003721 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003722 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003723 return;
3724 }
3725 SKIP_BLANKS;
3726
3727 /*
3728 * Parse the IDs.
3729 */
3730 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3731 SKIP_BLANKS;
3732
3733 if (RAW == '>') {
3734 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003735 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3736 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003737 }
3738 NEXT;
3739 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3740 (ctxt->sax->notationDecl != NULL))
3741 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3742 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003743 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003744 }
Owen Taylor3473f882001-02-23 17:55:21 +00003745 if (Systemid != NULL) xmlFree(Systemid);
3746 if (Pubid != NULL) xmlFree(Pubid);
3747 }
3748}
3749
3750/**
3751 * xmlParseEntityDecl:
3752 * @ctxt: an XML parser context
3753 *
3754 * parse <!ENTITY declarations
3755 *
3756 * [70] EntityDecl ::= GEDecl | PEDecl
3757 *
3758 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3759 *
3760 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3761 *
3762 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3763 *
3764 * [74] PEDef ::= EntityValue | ExternalID
3765 *
3766 * [76] NDataDecl ::= S 'NDATA' S Name
3767 *
3768 * [ VC: Notation Declared ]
3769 * The Name must match the declared name of a notation.
3770 */
3771
3772void
3773xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003774 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003775 xmlChar *value = NULL;
3776 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003777 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003778 int isParameter = 0;
3779 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003780 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003781
3782 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003783 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003784 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003785 SHRINK;
3786 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003787 skipped = SKIP_BLANKS;
3788 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003789 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3790 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003791 }
Owen Taylor3473f882001-02-23 17:55:21 +00003792
3793 if (RAW == '%') {
3794 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003795 skipped = SKIP_BLANKS;
3796 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003797 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3798 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003799 }
Owen Taylor3473f882001-02-23 17:55:21 +00003800 isParameter = 1;
3801 }
3802
Daniel Veillard76d66f42001-05-16 21:05:17 +00003803 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003804 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003805 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
3806 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003807 return;
3808 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003809 skipped = SKIP_BLANKS;
3810 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003811 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3812 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003813 }
Owen Taylor3473f882001-02-23 17:55:21 +00003814
Daniel Veillardf5582f12002-06-11 10:08:16 +00003815 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003816 /*
3817 * handle the various case of definitions...
3818 */
3819 if (isParameter) {
3820 if ((RAW == '"') || (RAW == '\'')) {
3821 value = xmlParseEntityValue(ctxt, &orig);
3822 if (value) {
3823 if ((ctxt->sax != NULL) &&
3824 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3825 ctxt->sax->entityDecl(ctxt->userData, name,
3826 XML_INTERNAL_PARAMETER_ENTITY,
3827 NULL, NULL, value);
3828 }
3829 } else {
3830 URI = xmlParseExternalID(ctxt, &literal, 1);
3831 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003832 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003833 }
3834 if (URI) {
3835 xmlURIPtr uri;
3836
3837 uri = xmlParseURI((const char *) URI);
3838 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00003839 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
3840 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003841 /*
3842 * This really ought to be a well formedness error
3843 * but the XML Core WG decided otherwise c.f. issue
3844 * E26 of the XML erratas.
3845 */
Owen Taylor3473f882001-02-23 17:55:21 +00003846 } else {
3847 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003848 /*
3849 * Okay this is foolish to block those but not
3850 * invalid URIs.
3851 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003852 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003853 } else {
3854 if ((ctxt->sax != NULL) &&
3855 (!ctxt->disableSAX) &&
3856 (ctxt->sax->entityDecl != NULL))
3857 ctxt->sax->entityDecl(ctxt->userData, name,
3858 XML_EXTERNAL_PARAMETER_ENTITY,
3859 literal, URI, NULL);
3860 }
3861 xmlFreeURI(uri);
3862 }
3863 }
3864 }
3865 } else {
3866 if ((RAW == '"') || (RAW == '\'')) {
3867 value = xmlParseEntityValue(ctxt, &orig);
3868 if ((ctxt->sax != NULL) &&
3869 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3870 ctxt->sax->entityDecl(ctxt->userData, name,
3871 XML_INTERNAL_GENERAL_ENTITY,
3872 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003873 /*
3874 * For expat compatibility in SAX mode.
3875 */
3876 if ((ctxt->myDoc == NULL) ||
3877 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3878 if (ctxt->myDoc == NULL) {
3879 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3880 }
3881 if (ctxt->myDoc->intSubset == NULL)
3882 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3883 BAD_CAST "fake", NULL, NULL);
3884
Daniel Veillard1af9a412003-08-20 22:54:39 +00003885 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3886 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003887 }
Owen Taylor3473f882001-02-23 17:55:21 +00003888 } else {
3889 URI = xmlParseExternalID(ctxt, &literal, 1);
3890 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003891 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003892 }
3893 if (URI) {
3894 xmlURIPtr uri;
3895
3896 uri = xmlParseURI((const char *)URI);
3897 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00003898 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
3899 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003900 /*
3901 * This really ought to be a well formedness error
3902 * but the XML Core WG decided otherwise c.f. issue
3903 * E26 of the XML erratas.
3904 */
Owen Taylor3473f882001-02-23 17:55:21 +00003905 } else {
3906 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003907 /*
3908 * Okay this is foolish to block those but not
3909 * invalid URIs.
3910 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003911 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003912 }
3913 xmlFreeURI(uri);
3914 }
3915 }
William M. Brack76e95df2003-10-18 16:20:14 +00003916 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003917 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3918 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003919 }
3920 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003921 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003922 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00003923 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003924 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3925 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003926 }
3927 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003928 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003929 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3930 (ctxt->sax->unparsedEntityDecl != NULL))
3931 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3932 literal, URI, ndata);
3933 } else {
3934 if ((ctxt->sax != NULL) &&
3935 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3936 ctxt->sax->entityDecl(ctxt->userData, name,
3937 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3938 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003939 /*
3940 * For expat compatibility in SAX mode.
3941 * assuming the entity repalcement was asked for
3942 */
3943 if ((ctxt->replaceEntities != 0) &&
3944 ((ctxt->myDoc == NULL) ||
3945 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3946 if (ctxt->myDoc == NULL) {
3947 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3948 }
3949
3950 if (ctxt->myDoc->intSubset == NULL)
3951 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3952 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00003953 xmlSAX2EntityDecl(ctxt, name,
3954 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3955 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003956 }
Owen Taylor3473f882001-02-23 17:55:21 +00003957 }
3958 }
3959 }
3960 SKIP_BLANKS;
3961 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003962 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003963 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00003964 } else {
3965 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003966 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3967 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003968 }
3969 NEXT;
3970 }
3971 if (orig != NULL) {
3972 /*
3973 * Ugly mechanism to save the raw entity value.
3974 */
3975 xmlEntityPtr cur = NULL;
3976
3977 if (isParameter) {
3978 if ((ctxt->sax != NULL) &&
3979 (ctxt->sax->getParameterEntity != NULL))
3980 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3981 } else {
3982 if ((ctxt->sax != NULL) &&
3983 (ctxt->sax->getEntity != NULL))
3984 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003985 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00003986 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003987 }
Owen Taylor3473f882001-02-23 17:55:21 +00003988 }
3989 if (cur != NULL) {
3990 if (cur->orig != NULL)
3991 xmlFree(orig);
3992 else
3993 cur->orig = orig;
3994 } else
3995 xmlFree(orig);
3996 }
Owen Taylor3473f882001-02-23 17:55:21 +00003997 if (value != NULL) xmlFree(value);
3998 if (URI != NULL) xmlFree(URI);
3999 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004000 }
4001}
4002
4003/**
4004 * xmlParseDefaultDecl:
4005 * @ctxt: an XML parser context
4006 * @value: Receive a possible fixed default value for the attribute
4007 *
4008 * Parse an attribute default declaration
4009 *
4010 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4011 *
4012 * [ VC: Required Attribute ]
4013 * if the default declaration is the keyword #REQUIRED, then the
4014 * attribute must be specified for all elements of the type in the
4015 * attribute-list declaration.
4016 *
4017 * [ VC: Attribute Default Legal ]
4018 * The declared default value must meet the lexical constraints of
4019 * the declared attribute type c.f. xmlValidateAttributeDecl()
4020 *
4021 * [ VC: Fixed Attribute Default ]
4022 * if an attribute has a default value declared with the #FIXED
4023 * keyword, instances of that attribute must match the default value.
4024 *
4025 * [ WFC: No < in Attribute Values ]
4026 * handled in xmlParseAttValue()
4027 *
4028 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4029 * or XML_ATTRIBUTE_FIXED.
4030 */
4031
4032int
4033xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4034 int val;
4035 xmlChar *ret;
4036
4037 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004038 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004039 SKIP(9);
4040 return(XML_ATTRIBUTE_REQUIRED);
4041 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004042 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004043 SKIP(8);
4044 return(XML_ATTRIBUTE_IMPLIED);
4045 }
4046 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004047 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004048 SKIP(6);
4049 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004050 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004051 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4052 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004053 }
4054 SKIP_BLANKS;
4055 }
4056 ret = xmlParseAttValue(ctxt);
4057 ctxt->instate = XML_PARSER_DTD;
4058 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004059 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004060 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004061 } else
4062 *value = ret;
4063 return(val);
4064}
4065
4066/**
4067 * xmlParseNotationType:
4068 * @ctxt: an XML parser context
4069 *
4070 * parse an Notation attribute type.
4071 *
4072 * Note: the leading 'NOTATION' S part has already being parsed...
4073 *
4074 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4075 *
4076 * [ VC: Notation Attributes ]
4077 * Values of this type must match one of the notation names included
4078 * in the declaration; all notation names in the declaration must be declared.
4079 *
4080 * Returns: the notation attribute tree built while parsing
4081 */
4082
4083xmlEnumerationPtr
4084xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004085 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004086 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4087
4088 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004089 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004090 return(NULL);
4091 }
4092 SHRINK;
4093 do {
4094 NEXT;
4095 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004096 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004097 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004098 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4099 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004100 return(ret);
4101 }
4102 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004103 if (cur == NULL) return(ret);
4104 if (last == NULL) ret = last = cur;
4105 else {
4106 last->next = cur;
4107 last = cur;
4108 }
4109 SKIP_BLANKS;
4110 } while (RAW == '|');
4111 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004112 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004113 if ((last != NULL) && (last != ret))
4114 xmlFreeEnumeration(last);
4115 return(ret);
4116 }
4117 NEXT;
4118 return(ret);
4119}
4120
4121/**
4122 * xmlParseEnumerationType:
4123 * @ctxt: an XML parser context
4124 *
4125 * parse an Enumeration attribute type.
4126 *
4127 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4128 *
4129 * [ VC: Enumeration ]
4130 * Values of this type must match one of the Nmtoken tokens in
4131 * the declaration
4132 *
4133 * Returns: the enumeration attribute tree built while parsing
4134 */
4135
4136xmlEnumerationPtr
4137xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4138 xmlChar *name;
4139 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4140
4141 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004142 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004143 return(NULL);
4144 }
4145 SHRINK;
4146 do {
4147 NEXT;
4148 SKIP_BLANKS;
4149 name = xmlParseNmtoken(ctxt);
4150 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004151 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004152 return(ret);
4153 }
4154 cur = xmlCreateEnumeration(name);
4155 xmlFree(name);
4156 if (cur == NULL) return(ret);
4157 if (last == NULL) ret = last = cur;
4158 else {
4159 last->next = cur;
4160 last = cur;
4161 }
4162 SKIP_BLANKS;
4163 } while (RAW == '|');
4164 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004165 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004166 return(ret);
4167 }
4168 NEXT;
4169 return(ret);
4170}
4171
4172/**
4173 * xmlParseEnumeratedType:
4174 * @ctxt: an XML parser context
4175 * @tree: the enumeration tree built while parsing
4176 *
4177 * parse an Enumerated attribute type.
4178 *
4179 * [57] EnumeratedType ::= NotationType | Enumeration
4180 *
4181 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4182 *
4183 *
4184 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4185 */
4186
4187int
4188xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004189 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004190 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004191 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004192 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4193 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004194 return(0);
4195 }
4196 SKIP_BLANKS;
4197 *tree = xmlParseNotationType(ctxt);
4198 if (*tree == NULL) return(0);
4199 return(XML_ATTRIBUTE_NOTATION);
4200 }
4201 *tree = xmlParseEnumerationType(ctxt);
4202 if (*tree == NULL) return(0);
4203 return(XML_ATTRIBUTE_ENUMERATION);
4204}
4205
4206/**
4207 * xmlParseAttributeType:
4208 * @ctxt: an XML parser context
4209 * @tree: the enumeration tree built while parsing
4210 *
4211 * parse the Attribute list def for an element
4212 *
4213 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4214 *
4215 * [55] StringType ::= 'CDATA'
4216 *
4217 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4218 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4219 *
4220 * Validity constraints for attribute values syntax are checked in
4221 * xmlValidateAttributeValue()
4222 *
4223 * [ VC: ID ]
4224 * Values of type ID must match the Name production. A name must not
4225 * appear more than once in an XML document as a value of this type;
4226 * i.e., ID values must uniquely identify the elements which bear them.
4227 *
4228 * [ VC: One ID per Element Type ]
4229 * No element type may have more than one ID attribute specified.
4230 *
4231 * [ VC: ID Attribute Default ]
4232 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4233 *
4234 * [ VC: IDREF ]
4235 * Values of type IDREF must match the Name production, and values
4236 * of type IDREFS must match Names; each IDREF Name must match the value
4237 * of an ID attribute on some element in the XML document; i.e. IDREF
4238 * values must match the value of some ID attribute.
4239 *
4240 * [ VC: Entity Name ]
4241 * Values of type ENTITY must match the Name production, values
4242 * of type ENTITIES must match Names; each Entity Name must match the
4243 * name of an unparsed entity declared in the DTD.
4244 *
4245 * [ VC: Name Token ]
4246 * Values of type NMTOKEN must match the Nmtoken production; values
4247 * of type NMTOKENS must match Nmtokens.
4248 *
4249 * Returns the attribute type
4250 */
4251int
4252xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4253 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004254 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004255 SKIP(5);
4256 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004257 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004258 SKIP(6);
4259 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004260 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004261 SKIP(5);
4262 return(XML_ATTRIBUTE_IDREF);
4263 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4264 SKIP(2);
4265 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004266 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004267 SKIP(6);
4268 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004269 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004270 SKIP(8);
4271 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004272 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004273 SKIP(8);
4274 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004275 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004276 SKIP(7);
4277 return(XML_ATTRIBUTE_NMTOKEN);
4278 }
4279 return(xmlParseEnumeratedType(ctxt, tree));
4280}
4281
4282/**
4283 * xmlParseAttributeListDecl:
4284 * @ctxt: an XML parser context
4285 *
4286 * : parse the Attribute list def for an element
4287 *
4288 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4289 *
4290 * [53] AttDef ::= S Name S AttType S DefaultDecl
4291 *
4292 */
4293void
4294xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004295 const xmlChar *elemName;
4296 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004297 xmlEnumerationPtr tree;
4298
Daniel Veillarda07050d2003-10-19 14:46:32 +00004299 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004300 xmlParserInputPtr input = ctxt->input;
4301
4302 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004303 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004304 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004305 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004306 }
4307 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004308 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004309 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004310 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4311 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004312 return;
4313 }
4314 SKIP_BLANKS;
4315 GROW;
4316 while (RAW != '>') {
4317 const xmlChar *check = CUR_PTR;
4318 int type;
4319 int def;
4320 xmlChar *defaultValue = NULL;
4321
4322 GROW;
4323 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004324 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004325 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004326 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4327 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004328 break;
4329 }
4330 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004331 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004332 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004333 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004334 if (defaultValue != NULL)
4335 xmlFree(defaultValue);
4336 break;
4337 }
4338 SKIP_BLANKS;
4339
4340 type = xmlParseAttributeType(ctxt, &tree);
4341 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004342 if (defaultValue != NULL)
4343 xmlFree(defaultValue);
4344 break;
4345 }
4346
4347 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004348 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004349 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4350 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004351 if (defaultValue != NULL)
4352 xmlFree(defaultValue);
4353 if (tree != NULL)
4354 xmlFreeEnumeration(tree);
4355 break;
4356 }
4357 SKIP_BLANKS;
4358
4359 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4360 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004361 if (defaultValue != NULL)
4362 xmlFree(defaultValue);
4363 if (tree != NULL)
4364 xmlFreeEnumeration(tree);
4365 break;
4366 }
4367
4368 GROW;
4369 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004370 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004371 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004372 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004373 if (defaultValue != NULL)
4374 xmlFree(defaultValue);
4375 if (tree != NULL)
4376 xmlFreeEnumeration(tree);
4377 break;
4378 }
4379 SKIP_BLANKS;
4380 }
4381 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004382 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4383 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004384 if (defaultValue != NULL)
4385 xmlFree(defaultValue);
4386 if (tree != NULL)
4387 xmlFreeEnumeration(tree);
4388 break;
4389 }
4390 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4391 (ctxt->sax->attributeDecl != NULL))
4392 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4393 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004394 else if (tree != NULL)
4395 xmlFreeEnumeration(tree);
4396
4397 if ((ctxt->sax2) && (defaultValue != NULL) &&
4398 (def != XML_ATTRIBUTE_IMPLIED) &&
4399 (def != XML_ATTRIBUTE_REQUIRED)) {
4400 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4401 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004402 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4403 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4404 }
Owen Taylor3473f882001-02-23 17:55:21 +00004405 if (defaultValue != NULL)
4406 xmlFree(defaultValue);
4407 GROW;
4408 }
4409 if (RAW == '>') {
4410 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004411 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4412 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004413 }
4414 NEXT;
4415 }
Owen Taylor3473f882001-02-23 17:55:21 +00004416 }
4417}
4418
4419/**
4420 * xmlParseElementMixedContentDecl:
4421 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004422 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004423 *
4424 * parse the declaration for a Mixed Element content
4425 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4426 *
4427 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4428 * '(' S? '#PCDATA' S? ')'
4429 *
4430 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4431 *
4432 * [ VC: No Duplicate Types ]
4433 * The same name must not appear more than once in a single
4434 * mixed-content declaration.
4435 *
4436 * returns: the list of the xmlElementContentPtr describing the element choices
4437 */
4438xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004439xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004440 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004441 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004442
4443 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004444 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004445 SKIP(7);
4446 SKIP_BLANKS;
4447 SHRINK;
4448 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004449 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004450 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4451"Element content declaration doesn't start and stop in the same entity\n",
4452 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004453 }
Owen Taylor3473f882001-02-23 17:55:21 +00004454 NEXT;
4455 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4456 if (RAW == '*') {
4457 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4458 NEXT;
4459 }
4460 return(ret);
4461 }
4462 if ((RAW == '(') || (RAW == '|')) {
4463 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4464 if (ret == NULL) return(NULL);
4465 }
4466 while (RAW == '|') {
4467 NEXT;
4468 if (elem == NULL) {
4469 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4470 if (ret == NULL) return(NULL);
4471 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004472 if (cur != NULL)
4473 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004474 cur = ret;
4475 } else {
4476 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4477 if (n == NULL) return(NULL);
4478 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004479 if (n->c1 != NULL)
4480 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004481 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004482 if (n != NULL)
4483 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004484 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004485 }
4486 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004487 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004488 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004489 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004490 "xmlParseElementMixedContentDecl : Name expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004491 xmlFreeElementContent(cur);
4492 return(NULL);
4493 }
4494 SKIP_BLANKS;
4495 GROW;
4496 }
4497 if ((RAW == ')') && (NXT(1) == '*')) {
4498 if (elem != NULL) {
4499 cur->c2 = xmlNewElementContent(elem,
4500 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004501 if (cur->c2 != NULL)
4502 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004503 }
4504 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004505 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004506 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4507"Element content declaration doesn't start and stop in the same entity\n",
4508 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004509 }
Owen Taylor3473f882001-02-23 17:55:21 +00004510 SKIP(2);
4511 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00004512 xmlFreeElementContent(ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004513 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004514 return(NULL);
4515 }
4516
4517 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004518 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004519 }
4520 return(ret);
4521}
4522
4523/**
4524 * xmlParseElementChildrenContentDecl:
4525 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004526 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004527 *
4528 * parse the declaration for a Mixed Element content
4529 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4530 *
4531 *
4532 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4533 *
4534 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4535 *
4536 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4537 *
4538 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4539 *
4540 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4541 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004542 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004543 * opening or closing parentheses in a choice, seq, or Mixed
4544 * construct is contained in the replacement text for a parameter
4545 * entity, both must be contained in the same replacement text. For
4546 * interoperability, if a parameter-entity reference appears in a
4547 * choice, seq, or Mixed construct, its replacement text should not
4548 * be empty, and neither the first nor last non-blank character of
4549 * the replacement text should be a connector (| or ,).
4550 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004551 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004552 * hierarchy.
4553 */
4554xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004555xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004556 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004557 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00004558 xmlChar type = 0;
4559
4560 SKIP_BLANKS;
4561 GROW;
4562 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004563 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004564
Owen Taylor3473f882001-02-23 17:55:21 +00004565 /* Recurse on first child */
4566 NEXT;
4567 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004568 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004569 SKIP_BLANKS;
4570 GROW;
4571 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004572 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004573 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004574 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004575 return(NULL);
4576 }
4577 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004578 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004579 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004580 return(NULL);
4581 }
Owen Taylor3473f882001-02-23 17:55:21 +00004582 GROW;
4583 if (RAW == '?') {
4584 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4585 NEXT;
4586 } else if (RAW == '*') {
4587 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4588 NEXT;
4589 } else if (RAW == '+') {
4590 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4591 NEXT;
4592 } else {
4593 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4594 }
Owen Taylor3473f882001-02-23 17:55:21 +00004595 GROW;
4596 }
4597 SKIP_BLANKS;
4598 SHRINK;
4599 while (RAW != ')') {
4600 /*
4601 * Each loop we parse one separator and one element.
4602 */
4603 if (RAW == ',') {
4604 if (type == 0) type = CUR;
4605
4606 /*
4607 * Detect "Name | Name , Name" error
4608 */
4609 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004610 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004611 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004612 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004613 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004614 xmlFreeElementContent(last);
4615 if (ret != NULL)
4616 xmlFreeElementContent(ret);
4617 return(NULL);
4618 }
4619 NEXT;
4620
4621 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4622 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004623 if ((last != NULL) && (last != ret))
4624 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004625 xmlFreeElementContent(ret);
4626 return(NULL);
4627 }
4628 if (last == NULL) {
4629 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004630 if (ret != NULL)
4631 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004632 ret = cur = op;
4633 } else {
4634 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004635 if (op != NULL)
4636 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004637 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004638 if (last != NULL)
4639 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004640 cur =op;
4641 last = NULL;
4642 }
4643 } else if (RAW == '|') {
4644 if (type == 0) type = CUR;
4645
4646 /*
4647 * Detect "Name , Name | Name" error
4648 */
4649 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004650 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004651 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004652 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004653 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004654 xmlFreeElementContent(last);
4655 if (ret != NULL)
4656 xmlFreeElementContent(ret);
4657 return(NULL);
4658 }
4659 NEXT;
4660
4661 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4662 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004663 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004664 xmlFreeElementContent(last);
4665 if (ret != NULL)
4666 xmlFreeElementContent(ret);
4667 return(NULL);
4668 }
4669 if (last == NULL) {
4670 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004671 if (ret != NULL)
4672 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004673 ret = cur = op;
4674 } else {
4675 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004676 if (op != NULL)
4677 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004678 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004679 if (last != NULL)
4680 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004681 cur =op;
4682 last = NULL;
4683 }
4684 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004685 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004686 if (ret != NULL)
4687 xmlFreeElementContent(ret);
4688 return(NULL);
4689 }
4690 GROW;
4691 SKIP_BLANKS;
4692 GROW;
4693 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004694 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004695 /* Recurse on second child */
4696 NEXT;
4697 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004698 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004699 SKIP_BLANKS;
4700 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004701 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004702 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004703 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004704 if (ret != NULL)
4705 xmlFreeElementContent(ret);
4706 return(NULL);
4707 }
4708 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00004709 if (RAW == '?') {
4710 last->ocur = XML_ELEMENT_CONTENT_OPT;
4711 NEXT;
4712 } else if (RAW == '*') {
4713 last->ocur = XML_ELEMENT_CONTENT_MULT;
4714 NEXT;
4715 } else if (RAW == '+') {
4716 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4717 NEXT;
4718 } else {
4719 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4720 }
4721 }
4722 SKIP_BLANKS;
4723 GROW;
4724 }
4725 if ((cur != NULL) && (last != NULL)) {
4726 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004727 if (last != NULL)
4728 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004729 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004730 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004731 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4732"Element content declaration doesn't start and stop in the same entity\n",
4733 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004734 }
Owen Taylor3473f882001-02-23 17:55:21 +00004735 NEXT;
4736 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004737 if (ret != NULL)
4738 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004739 NEXT;
4740 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004741 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004742 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004743 cur = ret;
4744 /*
4745 * Some normalization:
4746 * (a | b* | c?)* == (a | b | c)*
4747 */
4748 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4749 if ((cur->c1 != NULL) &&
4750 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4751 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4752 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4753 if ((cur->c2 != NULL) &&
4754 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4755 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4756 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4757 cur = cur->c2;
4758 }
4759 }
Owen Taylor3473f882001-02-23 17:55:21 +00004760 NEXT;
4761 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004762 if (ret != NULL) {
4763 int found = 0;
4764
Daniel Veillarde470df72001-04-18 21:41:07 +00004765 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004766 /*
4767 * Some normalization:
4768 * (a | b*)+ == (a | b)*
4769 * (a | b?)+ == (a | b)*
4770 */
4771 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4772 if ((cur->c1 != NULL) &&
4773 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4774 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4775 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4776 found = 1;
4777 }
4778 if ((cur->c2 != NULL) &&
4779 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4780 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4781 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4782 found = 1;
4783 }
4784 cur = cur->c2;
4785 }
4786 if (found)
4787 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4788 }
Owen Taylor3473f882001-02-23 17:55:21 +00004789 NEXT;
4790 }
4791 return(ret);
4792}
4793
4794/**
4795 * xmlParseElementContentDecl:
4796 * @ctxt: an XML parser context
4797 * @name: the name of the element being defined.
4798 * @result: the Element Content pointer will be stored here if any
4799 *
4800 * parse the declaration for an Element content either Mixed or Children,
4801 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4802 *
4803 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4804 *
4805 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4806 */
4807
4808int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004809xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00004810 xmlElementContentPtr *result) {
4811
4812 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004813 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004814 int res;
4815
4816 *result = NULL;
4817
4818 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004819 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004820 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004821 return(-1);
4822 }
4823 NEXT;
4824 GROW;
4825 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004826 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004827 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004828 res = XML_ELEMENT_TYPE_MIXED;
4829 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004830 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004831 res = XML_ELEMENT_TYPE_ELEMENT;
4832 }
Owen Taylor3473f882001-02-23 17:55:21 +00004833 SKIP_BLANKS;
4834 *result = tree;
4835 return(res);
4836}
4837
4838/**
4839 * xmlParseElementDecl:
4840 * @ctxt: an XML parser context
4841 *
4842 * parse an Element declaration.
4843 *
4844 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4845 *
4846 * [ VC: Unique Element Type Declaration ]
4847 * No element type may be declared more than once
4848 *
4849 * Returns the type of the element, or -1 in case of error
4850 */
4851int
4852xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004853 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004854 int ret = -1;
4855 xmlElementContentPtr content = NULL;
4856
4857 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004858 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004859 xmlParserInputPtr input = ctxt->input;
4860
4861 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004862 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004863 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4864 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004865 }
4866 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004867 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004868 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004869 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4870 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004871 return(-1);
4872 }
4873 while ((RAW == 0) && (ctxt->inputNr > 1))
4874 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00004875 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004876 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4877 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004878 }
4879 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004880 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004881 SKIP(5);
4882 /*
4883 * Element must always be empty.
4884 */
4885 ret = XML_ELEMENT_TYPE_EMPTY;
4886 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4887 (NXT(2) == 'Y')) {
4888 SKIP(3);
4889 /*
4890 * Element is a generic container.
4891 */
4892 ret = XML_ELEMENT_TYPE_ANY;
4893 } else if (RAW == '(') {
4894 ret = xmlParseElementContentDecl(ctxt, name, &content);
4895 } else {
4896 /*
4897 * [ WFC: PEs in Internal Subset ] error handling.
4898 */
4899 if ((RAW == '%') && (ctxt->external == 0) &&
4900 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004901 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004902 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004903 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00004904 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00004905 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4906 }
Owen Taylor3473f882001-02-23 17:55:21 +00004907 return(-1);
4908 }
4909
4910 SKIP_BLANKS;
4911 /*
4912 * Pop-up of finished entities.
4913 */
4914 while ((RAW == 0) && (ctxt->inputNr > 1))
4915 xmlPopInput(ctxt);
4916 SKIP_BLANKS;
4917
4918 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004919 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004920 } else {
4921 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004922 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4923 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004924 }
4925
4926 NEXT;
4927 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4928 (ctxt->sax->elementDecl != NULL))
4929 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4930 content);
4931 }
4932 if (content != NULL) {
4933 xmlFreeElementContent(content);
4934 }
Owen Taylor3473f882001-02-23 17:55:21 +00004935 }
4936 return(ret);
4937}
4938
4939/**
Owen Taylor3473f882001-02-23 17:55:21 +00004940 * xmlParseConditionalSections
4941 * @ctxt: an XML parser context
4942 *
4943 * [61] conditionalSect ::= includeSect | ignoreSect
4944 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4945 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4946 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4947 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4948 */
4949
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004950static void
Owen Taylor3473f882001-02-23 17:55:21 +00004951xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4952 SKIP(3);
4953 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004954 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004955 SKIP(7);
4956 SKIP_BLANKS;
4957 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004958 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004959 } else {
4960 NEXT;
4961 }
4962 if (xmlParserDebugEntities) {
4963 if ((ctxt->input != NULL) && (ctxt->input->filename))
4964 xmlGenericError(xmlGenericErrorContext,
4965 "%s(%d): ", ctxt->input->filename,
4966 ctxt->input->line);
4967 xmlGenericError(xmlGenericErrorContext,
4968 "Entering INCLUDE Conditional Section\n");
4969 }
4970
4971 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4972 (NXT(2) != '>'))) {
4973 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00004974 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00004975
4976 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4977 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00004978 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004979 NEXT;
4980 } else if (RAW == '%') {
4981 xmlParsePEReference(ctxt);
4982 } else
4983 xmlParseMarkupDecl(ctxt);
4984
4985 /*
4986 * Pop-up of finished entities.
4987 */
4988 while ((RAW == 0) && (ctxt->inputNr > 1))
4989 xmlPopInput(ctxt);
4990
Daniel Veillardfdc91562002-07-01 21:52:03 +00004991 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004992 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004993 break;
4994 }
4995 }
4996 if (xmlParserDebugEntities) {
4997 if ((ctxt->input != NULL) && (ctxt->input->filename))
4998 xmlGenericError(xmlGenericErrorContext,
4999 "%s(%d): ", ctxt->input->filename,
5000 ctxt->input->line);
5001 xmlGenericError(xmlGenericErrorContext,
5002 "Leaving INCLUDE Conditional Section\n");
5003 }
5004
Daniel Veillarda07050d2003-10-19 14:46:32 +00005005 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005006 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005007 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005008 int depth = 0;
5009
5010 SKIP(6);
5011 SKIP_BLANKS;
5012 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005013 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005014 } else {
5015 NEXT;
5016 }
5017 if (xmlParserDebugEntities) {
5018 if ((ctxt->input != NULL) && (ctxt->input->filename))
5019 xmlGenericError(xmlGenericErrorContext,
5020 "%s(%d): ", ctxt->input->filename,
5021 ctxt->input->line);
5022 xmlGenericError(xmlGenericErrorContext,
5023 "Entering IGNORE Conditional Section\n");
5024 }
5025
5026 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005027 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005028 * But disable SAX event generating DTD building in the meantime
5029 */
5030 state = ctxt->disableSAX;
5031 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005032 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005033 ctxt->instate = XML_PARSER_IGNORE;
5034
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005035 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005036 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5037 depth++;
5038 SKIP(3);
5039 continue;
5040 }
5041 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5042 if (--depth >= 0) SKIP(3);
5043 continue;
5044 }
5045 NEXT;
5046 continue;
5047 }
5048
5049 ctxt->disableSAX = state;
5050 ctxt->instate = instate;
5051
5052 if (xmlParserDebugEntities) {
5053 if ((ctxt->input != NULL) && (ctxt->input->filename))
5054 xmlGenericError(xmlGenericErrorContext,
5055 "%s(%d): ", ctxt->input->filename,
5056 ctxt->input->line);
5057 xmlGenericError(xmlGenericErrorContext,
5058 "Leaving IGNORE Conditional Section\n");
5059 }
5060
5061 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005062 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005063 }
5064
5065 if (RAW == 0)
5066 SHRINK;
5067
5068 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005069 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005070 } else {
5071 SKIP(3);
5072 }
5073}
5074
5075/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005076 * xmlParseMarkupDecl:
5077 * @ctxt: an XML parser context
5078 *
5079 * parse Markup declarations
5080 *
5081 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5082 * NotationDecl | PI | Comment
5083 *
5084 * [ VC: Proper Declaration/PE Nesting ]
5085 * Parameter-entity replacement text must be properly nested with
5086 * markup declarations. That is to say, if either the first character
5087 * or the last character of a markup declaration (markupdecl above) is
5088 * contained in the replacement text for a parameter-entity reference,
5089 * both must be contained in the same replacement text.
5090 *
5091 * [ WFC: PEs in Internal Subset ]
5092 * In the internal DTD subset, parameter-entity references can occur
5093 * only where markup declarations can occur, not within markup declarations.
5094 * (This does not apply to references that occur in external parameter
5095 * entities or to the external subset.)
5096 */
5097void
5098xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5099 GROW;
5100 xmlParseElementDecl(ctxt);
5101 xmlParseAttributeListDecl(ctxt);
5102 xmlParseEntityDecl(ctxt);
5103 xmlParseNotationDecl(ctxt);
5104 xmlParsePI(ctxt);
5105 xmlParseComment(ctxt);
5106 /*
5107 * This is only for internal subset. On external entities,
5108 * the replacement is done before parsing stage
5109 */
5110 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5111 xmlParsePEReference(ctxt);
5112
5113 /*
5114 * Conditional sections are allowed from entities included
5115 * by PE References in the internal subset.
5116 */
5117 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5118 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5119 xmlParseConditionalSections(ctxt);
5120 }
5121 }
5122
5123 ctxt->instate = XML_PARSER_DTD;
5124}
5125
5126/**
5127 * xmlParseTextDecl:
5128 * @ctxt: an XML parser context
5129 *
5130 * parse an XML declaration header for external entities
5131 *
5132 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5133 *
5134 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5135 */
5136
5137void
5138xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5139 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005140 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005141
5142 /*
5143 * We know that '<?xml' is here.
5144 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005145 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005146 SKIP(5);
5147 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005148 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005149 return;
5150 }
5151
William M. Brack76e95df2003-10-18 16:20:14 +00005152 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005153 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5154 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005155 }
5156 SKIP_BLANKS;
5157
5158 /*
5159 * We may have the VersionInfo here.
5160 */
5161 version = xmlParseVersionInfo(ctxt);
5162 if (version == NULL)
5163 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005164 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005165 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005166 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5167 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005168 }
5169 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005170 ctxt->input->version = version;
5171
5172 /*
5173 * We must have the encoding declaration
5174 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005175 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005176 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5177 /*
5178 * The XML REC instructs us to stop parsing right here
5179 */
5180 return;
5181 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005182 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5183 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5184 "Missing encoding in text declaration\n");
5185 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005186
5187 SKIP_BLANKS;
5188 if ((RAW == '?') && (NXT(1) == '>')) {
5189 SKIP(2);
5190 } else if (RAW == '>') {
5191 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005192 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005193 NEXT;
5194 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005195 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005196 MOVETO_ENDTAG(CUR_PTR);
5197 NEXT;
5198 }
5199}
5200
5201/**
Owen Taylor3473f882001-02-23 17:55:21 +00005202 * xmlParseExternalSubset:
5203 * @ctxt: an XML parser context
5204 * @ExternalID: the external identifier
5205 * @SystemID: the system identifier (or URL)
5206 *
5207 * parse Markup declarations from an external subset
5208 *
5209 * [30] extSubset ::= textDecl? extSubsetDecl
5210 *
5211 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5212 */
5213void
5214xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5215 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005216 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005217 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005218 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005219 xmlParseTextDecl(ctxt);
5220 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5221 /*
5222 * The XML REC instructs us to stop parsing right here
5223 */
5224 ctxt->instate = XML_PARSER_EOF;
5225 return;
5226 }
5227 }
5228 if (ctxt->myDoc == NULL) {
5229 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5230 }
5231 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5232 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5233
5234 ctxt->instate = XML_PARSER_DTD;
5235 ctxt->external = 1;
5236 while (((RAW == '<') && (NXT(1) == '?')) ||
5237 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005238 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005239 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005240 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005241
5242 GROW;
5243 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5244 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005245 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005246 NEXT;
5247 } else if (RAW == '%') {
5248 xmlParsePEReference(ctxt);
5249 } else
5250 xmlParseMarkupDecl(ctxt);
5251
5252 /*
5253 * Pop-up of finished entities.
5254 */
5255 while ((RAW == 0) && (ctxt->inputNr > 1))
5256 xmlPopInput(ctxt);
5257
Daniel Veillardfdc91562002-07-01 21:52:03 +00005258 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005259 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005260 break;
5261 }
5262 }
5263
5264 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005265 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005266 }
5267
5268}
5269
5270/**
5271 * xmlParseReference:
5272 * @ctxt: an XML parser context
5273 *
5274 * parse and handle entity references in content, depending on the SAX
5275 * interface, this may end-up in a call to character() if this is a
5276 * CharRef, a predefined entity, if there is no reference() callback.
5277 * or if the parser was asked to switch to that mode.
5278 *
5279 * [67] Reference ::= EntityRef | CharRef
5280 */
5281void
5282xmlParseReference(xmlParserCtxtPtr ctxt) {
5283 xmlEntityPtr ent;
5284 xmlChar *val;
5285 if (RAW != '&') return;
5286
5287 if (NXT(1) == '#') {
5288 int i = 0;
5289 xmlChar out[10];
5290 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005291 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005292
5293 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5294 /*
5295 * So we are using non-UTF-8 buffers
5296 * Check that the char fit on 8bits, if not
5297 * generate a CharRef.
5298 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005299 if (value <= 0xFF) {
5300 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005301 out[1] = 0;
5302 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5303 (!ctxt->disableSAX))
5304 ctxt->sax->characters(ctxt->userData, out, 1);
5305 } else {
5306 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005307 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005308 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005309 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005310 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5311 (!ctxt->disableSAX))
5312 ctxt->sax->reference(ctxt->userData, out);
5313 }
5314 } else {
5315 /*
5316 * Just encode the value in UTF-8
5317 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005318 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005319 out[i] = 0;
5320 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5321 (!ctxt->disableSAX))
5322 ctxt->sax->characters(ctxt->userData, out, i);
5323 }
5324 } else {
5325 ent = xmlParseEntityRef(ctxt);
5326 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005327 if (!ctxt->wellFormed)
5328 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005329 if ((ent->name != NULL) &&
5330 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5331 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005332 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005333
5334
5335 /*
5336 * The first reference to the entity trigger a parsing phase
5337 * where the ent->children is filled with the result from
5338 * the parsing.
5339 */
5340 if (ent->children == NULL) {
5341 xmlChar *value;
5342 value = ent->content;
5343
5344 /*
5345 * Check that this entity is well formed
5346 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005347 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005348 (value[1] == 0) && (value[0] == '<') &&
5349 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5350 /*
5351 * DONE: get definite answer on this !!!
5352 * Lots of entity decls are used to declare a single
5353 * char
5354 * <!ENTITY lt "<">
5355 * Which seems to be valid since
5356 * 2.4: The ampersand character (&) and the left angle
5357 * bracket (<) may appear in their literal form only
5358 * when used ... They are also legal within the literal
5359 * entity value of an internal entity declaration;i
5360 * see "4.3.2 Well-Formed Parsed Entities".
5361 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5362 * Looking at the OASIS test suite and James Clark
5363 * tests, this is broken. However the XML REC uses
5364 * it. Is the XML REC not well-formed ????
5365 * This is a hack to avoid this problem
5366 *
5367 * ANSWER: since lt gt amp .. are already defined,
5368 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005369 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005370 * is lousy but acceptable.
5371 */
5372 list = xmlNewDocText(ctxt->myDoc, value);
5373 if (list != NULL) {
5374 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5375 (ent->children == NULL)) {
5376 ent->children = list;
5377 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005378 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005379 list->parent = (xmlNodePtr) ent;
5380 } else {
5381 xmlFreeNodeList(list);
5382 }
5383 } else if (list != NULL) {
5384 xmlFreeNodeList(list);
5385 }
5386 } else {
5387 /*
5388 * 4.3.2: An internal general parsed entity is well-formed
5389 * if its replacement text matches the production labeled
5390 * content.
5391 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005392
5393 void *user_data;
5394 /*
5395 * This is a bit hackish but this seems the best
5396 * way to make sure both SAX and DOM entity support
5397 * behaves okay.
5398 */
5399 if (ctxt->userData == ctxt)
5400 user_data = NULL;
5401 else
5402 user_data = ctxt->userData;
5403
Owen Taylor3473f882001-02-23 17:55:21 +00005404 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5405 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005406 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5407 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005408 ctxt->depth--;
5409 } else if (ent->etype ==
5410 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5411 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005412 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005413 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005414 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005415 ctxt->depth--;
5416 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005417 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005418 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5419 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005420 }
5421 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005422 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005423 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005424 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005425 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5426 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005427 (ent->children == NULL)) {
5428 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005429 if (ctxt->replaceEntities) {
5430 /*
5431 * Prune it directly in the generated document
5432 * except for single text nodes.
5433 */
5434 if ((list->type == XML_TEXT_NODE) &&
5435 (list->next == NULL)) {
5436 list->parent = (xmlNodePtr) ent;
5437 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005438 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005439 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005440 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005441 while (list != NULL) {
5442 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005443 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005444 if (list->next == NULL)
5445 ent->last = list;
5446 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005447 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005448 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00005449#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005450 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5451 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00005452#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005453 }
5454 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005455 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005456 while (list != NULL) {
5457 list->parent = (xmlNodePtr) ent;
5458 if (list->next == NULL)
5459 ent->last = list;
5460 list = list->next;
5461 }
Owen Taylor3473f882001-02-23 17:55:21 +00005462 }
5463 } else {
5464 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005465 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005466 }
William M. Brackb670e2e2003-09-27 01:05:55 +00005467 } else if ((ret != XML_ERR_OK) &&
5468 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005469 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005470 } else if (list != NULL) {
5471 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005472 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005473 }
5474 }
5475 }
5476 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5477 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5478 /*
5479 * Create a node.
5480 */
5481 ctxt->sax->reference(ctxt->userData, ent->name);
5482 return;
5483 } else if (ctxt->replaceEntities) {
5484 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5485 /*
5486 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005487 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005488 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005489 */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005490 if ((list == NULL) && (ent->owner == 0)) {
5491 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005492 cur = ent->children;
5493 while (cur != NULL) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005494 nw = xmlCopyNode(cur, 1);
5495 if (nw != NULL) {
5496 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005497 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005498 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005499 }
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005500 xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005501 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005502 if (cur == ent->last)
5503 break;
5504 cur = cur->next;
5505 }
Daniel Veillard81273902003-09-30 00:43:48 +00005506#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005507 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005508 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005509#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005510 } else if (list == NULL) {
5511 xmlNodePtr nw = NULL, cur, next, last,
5512 firstChild = NULL;
5513 /*
5514 * Copy the entity child list and make it the new
5515 * entity child list. The goal is to make sure any
5516 * ID or REF referenced will be the one from the
5517 * document content and not the entity copy.
5518 */
5519 cur = ent->children;
5520 ent->children = NULL;
5521 last = ent->last;
5522 ent->last = NULL;
5523 while (cur != NULL) {
5524 next = cur->next;
5525 cur->next = NULL;
5526 cur->parent = NULL;
5527 nw = xmlCopyNode(cur, 1);
5528 if (nw != NULL) {
5529 nw->_private = cur->_private;
5530 if (firstChild == NULL){
5531 firstChild = cur;
5532 }
5533 xmlAddChild((xmlNodePtr) ent, nw);
5534 xmlAddChild(ctxt->node, cur);
5535 }
5536 if (cur == last)
5537 break;
5538 cur = next;
5539 }
5540 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00005541#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005542 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5543 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005544#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005545 } else {
5546 /*
5547 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005548 * node with a possible previous text one which
5549 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005550 */
5551 if (ent->children->type == XML_TEXT_NODE)
5552 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5553 if ((ent->last != ent->children) &&
5554 (ent->last->type == XML_TEXT_NODE))
5555 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5556 xmlAddChildList(ctxt->node, ent->children);
5557 }
5558
Owen Taylor3473f882001-02-23 17:55:21 +00005559 /*
5560 * This is to avoid a nasty side effect, see
5561 * characters() in SAX.c
5562 */
5563 ctxt->nodemem = 0;
5564 ctxt->nodelen = 0;
5565 return;
5566 } else {
5567 /*
5568 * Probably running in SAX mode
5569 */
5570 xmlParserInputPtr input;
5571
5572 input = xmlNewEntityInputStream(ctxt, ent);
5573 xmlPushInput(ctxt, input);
5574 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00005575 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
5576 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00005577 xmlParseTextDecl(ctxt);
5578 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5579 /*
5580 * The XML REC instructs us to stop parsing right here
5581 */
5582 ctxt->instate = XML_PARSER_EOF;
5583 return;
5584 }
5585 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005586 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
5587 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005588 }
5589 }
5590 return;
5591 }
5592 }
5593 } else {
5594 val = ent->content;
5595 if (val == NULL) return;
5596 /*
5597 * inline the entity.
5598 */
5599 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5600 (!ctxt->disableSAX))
5601 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5602 }
5603 }
5604}
5605
5606/**
5607 * xmlParseEntityRef:
5608 * @ctxt: an XML parser context
5609 *
5610 * parse ENTITY references declarations
5611 *
5612 * [68] EntityRef ::= '&' Name ';'
5613 *
5614 * [ WFC: Entity Declared ]
5615 * In a document without any DTD, a document with only an internal DTD
5616 * subset which contains no parameter entity references, or a document
5617 * with "standalone='yes'", the Name given in the entity reference
5618 * must match that in an entity declaration, except that well-formed
5619 * documents need not declare any of the following entities: amp, lt,
5620 * gt, apos, quot. The declaration of a parameter entity must precede
5621 * any reference to it. Similarly, the declaration of a general entity
5622 * must precede any reference to it which appears in a default value in an
5623 * attribute-list declaration. Note that if entities are declared in the
5624 * external subset or in external parameter entities, a non-validating
5625 * processor is not obligated to read and process their declarations;
5626 * for such documents, the rule that an entity must be declared is a
5627 * well-formedness constraint only if standalone='yes'.
5628 *
5629 * [ WFC: Parsed Entity ]
5630 * An entity reference must not contain the name of an unparsed entity
5631 *
5632 * Returns the xmlEntityPtr if found, or NULL otherwise.
5633 */
5634xmlEntityPtr
5635xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005636 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005637 xmlEntityPtr ent = NULL;
5638
5639 GROW;
5640
5641 if (RAW == '&') {
5642 NEXT;
5643 name = xmlParseName(ctxt);
5644 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005645 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5646 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005647 } else {
5648 if (RAW == ';') {
5649 NEXT;
5650 /*
5651 * Ask first SAX for entity resolution, otherwise try the
5652 * predefined set.
5653 */
5654 if (ctxt->sax != NULL) {
5655 if (ctxt->sax->getEntity != NULL)
5656 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005657 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00005658 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005659 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
5660 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005661 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005662 }
Owen Taylor3473f882001-02-23 17:55:21 +00005663 }
5664 /*
5665 * [ WFC: Entity Declared ]
5666 * In a document without any DTD, a document with only an
5667 * internal DTD subset which contains no parameter entity
5668 * references, or a document with "standalone='yes'", the
5669 * Name given in the entity reference must match that in an
5670 * entity declaration, except that well-formed documents
5671 * need not declare any of the following entities: amp, lt,
5672 * gt, apos, quot.
5673 * The declaration of a parameter entity must precede any
5674 * reference to it.
5675 * Similarly, the declaration of a general entity must
5676 * precede any reference to it which appears in a default
5677 * value in an attribute-list declaration. Note that if
5678 * entities are declared in the external subset or in
5679 * external parameter entities, a non-validating processor
5680 * is not obligated to read and process their declarations;
5681 * for such documents, the rule that an entity must be
5682 * declared is a well-formedness constraint only if
5683 * standalone='yes'.
5684 */
5685 if (ent == NULL) {
5686 if ((ctxt->standalone == 1) ||
5687 ((ctxt->hasExternalSubset == 0) &&
5688 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005689 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005690 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005691 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005692 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005693 "Entity '%s' not defined\n", name);
5694 }
Daniel Veillardf403d292003-10-05 13:51:35 +00005695 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005696 }
5697
5698 /*
5699 * [ WFC: Parsed Entity ]
5700 * An entity reference must not contain the name of an
5701 * unparsed entity
5702 */
5703 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005704 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005705 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005706 }
5707
5708 /*
5709 * [ WFC: No External Entity References ]
5710 * Attribute values cannot contain direct or indirect
5711 * entity references to external entities.
5712 */
5713 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5714 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005715 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
5716 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005717 }
5718 /*
5719 * [ WFC: No < in Attribute Values ]
5720 * The replacement text of any entity referred to directly or
5721 * indirectly in an attribute value (other than "&lt;") must
5722 * not contain a <.
5723 */
5724 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5725 (ent != NULL) &&
5726 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5727 (ent->content != NULL) &&
5728 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005729 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00005730 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005731 }
5732
5733 /*
5734 * Internal check, no parameter entities here ...
5735 */
5736 else {
5737 switch (ent->etype) {
5738 case XML_INTERNAL_PARAMETER_ENTITY:
5739 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005740 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
5741 "Attempt to reference the parameter entity '%s'\n",
5742 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005743 break;
5744 default:
5745 break;
5746 }
5747 }
5748
5749 /*
5750 * [ WFC: No Recursion ]
5751 * A parsed entity must not contain a recursive reference
5752 * to itself, either directly or indirectly.
5753 * Done somewhere else
5754 */
5755
5756 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005757 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005758 }
Owen Taylor3473f882001-02-23 17:55:21 +00005759 }
5760 }
5761 return(ent);
5762}
5763
5764/**
5765 * xmlParseStringEntityRef:
5766 * @ctxt: an XML parser context
5767 * @str: a pointer to an index in the string
5768 *
5769 * parse ENTITY references declarations, but this version parses it from
5770 * a string value.
5771 *
5772 * [68] EntityRef ::= '&' Name ';'
5773 *
5774 * [ WFC: Entity Declared ]
5775 * In a document without any DTD, a document with only an internal DTD
5776 * subset which contains no parameter entity references, or a document
5777 * with "standalone='yes'", the Name given in the entity reference
5778 * must match that in an entity declaration, except that well-formed
5779 * documents need not declare any of the following entities: amp, lt,
5780 * gt, apos, quot. The declaration of a parameter entity must precede
5781 * any reference to it. Similarly, the declaration of a general entity
5782 * must precede any reference to it which appears in a default value in an
5783 * attribute-list declaration. Note that if entities are declared in the
5784 * external subset or in external parameter entities, a non-validating
5785 * processor is not obligated to read and process their declarations;
5786 * for such documents, the rule that an entity must be declared is a
5787 * well-formedness constraint only if standalone='yes'.
5788 *
5789 * [ WFC: Parsed Entity ]
5790 * An entity reference must not contain the name of an unparsed entity
5791 *
5792 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5793 * is updated to the current location in the string.
5794 */
5795xmlEntityPtr
5796xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5797 xmlChar *name;
5798 const xmlChar *ptr;
5799 xmlChar cur;
5800 xmlEntityPtr ent = NULL;
5801
5802 if ((str == NULL) || (*str == NULL))
5803 return(NULL);
5804 ptr = *str;
5805 cur = *ptr;
5806 if (cur == '&') {
5807 ptr++;
5808 cur = *ptr;
5809 name = xmlParseStringName(ctxt, &ptr);
5810 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005811 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5812 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005813 } else {
5814 if (*ptr == ';') {
5815 ptr++;
5816 /*
5817 * Ask first SAX for entity resolution, otherwise try the
5818 * predefined set.
5819 */
5820 if (ctxt->sax != NULL) {
5821 if (ctxt->sax->getEntity != NULL)
5822 ent = ctxt->sax->getEntity(ctxt->userData, name);
5823 if (ent == NULL)
5824 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005825 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005826 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005827 }
Owen Taylor3473f882001-02-23 17:55:21 +00005828 }
5829 /*
5830 * [ WFC: Entity Declared ]
5831 * In a document without any DTD, a document with only an
5832 * internal DTD subset which contains no parameter entity
5833 * references, or a document with "standalone='yes'", the
5834 * Name given in the entity reference must match that in an
5835 * entity declaration, except that well-formed documents
5836 * need not declare any of the following entities: amp, lt,
5837 * gt, apos, quot.
5838 * The declaration of a parameter entity must precede any
5839 * reference to it.
5840 * Similarly, the declaration of a general entity must
5841 * precede any reference to it which appears in a default
5842 * value in an attribute-list declaration. Note that if
5843 * entities are declared in the external subset or in
5844 * external parameter entities, a non-validating processor
5845 * is not obligated to read and process their declarations;
5846 * for such documents, the rule that an entity must be
5847 * declared is a well-formedness constraint only if
5848 * standalone='yes'.
5849 */
5850 if (ent == NULL) {
5851 if ((ctxt->standalone == 1) ||
5852 ((ctxt->hasExternalSubset == 0) &&
5853 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005854 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005855 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005856 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005857 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00005858 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00005859 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005860 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005861 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00005862 }
5863
5864 /*
5865 * [ WFC: Parsed Entity ]
5866 * An entity reference must not contain the name of an
5867 * unparsed entity
5868 */
5869 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005870 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005871 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005872 }
5873
5874 /*
5875 * [ WFC: No External Entity References ]
5876 * Attribute values cannot contain direct or indirect
5877 * entity references to external entities.
5878 */
5879 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5880 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005881 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00005882 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005883 }
5884 /*
5885 * [ WFC: No < in Attribute Values ]
5886 * The replacement text of any entity referred to directly or
5887 * indirectly in an attribute value (other than "&lt;") must
5888 * not contain a <.
5889 */
5890 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5891 (ent != NULL) &&
5892 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5893 (ent->content != NULL) &&
5894 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005895 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
5896 "'<' in entity '%s' is not allowed in attributes values\n",
5897 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005898 }
5899
5900 /*
5901 * Internal check, no parameter entities here ...
5902 */
5903 else {
5904 switch (ent->etype) {
5905 case XML_INTERNAL_PARAMETER_ENTITY:
5906 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00005907 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
5908 "Attempt to reference the parameter entity '%s'\n",
5909 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005910 break;
5911 default:
5912 break;
5913 }
5914 }
5915
5916 /*
5917 * [ WFC: No Recursion ]
5918 * A parsed entity must not contain a recursive reference
5919 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005920 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00005921 */
5922
5923 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005924 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005925 }
5926 xmlFree(name);
5927 }
5928 }
5929 *str = ptr;
5930 return(ent);
5931}
5932
5933/**
5934 * xmlParsePEReference:
5935 * @ctxt: an XML parser context
5936 *
5937 * parse PEReference declarations
5938 * The entity content is handled directly by pushing it's content as
5939 * a new input stream.
5940 *
5941 * [69] PEReference ::= '%' Name ';'
5942 *
5943 * [ WFC: No Recursion ]
5944 * A parsed entity must not contain a recursive
5945 * reference to itself, either directly or indirectly.
5946 *
5947 * [ WFC: Entity Declared ]
5948 * In a document without any DTD, a document with only an internal DTD
5949 * subset which contains no parameter entity references, or a document
5950 * with "standalone='yes'", ... ... The declaration of a parameter
5951 * entity must precede any reference to it...
5952 *
5953 * [ VC: Entity Declared ]
5954 * In a document with an external subset or external parameter entities
5955 * with "standalone='no'", ... ... The declaration of a parameter entity
5956 * must precede any reference to it...
5957 *
5958 * [ WFC: In DTD ]
5959 * Parameter-entity references may only appear in the DTD.
5960 * NOTE: misleading but this is handled.
5961 */
5962void
Daniel Veillard8f597c32003-10-06 08:19:27 +00005963xmlParsePEReference(xmlParserCtxtPtr ctxt)
5964{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005965 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005966 xmlEntityPtr entity = NULL;
5967 xmlParserInputPtr input;
5968
5969 if (RAW == '%') {
5970 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005971 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00005972 if (name == NULL) {
5973 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5974 "xmlParsePEReference: no name\n");
5975 } else {
5976 if (RAW == ';') {
5977 NEXT;
5978 if ((ctxt->sax != NULL) &&
5979 (ctxt->sax->getParameterEntity != NULL))
5980 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5981 name);
5982 if (entity == NULL) {
5983 /*
5984 * [ WFC: Entity Declared ]
5985 * In a document without any DTD, a document with only an
5986 * internal DTD subset which contains no parameter entity
5987 * references, or a document with "standalone='yes'", ...
5988 * ... The declaration of a parameter entity must precede
5989 * any reference to it...
5990 */
5991 if ((ctxt->standalone == 1) ||
5992 ((ctxt->hasExternalSubset == 0) &&
5993 (ctxt->hasPErefs == 0))) {
5994 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
5995 "PEReference: %%%s; not found\n",
5996 name);
5997 } else {
5998 /*
5999 * [ VC: Entity Declared ]
6000 * In a document with an external subset or external
6001 * parameter entities with "standalone='no'", ...
6002 * ... The declaration of a parameter entity must
6003 * precede any reference to it...
6004 */
6005 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6006 "PEReference: %%%s; not found\n",
6007 name, NULL);
6008 ctxt->valid = 0;
6009 }
6010 } else {
6011 /*
6012 * Internal checking in case the entity quest barfed
6013 */
6014 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6015 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6016 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6017 "Internal: %%%s; is not a parameter entity\n",
6018 name, NULL);
6019 } else if (ctxt->input->free != deallocblankswrapper) {
6020 input =
6021 xmlNewBlanksWrapperInputStream(ctxt, entity);
6022 xmlPushInput(ctxt, input);
6023 } else {
6024 /*
6025 * TODO !!!
6026 * handle the extra spaces added before and after
6027 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6028 */
6029 input = xmlNewEntityInputStream(ctxt, entity);
6030 xmlPushInput(ctxt, input);
6031 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006032 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006033 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006034 xmlParseTextDecl(ctxt);
6035 if (ctxt->errNo ==
6036 XML_ERR_UNSUPPORTED_ENCODING) {
6037 /*
6038 * The XML REC instructs us to stop parsing
6039 * right here
6040 */
6041 ctxt->instate = XML_PARSER_EOF;
6042 return;
6043 }
6044 }
6045 }
6046 }
6047 ctxt->hasPErefs = 1;
6048 } else {
6049 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6050 }
6051 }
Owen Taylor3473f882001-02-23 17:55:21 +00006052 }
6053}
6054
6055/**
6056 * xmlParseStringPEReference:
6057 * @ctxt: an XML parser context
6058 * @str: a pointer to an index in the string
6059 *
6060 * parse PEReference declarations
6061 *
6062 * [69] PEReference ::= '%' Name ';'
6063 *
6064 * [ WFC: No Recursion ]
6065 * A parsed entity must not contain a recursive
6066 * reference to itself, either directly or indirectly.
6067 *
6068 * [ WFC: Entity Declared ]
6069 * In a document without any DTD, a document with only an internal DTD
6070 * subset which contains no parameter entity references, or a document
6071 * with "standalone='yes'", ... ... The declaration of a parameter
6072 * entity must precede any reference to it...
6073 *
6074 * [ VC: Entity Declared ]
6075 * In a document with an external subset or external parameter entities
6076 * with "standalone='no'", ... ... The declaration of a parameter entity
6077 * must precede any reference to it...
6078 *
6079 * [ WFC: In DTD ]
6080 * Parameter-entity references may only appear in the DTD.
6081 * NOTE: misleading but this is handled.
6082 *
6083 * Returns the string of the entity content.
6084 * str is updated to the current value of the index
6085 */
6086xmlEntityPtr
6087xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6088 const xmlChar *ptr;
6089 xmlChar cur;
6090 xmlChar *name;
6091 xmlEntityPtr entity = NULL;
6092
6093 if ((str == NULL) || (*str == NULL)) return(NULL);
6094 ptr = *str;
6095 cur = *ptr;
6096 if (cur == '%') {
6097 ptr++;
6098 cur = *ptr;
6099 name = xmlParseStringName(ctxt, &ptr);
6100 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006101 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6102 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006103 } else {
6104 cur = *ptr;
6105 if (cur == ';') {
6106 ptr++;
6107 cur = *ptr;
6108 if ((ctxt->sax != NULL) &&
6109 (ctxt->sax->getParameterEntity != NULL))
6110 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6111 name);
6112 if (entity == NULL) {
6113 /*
6114 * [ WFC: Entity Declared ]
6115 * In a document without any DTD, a document with only an
6116 * internal DTD subset which contains no parameter entity
6117 * references, or a document with "standalone='yes'", ...
6118 * ... The declaration of a parameter entity must precede
6119 * any reference to it...
6120 */
6121 if ((ctxt->standalone == 1) ||
6122 ((ctxt->hasExternalSubset == 0) &&
6123 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006124 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006125 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006126 } else {
6127 /*
6128 * [ VC: Entity Declared ]
6129 * In a document with an external subset or external
6130 * parameter entities with "standalone='no'", ...
6131 * ... The declaration of a parameter entity must
6132 * precede any reference to it...
6133 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006134 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6135 "PEReference: %%%s; not found\n",
6136 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006137 ctxt->valid = 0;
6138 }
6139 } else {
6140 /*
6141 * Internal checking in case the entity quest barfed
6142 */
6143 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6144 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006145 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6146 "%%%s; is not a parameter entity\n",
6147 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006148 }
6149 }
6150 ctxt->hasPErefs = 1;
6151 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006152 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006153 }
6154 xmlFree(name);
6155 }
6156 }
6157 *str = ptr;
6158 return(entity);
6159}
6160
6161/**
6162 * xmlParseDocTypeDecl:
6163 * @ctxt: an XML parser context
6164 *
6165 * parse a DOCTYPE declaration
6166 *
6167 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6168 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6169 *
6170 * [ VC: Root Element Type ]
6171 * The Name in the document type declaration must match the element
6172 * type of the root element.
6173 */
6174
6175void
6176xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006177 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006178 xmlChar *ExternalID = NULL;
6179 xmlChar *URI = NULL;
6180
6181 /*
6182 * We know that '<!DOCTYPE' has been detected.
6183 */
6184 SKIP(9);
6185
6186 SKIP_BLANKS;
6187
6188 /*
6189 * Parse the DOCTYPE name.
6190 */
6191 name = xmlParseName(ctxt);
6192 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006193 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6194 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006195 }
6196 ctxt->intSubName = name;
6197
6198 SKIP_BLANKS;
6199
6200 /*
6201 * Check for SystemID and ExternalID
6202 */
6203 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6204
6205 if ((URI != NULL) || (ExternalID != NULL)) {
6206 ctxt->hasExternalSubset = 1;
6207 }
6208 ctxt->extSubURI = URI;
6209 ctxt->extSubSystem = ExternalID;
6210
6211 SKIP_BLANKS;
6212
6213 /*
6214 * Create and update the internal subset.
6215 */
6216 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6217 (!ctxt->disableSAX))
6218 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6219
6220 /*
6221 * Is there any internal subset declarations ?
6222 * they are handled separately in xmlParseInternalSubset()
6223 */
6224 if (RAW == '[')
6225 return;
6226
6227 /*
6228 * We should be at the end of the DOCTYPE declaration.
6229 */
6230 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006231 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006232 }
6233 NEXT;
6234}
6235
6236/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006237 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006238 * @ctxt: an XML parser context
6239 *
6240 * parse the internal subset declaration
6241 *
6242 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6243 */
6244
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006245static void
Owen Taylor3473f882001-02-23 17:55:21 +00006246xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6247 /*
6248 * Is there any DTD definition ?
6249 */
6250 if (RAW == '[') {
6251 ctxt->instate = XML_PARSER_DTD;
6252 NEXT;
6253 /*
6254 * Parse the succession of Markup declarations and
6255 * PEReferences.
6256 * Subsequence (markupdecl | PEReference | S)*
6257 */
6258 while (RAW != ']') {
6259 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006260 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006261
6262 SKIP_BLANKS;
6263 xmlParseMarkupDecl(ctxt);
6264 xmlParsePEReference(ctxt);
6265
6266 /*
6267 * Pop-up of finished entities.
6268 */
6269 while ((RAW == 0) && (ctxt->inputNr > 1))
6270 xmlPopInput(ctxt);
6271
6272 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006273 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006274 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006275 break;
6276 }
6277 }
6278 if (RAW == ']') {
6279 NEXT;
6280 SKIP_BLANKS;
6281 }
6282 }
6283
6284 /*
6285 * We should be at the end of the DOCTYPE declaration.
6286 */
6287 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006288 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006289 }
6290 NEXT;
6291}
6292
Daniel Veillard81273902003-09-30 00:43:48 +00006293#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006294/**
6295 * xmlParseAttribute:
6296 * @ctxt: an XML parser context
6297 * @value: a xmlChar ** used to store the value of the attribute
6298 *
6299 * parse an attribute
6300 *
6301 * [41] Attribute ::= Name Eq AttValue
6302 *
6303 * [ WFC: No External Entity References ]
6304 * Attribute values cannot contain direct or indirect entity references
6305 * to external entities.
6306 *
6307 * [ WFC: No < in Attribute Values ]
6308 * The replacement text of any entity referred to directly or indirectly in
6309 * an attribute value (other than "&lt;") must not contain a <.
6310 *
6311 * [ VC: Attribute Value Type ]
6312 * The attribute must have been declared; the value must be of the type
6313 * declared for it.
6314 *
6315 * [25] Eq ::= S? '=' S?
6316 *
6317 * With namespace:
6318 *
6319 * [NS 11] Attribute ::= QName Eq AttValue
6320 *
6321 * Also the case QName == xmlns:??? is handled independently as a namespace
6322 * definition.
6323 *
6324 * Returns the attribute name, and the value in *value.
6325 */
6326
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006327const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006328xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006329 const xmlChar *name;
6330 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006331
6332 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006333 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006334 name = xmlParseName(ctxt);
6335 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006336 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006337 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006338 return(NULL);
6339 }
6340
6341 /*
6342 * read the value
6343 */
6344 SKIP_BLANKS;
6345 if (RAW == '=') {
6346 NEXT;
6347 SKIP_BLANKS;
6348 val = xmlParseAttValue(ctxt);
6349 ctxt->instate = XML_PARSER_CONTENT;
6350 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006351 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006352 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006353 return(NULL);
6354 }
6355
6356 /*
6357 * Check that xml:lang conforms to the specification
6358 * No more registered as an error, just generate a warning now
6359 * since this was deprecated in XML second edition
6360 */
6361 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6362 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006363 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6364 "Malformed value for xml:lang : %s\n",
6365 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006366 }
6367 }
6368
6369 /*
6370 * Check that xml:space conforms to the specification
6371 */
6372 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6373 if (xmlStrEqual(val, BAD_CAST "default"))
6374 *(ctxt->space) = 0;
6375 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6376 *(ctxt->space) = 1;
6377 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006378 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006379"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006380 val);
Owen Taylor3473f882001-02-23 17:55:21 +00006381 }
6382 }
6383
6384 *value = val;
6385 return(name);
6386}
6387
6388/**
6389 * xmlParseStartTag:
6390 * @ctxt: an XML parser context
6391 *
6392 * parse a start of tag either for rule element or
6393 * EmptyElement. In both case we don't parse the tag closing chars.
6394 *
6395 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6396 *
6397 * [ WFC: Unique Att Spec ]
6398 * No attribute name may appear more than once in the same start-tag or
6399 * empty-element tag.
6400 *
6401 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6402 *
6403 * [ WFC: Unique Att Spec ]
6404 * No attribute name may appear more than once in the same start-tag or
6405 * empty-element tag.
6406 *
6407 * With namespace:
6408 *
6409 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6410 *
6411 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6412 *
6413 * Returns the element name parsed
6414 */
6415
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006416const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006417xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006418 const xmlChar *name;
6419 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006420 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006421 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006422 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006423 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006424 int i;
6425
6426 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006427 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006428
6429 name = xmlParseName(ctxt);
6430 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006431 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006432 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006433 return(NULL);
6434 }
6435
6436 /*
6437 * Now parse the attributes, it ends up with the ending
6438 *
6439 * (S Attribute)* S?
6440 */
6441 SKIP_BLANKS;
6442 GROW;
6443
Daniel Veillard21a0f912001-02-25 19:54:14 +00006444 while ((RAW != '>') &&
6445 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00006446 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006447 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006448 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006449
6450 attname = xmlParseAttribute(ctxt, &attvalue);
6451 if ((attname != NULL) && (attvalue != NULL)) {
6452 /*
6453 * [ WFC: Unique Att Spec ]
6454 * No attribute name may appear more than once in the same
6455 * start-tag or empty-element tag.
6456 */
6457 for (i = 0; i < nbatts;i += 2) {
6458 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006459 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006460 xmlFree(attvalue);
6461 goto failed;
6462 }
6463 }
Owen Taylor3473f882001-02-23 17:55:21 +00006464 /*
6465 * Add the pair to atts
6466 */
6467 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006468 maxatts = 22; /* allow for 10 attrs by default */
6469 atts = (const xmlChar **)
6470 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006471 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006472 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006473 if (attvalue != NULL)
6474 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006475 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006476 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006477 ctxt->atts = atts;
6478 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006479 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006480 const xmlChar **n;
6481
Owen Taylor3473f882001-02-23 17:55:21 +00006482 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006483 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006484 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006485 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006486 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006487 if (attvalue != NULL)
6488 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006489 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006490 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006491 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006492 ctxt->atts = atts;
6493 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006494 }
6495 atts[nbatts++] = attname;
6496 atts[nbatts++] = attvalue;
6497 atts[nbatts] = NULL;
6498 atts[nbatts + 1] = NULL;
6499 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006500 if (attvalue != NULL)
6501 xmlFree(attvalue);
6502 }
6503
6504failed:
6505
Daniel Veillard3772de32002-12-17 10:31:45 +00006506 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006507 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6508 break;
William M. Brack76e95df2003-10-18 16:20:14 +00006509 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006510 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6511 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006512 }
6513 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006514 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6515 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006516 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
6517 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006518 break;
6519 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006520 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00006521 GROW;
6522 }
6523
6524 /*
6525 * SAX: Start of Element !
6526 */
6527 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006528 (!ctxt->disableSAX)) {
6529 if (nbatts > 0)
6530 ctxt->sax->startElement(ctxt->userData, name, atts);
6531 else
6532 ctxt->sax->startElement(ctxt->userData, name, NULL);
6533 }
Owen Taylor3473f882001-02-23 17:55:21 +00006534
6535 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006536 /* Free only the content strings */
6537 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006538 if (atts[i] != NULL)
6539 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006540 }
6541 return(name);
6542}
6543
6544/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00006545 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00006546 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00006547 * @line: line of the start tag
6548 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00006549 *
6550 * parse an end of tag
6551 *
6552 * [42] ETag ::= '</' Name S? '>'
6553 *
6554 * With namespace
6555 *
6556 * [NS 9] ETag ::= '</' QName S? '>'
6557 */
6558
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006559static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00006560xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006561 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006562
6563 GROW;
6564 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006565 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006566 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006567 return;
6568 }
6569 SKIP(2);
6570
Daniel Veillard46de64e2002-05-29 08:21:33 +00006571 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006572
6573 /*
6574 * We should definitely be at the ending "S? '>'" part
6575 */
6576 GROW;
6577 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00006578 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006579 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006580 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006581 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006582
6583 /*
6584 * [ WFC: Element Type Match ]
6585 * The Name in an element's end-tag must match the element type in the
6586 * start-tag.
6587 *
6588 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006589 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006590 if (name == NULL) name = BAD_CAST "unparseable";
6591 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006592 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006593 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00006594 }
6595
6596 /*
6597 * SAX: End of Tag
6598 */
6599 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6600 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006601 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006602
Daniel Veillarde57ec792003-09-10 10:50:59 +00006603 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006604 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006605 return;
6606}
6607
6608/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006609 * xmlParseEndTag:
6610 * @ctxt: an XML parser context
6611 *
6612 * parse an end of tag
6613 *
6614 * [42] ETag ::= '</' Name S? '>'
6615 *
6616 * With namespace
6617 *
6618 * [NS 9] ETag ::= '</' QName S? '>'
6619 */
6620
6621void
6622xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006623 xmlParseEndTag1(ctxt, 0);
6624}
Daniel Veillard81273902003-09-30 00:43:48 +00006625#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00006626
6627/************************************************************************
6628 * *
6629 * SAX 2 specific operations *
6630 * *
6631 ************************************************************************/
6632
6633static const xmlChar *
6634xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
6635 int len = 0, l;
6636 int c;
6637 int count = 0;
6638
6639 /*
6640 * Handler for more complex cases
6641 */
6642 GROW;
6643 c = CUR_CHAR(l);
6644 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006645 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006646 return(NULL);
6647 }
6648
6649 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00006650 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006651 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00006652 (IS_COMBINING(c)) ||
6653 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006654 if (count++ > 100) {
6655 count = 0;
6656 GROW;
6657 }
6658 len += l;
6659 NEXTL(l);
6660 c = CUR_CHAR(l);
6661 }
6662 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
6663}
6664
6665/*
6666 * xmlGetNamespace:
6667 * @ctxt: an XML parser context
6668 * @prefix: the prefix to lookup
6669 *
6670 * Lookup the namespace name for the @prefix (which ca be NULL)
6671 * The prefix must come from the @ctxt->dict dictionnary
6672 *
6673 * Returns the namespace name or NULL if not bound
6674 */
6675static const xmlChar *
6676xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
6677 int i;
6678
Daniel Veillarde57ec792003-09-10 10:50:59 +00006679 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006680 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00006681 if (ctxt->nsTab[i] == prefix) {
6682 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
6683 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006684 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006685 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006686 return(NULL);
6687}
6688
6689/**
6690 * xmlParseNCName:
6691 * @ctxt: an XML parser context
6692 *
6693 * parse an XML name.
6694 *
6695 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
6696 * CombiningChar | Extender
6697 *
6698 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
6699 *
6700 * Returns the Name parsed or NULL
6701 */
6702
6703static const xmlChar *
6704xmlParseNCName(xmlParserCtxtPtr ctxt) {
6705 const xmlChar *in;
6706 const xmlChar *ret;
6707 int count = 0;
6708
6709 /*
6710 * Accelerator for simple ASCII names
6711 */
6712 in = ctxt->input->cur;
6713 if (((*in >= 0x61) && (*in <= 0x7A)) ||
6714 ((*in >= 0x41) && (*in <= 0x5A)) ||
6715 (*in == '_')) {
6716 in++;
6717 while (((*in >= 0x61) && (*in <= 0x7A)) ||
6718 ((*in >= 0x41) && (*in <= 0x5A)) ||
6719 ((*in >= 0x30) && (*in <= 0x39)) ||
6720 (*in == '_') || (*in == '-') ||
6721 (*in == '.'))
6722 in++;
6723 if ((*in > 0) && (*in < 0x80)) {
6724 count = in - ctxt->input->cur;
6725 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
6726 ctxt->input->cur = in;
6727 ctxt->nbChars += count;
6728 ctxt->input->col += count;
6729 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006730 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006731 }
6732 return(ret);
6733 }
6734 }
6735 return(xmlParseNCNameComplex(ctxt));
6736}
6737
6738/**
6739 * xmlParseQName:
6740 * @ctxt: an XML parser context
6741 * @prefix: pointer to store the prefix part
6742 *
6743 * parse an XML Namespace QName
6744 *
6745 * [6] QName ::= (Prefix ':')? LocalPart
6746 * [7] Prefix ::= NCName
6747 * [8] LocalPart ::= NCName
6748 *
6749 * Returns the Name parsed or NULL
6750 */
6751
6752static const xmlChar *
6753xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
6754 const xmlChar *l, *p;
6755
6756 GROW;
6757
6758 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006759 if (l == NULL) {
6760 if (CUR == ':') {
6761 l = xmlParseName(ctxt);
6762 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006763 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6764 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006765 *prefix = NULL;
6766 return(l);
6767 }
6768 }
6769 return(NULL);
6770 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006771 if (CUR == ':') {
6772 NEXT;
6773 p = l;
6774 l = xmlParseNCName(ctxt);
6775 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006776 xmlChar *tmp;
6777
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006778 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6779 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006780 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
6781 p = xmlDictLookup(ctxt->dict, tmp, -1);
6782 if (tmp != NULL) xmlFree(tmp);
6783 *prefix = NULL;
6784 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006785 }
6786 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006787 xmlChar *tmp;
6788
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006789 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6790 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006791 NEXT;
6792 tmp = (xmlChar *) xmlParseName(ctxt);
6793 if (tmp != NULL) {
6794 tmp = xmlBuildQName(tmp, l, NULL, 0);
6795 l = xmlDictLookup(ctxt->dict, tmp, -1);
6796 if (tmp != NULL) xmlFree(tmp);
6797 *prefix = p;
6798 return(l);
6799 }
6800 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
6801 l = xmlDictLookup(ctxt->dict, tmp, -1);
6802 if (tmp != NULL) xmlFree(tmp);
6803 *prefix = p;
6804 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006805 }
6806 *prefix = p;
6807 } else
6808 *prefix = NULL;
6809 return(l);
6810}
6811
6812/**
6813 * xmlParseQNameAndCompare:
6814 * @ctxt: an XML parser context
6815 * @name: the localname
6816 * @prefix: the prefix, if any.
6817 *
6818 * parse an XML name and compares for match
6819 * (specialized for endtag parsing)
6820 *
6821 * Returns NULL for an illegal name, (xmlChar*) 1 for success
6822 * and the name for mismatch
6823 */
6824
6825static const xmlChar *
6826xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
6827 xmlChar const *prefix) {
6828 const xmlChar *cmp = name;
6829 const xmlChar *in;
6830 const xmlChar *ret;
6831 const xmlChar *prefix2;
6832
6833 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
6834
6835 GROW;
6836 in = ctxt->input->cur;
6837
6838 cmp = prefix;
6839 while (*in != 0 && *in == *cmp) {
6840 ++in;
6841 ++cmp;
6842 }
6843 if ((*cmp == 0) && (*in == ':')) {
6844 in++;
6845 cmp = name;
6846 while (*in != 0 && *in == *cmp) {
6847 ++in;
6848 ++cmp;
6849 }
William M. Brack76e95df2003-10-18 16:20:14 +00006850 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006851 /* success */
6852 ctxt->input->cur = in;
6853 return((const xmlChar*) 1);
6854 }
6855 }
6856 /*
6857 * all strings coms from the dictionary, equality can be done directly
6858 */
6859 ret = xmlParseQName (ctxt, &prefix2);
6860 if ((ret == name) && (prefix == prefix2))
6861 return((const xmlChar*) 1);
6862 return ret;
6863}
6864
6865/**
6866 * xmlParseAttValueInternal:
6867 * @ctxt: an XML parser context
6868 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006869 * @alloc: whether the attribute was reallocated as a new string
6870 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00006871 *
6872 * parse a value for an attribute.
6873 * NOTE: if no normalization is needed, the routine will return pointers
6874 * directly from the data buffer.
6875 *
6876 * 3.3.3 Attribute-Value Normalization:
6877 * Before the value of an attribute is passed to the application or
6878 * checked for validity, the XML processor must normalize it as follows:
6879 * - a character reference is processed by appending the referenced
6880 * character to the attribute value
6881 * - an entity reference is processed by recursively processing the
6882 * replacement text of the entity
6883 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
6884 * appending #x20 to the normalized value, except that only a single
6885 * #x20 is appended for a "#xD#xA" sequence that is part of an external
6886 * parsed entity or the literal entity value of an internal parsed entity
6887 * - other characters are processed by appending them to the normalized value
6888 * If the declared value is not CDATA, then the XML processor must further
6889 * process the normalized attribute value by discarding any leading and
6890 * trailing space (#x20) characters, and by replacing sequences of space
6891 * (#x20) characters by a single space (#x20) character.
6892 * All attributes for which no declaration has been read should be treated
6893 * by a non-validating parser as if declared CDATA.
6894 *
6895 * Returns the AttValue parsed or NULL. The value has to be freed by the
6896 * caller if it was copied, this can be detected by val[*len] == 0.
6897 */
6898
6899static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006900xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
6901 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00006902{
Daniel Veillard0fb18932003-09-07 09:14:37 +00006903 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006904 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00006905 xmlChar *ret = NULL;
6906
6907 GROW;
6908 in = (xmlChar *) CUR_PTR;
6909 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006910 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006911 return (NULL);
6912 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006913 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00006914
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006915 /*
6916 * try to handle in this routine the most common case where no
6917 * allocation of a new string is required and where content is
6918 * pure ASCII.
6919 */
6920 limit = *in++;
6921 end = ctxt->input->end;
6922 start = in;
6923 if (in >= end) {
6924 const xmlChar *oldbase = ctxt->input->base;
6925 GROW;
6926 if (oldbase != ctxt->input->base) {
6927 long delta = ctxt->input->base - oldbase;
6928 start = start + delta;
6929 in = in + delta;
6930 }
6931 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00006932 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006933 if (normalize) {
6934 /*
6935 * Skip any leading spaces
6936 */
6937 while ((in < end) && (*in != limit) &&
6938 ((*in == 0x20) || (*in == 0x9) ||
6939 (*in == 0xA) || (*in == 0xD))) {
6940 in++;
6941 start = in;
6942 if (in >= end) {
6943 const xmlChar *oldbase = ctxt->input->base;
6944 GROW;
6945 if (oldbase != ctxt->input->base) {
6946 long delta = ctxt->input->base - oldbase;
6947 start = start + delta;
6948 in = in + delta;
6949 }
6950 end = ctxt->input->end;
6951 }
6952 }
6953 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
6954 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
6955 if ((*in++ == 0x20) && (*in == 0x20)) break;
6956 if (in >= end) {
6957 const xmlChar *oldbase = ctxt->input->base;
6958 GROW;
6959 if (oldbase != ctxt->input->base) {
6960 long delta = ctxt->input->base - oldbase;
6961 start = start + delta;
6962 in = in + delta;
6963 }
6964 end = ctxt->input->end;
6965 }
6966 }
6967 last = in;
6968 /*
6969 * skip the trailing blanks
6970 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00006971 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006972 while ((in < end) && (*in != limit) &&
6973 ((*in == 0x20) || (*in == 0x9) ||
6974 (*in == 0xA) || (*in == 0xD))) {
6975 in++;
6976 if (in >= end) {
6977 const xmlChar *oldbase = ctxt->input->base;
6978 GROW;
6979 if (oldbase != ctxt->input->base) {
6980 long delta = ctxt->input->base - oldbase;
6981 start = start + delta;
6982 in = in + delta;
6983 last = last + delta;
6984 }
6985 end = ctxt->input->end;
6986 }
6987 }
6988 if (*in != limit) goto need_complex;
6989 } else {
6990 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
6991 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
6992 in++;
6993 if (in >= end) {
6994 const xmlChar *oldbase = ctxt->input->base;
6995 GROW;
6996 if (oldbase != ctxt->input->base) {
6997 long delta = ctxt->input->base - oldbase;
6998 start = start + delta;
6999 in = in + delta;
7000 }
7001 end = ctxt->input->end;
7002 }
7003 }
7004 last = in;
7005 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007006 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007007 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007008 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007009 *len = last - start;
7010 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007011 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007012 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007013 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007014 }
7015 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007016 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007017 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007018need_complex:
7019 if (alloc) *alloc = 1;
7020 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007021}
7022
7023/**
7024 * xmlParseAttribute2:
7025 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007026 * @pref: the element prefix
7027 * @elem: the element name
7028 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007029 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007030 * @len: an int * to save the length of the attribute
7031 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007032 *
7033 * parse an attribute in the new SAX2 framework.
7034 *
7035 * Returns the attribute name, and the value in *value, .
7036 */
7037
7038static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007039xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7040 const xmlChar *pref, const xmlChar *elem,
7041 const xmlChar **prefix, xmlChar **value,
7042 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007043 const xmlChar *name;
7044 xmlChar *val;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007045 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007046
7047 *value = NULL;
7048 GROW;
7049 name = xmlParseQName(ctxt, prefix);
7050 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007051 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7052 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007053 return(NULL);
7054 }
7055
7056 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007057 * get the type if needed
7058 */
7059 if (ctxt->attsSpecial != NULL) {
7060 int type;
7061
7062 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7063 pref, elem, *prefix, name);
7064 if (type != 0) normalize = 1;
7065 }
7066
7067 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007068 * read the value
7069 */
7070 SKIP_BLANKS;
7071 if (RAW == '=') {
7072 NEXT;
7073 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007074 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007075 ctxt->instate = XML_PARSER_CONTENT;
7076 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007077 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007078 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007079 return(NULL);
7080 }
7081
7082 /*
7083 * Check that xml:lang conforms to the specification
7084 * No more registered as an error, just generate a warning now
7085 * since this was deprecated in XML second edition
7086 */
7087 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7088 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007089 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7090 "Malformed value for xml:lang : %s\n",
7091 val, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007092 }
7093 }
7094
7095 /*
7096 * Check that xml:space conforms to the specification
7097 */
7098 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7099 if (xmlStrEqual(val, BAD_CAST "default"))
7100 *(ctxt->space) = 0;
7101 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7102 *(ctxt->space) = 1;
7103 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007104 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007105"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7106 val);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007107 }
7108 }
7109
7110 *value = val;
7111 return(name);
7112}
7113
7114/**
7115 * xmlParseStartTag2:
7116 * @ctxt: an XML parser context
7117 *
7118 * parse a start of tag either for rule element or
7119 * EmptyElement. In both case we don't parse the tag closing chars.
7120 * This routine is called when running SAX2 parsing
7121 *
7122 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7123 *
7124 * [ WFC: Unique Att Spec ]
7125 * No attribute name may appear more than once in the same start-tag or
7126 * empty-element tag.
7127 *
7128 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7129 *
7130 * [ WFC: Unique Att Spec ]
7131 * No attribute name may appear more than once in the same start-tag or
7132 * empty-element tag.
7133 *
7134 * With namespace:
7135 *
7136 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7137 *
7138 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7139 *
7140 * Returns the element name parsed
7141 */
7142
7143static const xmlChar *
7144xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
7145 const xmlChar **URI) {
7146 const xmlChar *localname;
7147 const xmlChar *prefix;
7148 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007149 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007150 const xmlChar *nsname;
7151 xmlChar *attvalue;
7152 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007153 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007154 int nratts, nbatts, nbdef;
7155 int i, j, nbNs, attval;
7156 const xmlChar *base;
7157 unsigned long cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007158
7159 if (RAW != '<') return(NULL);
7160 NEXT1;
7161
7162 /*
7163 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7164 * point since the attribute values may be stored as pointers to
7165 * the buffer and calling SHRINK would destroy them !
7166 * The Shrinking is only possible once the full set of attribute
7167 * callbacks have been done.
7168 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007169reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007170 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007171 base = ctxt->input->base;
7172 cur = ctxt->input->cur - ctxt->input->base;
7173 nbatts = 0;
7174 nratts = 0;
7175 nbdef = 0;
7176 nbNs = 0;
7177 attval = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007178
7179 localname = xmlParseQName(ctxt, &prefix);
7180 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007181 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7182 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007183 return(NULL);
7184 }
7185
7186 /*
7187 * Now parse the attributes, it ends up with the ending
7188 *
7189 * (S Attribute)* S?
7190 */
7191 SKIP_BLANKS;
7192 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007193 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007194
7195 while ((RAW != '>') &&
7196 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007197 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007198 const xmlChar *q = CUR_PTR;
7199 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007200 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007201
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007202 attname = xmlParseAttribute2(ctxt, prefix, localname,
7203 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007204 if ((attname != NULL) && (attvalue != NULL)) {
7205 if (len < 0) len = xmlStrlen(attvalue);
7206 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007207 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7208 xmlURIPtr uri;
7209
7210 if (*URL != 0) {
7211 uri = xmlParseURI((const char *) URL);
7212 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007213 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7214 "xmlns: %s not a valid URI\n",
7215 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007216 } else {
7217 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007218 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7219 "xmlns: URI %s is not absolute\n",
7220 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007221 }
7222 xmlFreeURI(uri);
7223 }
7224 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007225 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007226 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007227 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007228 for (j = 1;j <= nbNs;j++)
7229 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7230 break;
7231 if (j <= nbNs)
7232 xmlErrAttributeDup(ctxt, NULL, attname);
7233 else
7234 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007235 if (alloc != 0) xmlFree(attvalue);
7236 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007237 continue;
7238 }
7239 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007240 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7241 xmlURIPtr uri;
7242
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007243 if (attname == ctxt->str_xml) {
7244 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007245 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7246 "xml namespace prefix mapped to wrong URI\n",
7247 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007248 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007249 /*
7250 * Do not keep a namespace definition node
7251 */
7252 if (alloc != 0) xmlFree(attvalue);
7253 SKIP_BLANKS;
7254 continue;
7255 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007256 uri = xmlParseURI((const char *) URL);
7257 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007258 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7259 "xmlns:%s: '%s' is not a valid URI\n",
7260 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007261 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007262 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007263 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7264 "xmlns:%s: URI %s is not absolute\n",
7265 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007266 }
7267 xmlFreeURI(uri);
7268 }
7269
Daniel Veillard0fb18932003-09-07 09:14:37 +00007270 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007271 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007272 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007273 for (j = 1;j <= nbNs;j++)
7274 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7275 break;
7276 if (j <= nbNs)
7277 xmlErrAttributeDup(ctxt, aprefix, attname);
7278 else
7279 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007280 if (alloc != 0) xmlFree(attvalue);
7281 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007282 continue;
7283 }
7284
7285 /*
7286 * Add the pair to atts
7287 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007288 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7289 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007290 if (attvalue[len] == 0)
7291 xmlFree(attvalue);
7292 goto failed;
7293 }
7294 maxatts = ctxt->maxatts;
7295 atts = ctxt->atts;
7296 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007297 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007298 atts[nbatts++] = attname;
7299 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007300 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007301 atts[nbatts++] = attvalue;
7302 attvalue += len;
7303 atts[nbatts++] = attvalue;
7304 /*
7305 * tag if some deallocation is needed
7306 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007307 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007308 } else {
7309 if ((attvalue != NULL) && (attvalue[len] == 0))
7310 xmlFree(attvalue);
7311 }
7312
7313failed:
7314
7315 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007316 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007317 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7318 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007319 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007320 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7321 "attributes construct error\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007322 }
7323 SKIP_BLANKS;
7324 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7325 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007326 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007327 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007328 break;
7329 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007330 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007331 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007332 }
7333
Daniel Veillard0fb18932003-09-07 09:14:37 +00007334 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007335 * The attributes defaulting
7336 */
7337 if (ctxt->attsDefault != NULL) {
7338 xmlDefAttrsPtr defaults;
7339
7340 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7341 if (defaults != NULL) {
7342 for (i = 0;i < defaults->nbAttrs;i++) {
7343 attname = defaults->values[4 * i];
7344 aprefix = defaults->values[4 * i + 1];
7345
7346 /*
7347 * special work for namespaces defaulted defs
7348 */
7349 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7350 /*
7351 * check that it's not a defined namespace
7352 */
7353 for (j = 1;j <= nbNs;j++)
7354 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7355 break;
7356 if (j <= nbNs) continue;
7357
7358 nsname = xmlGetNamespace(ctxt, NULL);
7359 if (nsname != defaults->values[4 * i + 2]) {
7360 if (nsPush(ctxt, NULL,
7361 defaults->values[4 * i + 2]) > 0)
7362 nbNs++;
7363 }
7364 } else if (aprefix == ctxt->str_xmlns) {
7365 /*
7366 * check that it's not a defined namespace
7367 */
7368 for (j = 1;j <= nbNs;j++)
7369 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7370 break;
7371 if (j <= nbNs) continue;
7372
7373 nsname = xmlGetNamespace(ctxt, attname);
7374 if (nsname != defaults->values[2]) {
7375 if (nsPush(ctxt, attname,
7376 defaults->values[4 * i + 2]) > 0)
7377 nbNs++;
7378 }
7379 } else {
7380 /*
7381 * check that it's not a defined attribute
7382 */
7383 for (j = 0;j < nbatts;j+=5) {
7384 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7385 break;
7386 }
7387 if (j < nbatts) continue;
7388
7389 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7390 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00007391 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007392 }
7393 maxatts = ctxt->maxatts;
7394 atts = ctxt->atts;
7395 }
7396 atts[nbatts++] = attname;
7397 atts[nbatts++] = aprefix;
7398 if (aprefix == NULL)
7399 atts[nbatts++] = NULL;
7400 else
7401 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7402 atts[nbatts++] = defaults->values[4 * i + 2];
7403 atts[nbatts++] = defaults->values[4 * i + 3];
7404 nbdef++;
7405 }
7406 }
7407 }
7408 }
7409
Daniel Veillarde70c8772003-11-25 07:21:18 +00007410 /*
7411 * The attributes checkings
7412 */
7413 for (i = 0; i < nbatts;i += 5) {
7414 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7415 if ((atts[i + 1] != NULL) && (nsname == NULL)) {
7416 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7417 "Namespace prefix %s for %s on %s is not defined\n",
7418 atts[i + 1], atts[i], localname);
7419 }
7420 atts[i + 2] = nsname;
7421 /*
7422 * [ WFC: Unique Att Spec ]
7423 * No attribute name may appear more than once in the same
7424 * start-tag or empty-element tag.
7425 * As extended by the Namespace in XML REC.
7426 */
7427 for (j = 0; j < i;j += 5) {
7428 if (atts[i] == atts[j]) {
7429 if (atts[i+1] == atts[j+1]) {
7430 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
7431 break;
7432 }
7433 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
7434 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
7435 "Namespaced Attribute %s in '%s' redefined\n",
7436 atts[i], nsname, NULL);
7437 break;
7438 }
7439 }
7440 }
7441 }
7442
Daniel Veillarde57ec792003-09-10 10:50:59 +00007443 nsname = xmlGetNamespace(ctxt, prefix);
7444 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007445 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7446 "Namespace prefix %s on %s is not defined\n",
7447 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007448 }
7449 *pref = prefix;
7450 *URI = nsname;
7451
7452 /*
7453 * SAX: Start of Element !
7454 */
7455 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7456 (!ctxt->disableSAX)) {
7457 if (nbNs > 0)
7458 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7459 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7460 nbatts / 5, nbdef, atts);
7461 else
7462 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7463 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7464 }
7465
7466 /*
7467 * Free up attribute allocated strings if needed
7468 */
7469 if (attval != 0) {
7470 for (i = 3,j = 0; j < nratts;i += 5,j++)
7471 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7472 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007473 }
7474
7475 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007476
7477base_changed:
7478 /*
7479 * the attribute strings are valid iif the base didn't changed
7480 */
7481 if (attval != 0) {
7482 for (i = 3,j = 0; j < nratts;i += 5,j++)
7483 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7484 xmlFree((xmlChar *) atts[i]);
7485 }
7486 ctxt->input->cur = ctxt->input->base + cur;
7487 if (ctxt->wellFormed == 1) {
7488 goto reparse;
7489 }
7490 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007491}
7492
7493/**
7494 * xmlParseEndTag2:
7495 * @ctxt: an XML parser context
7496 * @line: line of the start tag
7497 * @nsNr: number of namespaces on the start tag
7498 *
7499 * parse an end of tag
7500 *
7501 * [42] ETag ::= '</' Name S? '>'
7502 *
7503 * With namespace
7504 *
7505 * [NS 9] ETag ::= '</' QName S? '>'
7506 */
7507
7508static void
7509xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
7510 const xmlChar *URI, int line, int nsNr) {
7511 const xmlChar *name;
7512
7513 GROW;
7514 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007515 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007516 return;
7517 }
7518 SKIP(2);
7519
7520 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
7521
7522 /*
7523 * We should definitely be at the ending "S? '>'" part
7524 */
7525 GROW;
7526 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007527 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007528 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007529 } else
7530 NEXT1;
7531
7532 /*
7533 * [ WFC: Element Type Match ]
7534 * The Name in an element's end-tag must match the element type in the
7535 * start-tag.
7536 *
7537 */
7538 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007539 if (name == NULL) name = BAD_CAST "unparseable";
7540 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007541 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007542 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007543 }
7544
7545 /*
7546 * SAX: End of Tag
7547 */
7548 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7549 (!ctxt->disableSAX))
7550 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
7551
Daniel Veillard0fb18932003-09-07 09:14:37 +00007552 spacePop(ctxt);
7553 if (nsNr != 0)
7554 nsPop(ctxt, nsNr);
7555 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007556}
7557
7558/**
Owen Taylor3473f882001-02-23 17:55:21 +00007559 * xmlParseCDSect:
7560 * @ctxt: an XML parser context
7561 *
7562 * Parse escaped pure raw content.
7563 *
7564 * [18] CDSect ::= CDStart CData CDEnd
7565 *
7566 * [19] CDStart ::= '<![CDATA['
7567 *
7568 * [20] Data ::= (Char* - (Char* ']]>' Char*))
7569 *
7570 * [21] CDEnd ::= ']]>'
7571 */
7572void
7573xmlParseCDSect(xmlParserCtxtPtr ctxt) {
7574 xmlChar *buf = NULL;
7575 int len = 0;
7576 int size = XML_PARSER_BUFFER_SIZE;
7577 int r, rl;
7578 int s, sl;
7579 int cur, l;
7580 int count = 0;
7581
Daniel Veillard8f597c32003-10-06 08:19:27 +00007582 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007583 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007584 SKIP(9);
7585 } else
7586 return;
7587
7588 ctxt->instate = XML_PARSER_CDATA_SECTION;
7589 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00007590 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007591 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007592 ctxt->instate = XML_PARSER_CONTENT;
7593 return;
7594 }
7595 NEXTL(rl);
7596 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00007597 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007598 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007599 ctxt->instate = XML_PARSER_CONTENT;
7600 return;
7601 }
7602 NEXTL(sl);
7603 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007604 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007605 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007606 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007607 return;
7608 }
William M. Brack871611b2003-10-18 04:53:14 +00007609 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007610 ((r != ']') || (s != ']') || (cur != '>'))) {
7611 if (len + 5 >= size) {
7612 size *= 2;
7613 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7614 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007615 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007616 return;
7617 }
7618 }
7619 COPY_BUF(rl,buf,len,r);
7620 r = s;
7621 rl = sl;
7622 s = cur;
7623 sl = l;
7624 count++;
7625 if (count > 50) {
7626 GROW;
7627 count = 0;
7628 }
7629 NEXTL(l);
7630 cur = CUR_CHAR(l);
7631 }
7632 buf[len] = 0;
7633 ctxt->instate = XML_PARSER_CONTENT;
7634 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007635 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00007636 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00007637 xmlFree(buf);
7638 return;
7639 }
7640 NEXTL(l);
7641
7642 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007643 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00007644 */
7645 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
7646 if (ctxt->sax->cdataBlock != NULL)
7647 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00007648 else if (ctxt->sax->characters != NULL)
7649 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00007650 }
7651 xmlFree(buf);
7652}
7653
7654/**
7655 * xmlParseContent:
7656 * @ctxt: an XML parser context
7657 *
7658 * Parse a content:
7659 *
7660 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
7661 */
7662
7663void
7664xmlParseContent(xmlParserCtxtPtr ctxt) {
7665 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00007666 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007667 ((RAW != '<') || (NXT(1) != '/'))) {
7668 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007669 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00007670 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00007671
7672 /*
Owen Taylor3473f882001-02-23 17:55:21 +00007673 * First case : a Processing Instruction.
7674 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00007675 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007676 xmlParsePI(ctxt);
7677 }
7678
7679 /*
7680 * Second case : a CDSection
7681 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00007682 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007683 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007684 xmlParseCDSect(ctxt);
7685 }
7686
7687 /*
7688 * Third case : a comment
7689 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007690 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007691 (NXT(2) == '-') && (NXT(3) == '-')) {
7692 xmlParseComment(ctxt);
7693 ctxt->instate = XML_PARSER_CONTENT;
7694 }
7695
7696 /*
7697 * Fourth case : a sub-element.
7698 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007699 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00007700 xmlParseElement(ctxt);
7701 }
7702
7703 /*
7704 * Fifth case : a reference. If if has not been resolved,
7705 * parsing returns it's Name, create the node
7706 */
7707
Daniel Veillard21a0f912001-02-25 19:54:14 +00007708 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00007709 xmlParseReference(ctxt);
7710 }
7711
7712 /*
7713 * Last case, text. Note that References are handled directly.
7714 */
7715 else {
7716 xmlParseCharData(ctxt, 0);
7717 }
7718
7719 GROW;
7720 /*
7721 * Pop-up of finished entities.
7722 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007723 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007724 xmlPopInput(ctxt);
7725 SHRINK;
7726
Daniel Veillardfdc91562002-07-01 21:52:03 +00007727 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007728 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7729 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007730 ctxt->instate = XML_PARSER_EOF;
7731 break;
7732 }
7733 }
7734}
7735
7736/**
7737 * xmlParseElement:
7738 * @ctxt: an XML parser context
7739 *
7740 * parse an XML element, this is highly recursive
7741 *
7742 * [39] element ::= EmptyElemTag | STag content ETag
7743 *
7744 * [ WFC: Element Type Match ]
7745 * The Name in an element's end-tag must match the element type in the
7746 * start-tag.
7747 *
Owen Taylor3473f882001-02-23 17:55:21 +00007748 */
7749
7750void
7751xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007752 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007753 const xmlChar *prefix;
7754 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00007755 xmlParserNodeInfo node_info;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007756 int line;
Owen Taylor3473f882001-02-23 17:55:21 +00007757 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007758 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00007759
7760 /* Capture start position */
7761 if (ctxt->record_info) {
7762 node_info.begin_pos = ctxt->input->consumed +
7763 (CUR_PTR - ctxt->input->base);
7764 node_info.begin_line = ctxt->input->line;
7765 }
7766
7767 if (ctxt->spaceNr == 0)
7768 spacePush(ctxt, -1);
7769 else
7770 spacePush(ctxt, *ctxt->space);
7771
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007772 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00007773#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007774 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00007775#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007776 name = xmlParseStartTag2(ctxt, &prefix, &URI);
Daniel Veillard81273902003-09-30 00:43:48 +00007777#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007778 else
7779 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00007780#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007781 if (name == NULL) {
7782 spacePop(ctxt);
7783 return;
7784 }
7785 namePush(ctxt, name);
7786 ret = ctxt->node;
7787
Daniel Veillard4432df22003-09-28 18:58:27 +00007788#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007789 /*
7790 * [ VC: Root Element Type ]
7791 * The Name in the document type declaration must match the element
7792 * type of the root element.
7793 */
7794 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7795 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7796 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00007797#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007798
7799 /*
7800 * Check for an Empty Element.
7801 */
7802 if ((RAW == '/') && (NXT(1) == '>')) {
7803 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007804 if (ctxt->sax2) {
7805 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7806 (!ctxt->disableSAX))
7807 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00007808#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007809 } else {
7810 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7811 (!ctxt->disableSAX))
7812 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00007813#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007814 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007815 namePop(ctxt);
7816 spacePop(ctxt);
7817 if (nsNr != ctxt->nsNr)
7818 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00007819 if ( ret != NULL && ctxt->record_info ) {
7820 node_info.end_pos = ctxt->input->consumed +
7821 (CUR_PTR - ctxt->input->base);
7822 node_info.end_line = ctxt->input->line;
7823 node_info.node = ret;
7824 xmlParserAddNodeInfo(ctxt, &node_info);
7825 }
7826 return;
7827 }
7828 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00007829 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007830 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00007831 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
7832 "Couldn't find end of Start Tag %s line %d\n",
7833 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007834
7835 /*
7836 * end of parsing of this node.
7837 */
7838 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007839 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007840 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007841 if (nsNr != ctxt->nsNr)
7842 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00007843
7844 /*
7845 * Capture end position and add node
7846 */
7847 if ( ret != NULL && ctxt->record_info ) {
7848 node_info.end_pos = ctxt->input->consumed +
7849 (CUR_PTR - ctxt->input->base);
7850 node_info.end_line = ctxt->input->line;
7851 node_info.node = ret;
7852 xmlParserAddNodeInfo(ctxt, &node_info);
7853 }
7854 return;
7855 }
7856
7857 /*
7858 * Parse the content of the element:
7859 */
7860 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00007861 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007862 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00007863 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007864 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007865
7866 /*
7867 * end of parsing of this node.
7868 */
7869 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007870 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007871 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007872 if (nsNr != ctxt->nsNr)
7873 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00007874 return;
7875 }
7876
7877 /*
7878 * parse the end of tag: '</' should be here.
7879 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007880 if (ctxt->sax2) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007881 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007882 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00007883 }
7884#ifdef LIBXML_SAX1_ENABLED
7885 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00007886 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00007887#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007888
7889 /*
7890 * Capture end position and add node
7891 */
7892 if ( ret != NULL && ctxt->record_info ) {
7893 node_info.end_pos = ctxt->input->consumed +
7894 (CUR_PTR - ctxt->input->base);
7895 node_info.end_line = ctxt->input->line;
7896 node_info.node = ret;
7897 xmlParserAddNodeInfo(ctxt, &node_info);
7898 }
7899}
7900
7901/**
7902 * xmlParseVersionNum:
7903 * @ctxt: an XML parser context
7904 *
7905 * parse the XML version value.
7906 *
7907 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7908 *
7909 * Returns the string giving the XML version number, or NULL
7910 */
7911xmlChar *
7912xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7913 xmlChar *buf = NULL;
7914 int len = 0;
7915 int size = 10;
7916 xmlChar cur;
7917
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007918 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007919 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007920 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007921 return(NULL);
7922 }
7923 cur = CUR;
7924 while (((cur >= 'a') && (cur <= 'z')) ||
7925 ((cur >= 'A') && (cur <= 'Z')) ||
7926 ((cur >= '0') && (cur <= '9')) ||
7927 (cur == '_') || (cur == '.') ||
7928 (cur == ':') || (cur == '-')) {
7929 if (len + 1 >= size) {
7930 size *= 2;
7931 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7932 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007933 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007934 return(NULL);
7935 }
7936 }
7937 buf[len++] = cur;
7938 NEXT;
7939 cur=CUR;
7940 }
7941 buf[len] = 0;
7942 return(buf);
7943}
7944
7945/**
7946 * xmlParseVersionInfo:
7947 * @ctxt: an XML parser context
7948 *
7949 * parse the XML version.
7950 *
7951 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7952 *
7953 * [25] Eq ::= S? '=' S?
7954 *
7955 * Returns the version string, e.g. "1.0"
7956 */
7957
7958xmlChar *
7959xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7960 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007961
Daniel Veillarda07050d2003-10-19 14:46:32 +00007962 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007963 SKIP(7);
7964 SKIP_BLANKS;
7965 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007966 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007967 return(NULL);
7968 }
7969 NEXT;
7970 SKIP_BLANKS;
7971 if (RAW == '"') {
7972 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007973 version = xmlParseVersionNum(ctxt);
7974 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007975 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007976 } else
7977 NEXT;
7978 } else if (RAW == '\''){
7979 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007980 version = xmlParseVersionNum(ctxt);
7981 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007982 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007983 } else
7984 NEXT;
7985 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007986 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007987 }
7988 }
7989 return(version);
7990}
7991
7992/**
7993 * xmlParseEncName:
7994 * @ctxt: an XML parser context
7995 *
7996 * parse the XML encoding name
7997 *
7998 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7999 *
8000 * Returns the encoding name value or NULL
8001 */
8002xmlChar *
8003xmlParseEncName(xmlParserCtxtPtr ctxt) {
8004 xmlChar *buf = NULL;
8005 int len = 0;
8006 int size = 10;
8007 xmlChar cur;
8008
8009 cur = CUR;
8010 if (((cur >= 'a') && (cur <= 'z')) ||
8011 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008012 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008013 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008014 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008015 return(NULL);
8016 }
8017
8018 buf[len++] = cur;
8019 NEXT;
8020 cur = CUR;
8021 while (((cur >= 'a') && (cur <= 'z')) ||
8022 ((cur >= 'A') && (cur <= 'Z')) ||
8023 ((cur >= '0') && (cur <= '9')) ||
8024 (cur == '.') || (cur == '_') ||
8025 (cur == '-')) {
8026 if (len + 1 >= size) {
8027 size *= 2;
8028 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8029 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008030 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008031 return(NULL);
8032 }
8033 }
8034 buf[len++] = cur;
8035 NEXT;
8036 cur = CUR;
8037 if (cur == 0) {
8038 SHRINK;
8039 GROW;
8040 cur = CUR;
8041 }
8042 }
8043 buf[len] = 0;
8044 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008045 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008046 }
8047 return(buf);
8048}
8049
8050/**
8051 * xmlParseEncodingDecl:
8052 * @ctxt: an XML parser context
8053 *
8054 * parse the XML encoding declaration
8055 *
8056 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8057 *
8058 * this setups the conversion filters.
8059 *
8060 * Returns the encoding value or NULL
8061 */
8062
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008063const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008064xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8065 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008066
8067 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008068 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008069 SKIP(8);
8070 SKIP_BLANKS;
8071 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008072 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008073 return(NULL);
8074 }
8075 NEXT;
8076 SKIP_BLANKS;
8077 if (RAW == '"') {
8078 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008079 encoding = xmlParseEncName(ctxt);
8080 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008081 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008082 } else
8083 NEXT;
8084 } else if (RAW == '\''){
8085 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008086 encoding = xmlParseEncName(ctxt);
8087 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008088 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008089 } else
8090 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008091 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008092 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008093 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008094 /*
8095 * UTF-16 encoding stwich has already taken place at this stage,
8096 * more over the little-endian/big-endian selection is already done
8097 */
8098 if ((encoding != NULL) &&
8099 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8100 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008101 if (ctxt->encoding != NULL)
8102 xmlFree((xmlChar *) ctxt->encoding);
8103 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008104 }
8105 /*
8106 * UTF-8 encoding is handled natively
8107 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008108 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008109 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8110 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008111 if (ctxt->encoding != NULL)
8112 xmlFree((xmlChar *) ctxt->encoding);
8113 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008114 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008115 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008116 xmlCharEncodingHandlerPtr handler;
8117
8118 if (ctxt->input->encoding != NULL)
8119 xmlFree((xmlChar *) ctxt->input->encoding);
8120 ctxt->input->encoding = encoding;
8121
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008122 handler = xmlFindCharEncodingHandler((const char *) encoding);
8123 if (handler != NULL) {
8124 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008125 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008126 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008127 "Unsupported encoding %s\n", encoding);
8128 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008129 }
8130 }
8131 }
8132 return(encoding);
8133}
8134
8135/**
8136 * xmlParseSDDecl:
8137 * @ctxt: an XML parser context
8138 *
8139 * parse the XML standalone declaration
8140 *
8141 * [32] SDDecl ::= S 'standalone' Eq
8142 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8143 *
8144 * [ VC: Standalone Document Declaration ]
8145 * TODO The standalone document declaration must have the value "no"
8146 * if any external markup declarations contain declarations of:
8147 * - attributes with default values, if elements to which these
8148 * attributes apply appear in the document without specifications
8149 * of values for these attributes, or
8150 * - entities (other than amp, lt, gt, apos, quot), if references
8151 * to those entities appear in the document, or
8152 * - attributes with values subject to normalization, where the
8153 * attribute appears in the document with a value which will change
8154 * as a result of normalization, or
8155 * - element types with element content, if white space occurs directly
8156 * within any instance of those types.
8157 *
8158 * Returns 1 if standalone, 0 otherwise
8159 */
8160
8161int
8162xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8163 int standalone = -1;
8164
8165 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008166 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008167 SKIP(10);
8168 SKIP_BLANKS;
8169 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008170 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008171 return(standalone);
8172 }
8173 NEXT;
8174 SKIP_BLANKS;
8175 if (RAW == '\''){
8176 NEXT;
8177 if ((RAW == 'n') && (NXT(1) == 'o')) {
8178 standalone = 0;
8179 SKIP(2);
8180 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8181 (NXT(2) == 's')) {
8182 standalone = 1;
8183 SKIP(3);
8184 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008185 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008186 }
8187 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008188 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008189 } else
8190 NEXT;
8191 } else if (RAW == '"'){
8192 NEXT;
8193 if ((RAW == 'n') && (NXT(1) == 'o')) {
8194 standalone = 0;
8195 SKIP(2);
8196 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8197 (NXT(2) == 's')) {
8198 standalone = 1;
8199 SKIP(3);
8200 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008201 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008202 }
8203 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008204 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008205 } else
8206 NEXT;
8207 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008208 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008209 }
8210 }
8211 return(standalone);
8212}
8213
8214/**
8215 * xmlParseXMLDecl:
8216 * @ctxt: an XML parser context
8217 *
8218 * parse an XML declaration header
8219 *
8220 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8221 */
8222
8223void
8224xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8225 xmlChar *version;
8226
8227 /*
8228 * We know that '<?xml' is here.
8229 */
8230 SKIP(5);
8231
William M. Brack76e95df2003-10-18 16:20:14 +00008232 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008233 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8234 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008235 }
8236 SKIP_BLANKS;
8237
8238 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008239 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008240 */
8241 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008242 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008243 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008244 } else {
8245 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8246 /*
8247 * TODO: Blueberry should be detected here
8248 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008249 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8250 "Unsupported version '%s'\n",
8251 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008252 }
8253 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008254 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008255 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008256 }
Owen Taylor3473f882001-02-23 17:55:21 +00008257
8258 /*
8259 * We may have the encoding declaration
8260 */
William M. Brack76e95df2003-10-18 16:20:14 +00008261 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008262 if ((RAW == '?') && (NXT(1) == '>')) {
8263 SKIP(2);
8264 return;
8265 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008266 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008267 }
8268 xmlParseEncodingDecl(ctxt);
8269 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8270 /*
8271 * The XML REC instructs us to stop parsing right here
8272 */
8273 return;
8274 }
8275
8276 /*
8277 * We may have the standalone status.
8278 */
William M. Brack76e95df2003-10-18 16:20:14 +00008279 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008280 if ((RAW == '?') && (NXT(1) == '>')) {
8281 SKIP(2);
8282 return;
8283 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008284 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008285 }
8286 SKIP_BLANKS;
8287 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8288
8289 SKIP_BLANKS;
8290 if ((RAW == '?') && (NXT(1) == '>')) {
8291 SKIP(2);
8292 } else if (RAW == '>') {
8293 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008294 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008295 NEXT;
8296 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008297 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008298 MOVETO_ENDTAG(CUR_PTR);
8299 NEXT;
8300 }
8301}
8302
8303/**
8304 * xmlParseMisc:
8305 * @ctxt: an XML parser context
8306 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008307 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008308 *
8309 * [27] Misc ::= Comment | PI | S
8310 */
8311
8312void
8313xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008314 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00008315 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00008316 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008317 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008318 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00008319 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008320 NEXT;
8321 } else
8322 xmlParseComment(ctxt);
8323 }
8324}
8325
8326/**
8327 * xmlParseDocument:
8328 * @ctxt: an XML parser context
8329 *
8330 * parse an XML document (and build a tree if using the standard SAX
8331 * interface).
8332 *
8333 * [1] document ::= prolog element Misc*
8334 *
8335 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8336 *
8337 * Returns 0, -1 in case of error. the parser context is augmented
8338 * as a result of the parsing.
8339 */
8340
8341int
8342xmlParseDocument(xmlParserCtxtPtr ctxt) {
8343 xmlChar start[4];
8344 xmlCharEncoding enc;
8345
8346 xmlInitParser();
8347
8348 GROW;
8349
8350 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008351 * SAX: detecting the level.
8352 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008353 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008354
8355 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008356 * SAX: beginning of the document processing.
8357 */
8358 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8359 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8360
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008361 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
8362 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008363 /*
8364 * Get the 4 first bytes and decode the charset
8365 * if enc != XML_CHAR_ENCODING_NONE
8366 * plug some encoding conversion routines.
8367 */
8368 start[0] = RAW;
8369 start[1] = NXT(1);
8370 start[2] = NXT(2);
8371 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008372 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00008373 if (enc != XML_CHAR_ENCODING_NONE) {
8374 xmlSwitchEncoding(ctxt, enc);
8375 }
Owen Taylor3473f882001-02-23 17:55:21 +00008376 }
8377
8378
8379 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008380 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008381 }
8382
8383 /*
8384 * Check for the XMLDecl in the Prolog.
8385 */
8386 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008387 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008388
8389 /*
8390 * Note that we will switch encoding on the fly.
8391 */
8392 xmlParseXMLDecl(ctxt);
8393 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8394 /*
8395 * The XML REC instructs us to stop parsing right here
8396 */
8397 return(-1);
8398 }
8399 ctxt->standalone = ctxt->input->standalone;
8400 SKIP_BLANKS;
8401 } else {
8402 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8403 }
8404 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8405 ctxt->sax->startDocument(ctxt->userData);
8406
8407 /*
8408 * The Misc part of the Prolog
8409 */
8410 GROW;
8411 xmlParseMisc(ctxt);
8412
8413 /*
8414 * Then possibly doc type declaration(s) and more Misc
8415 * (doctypedecl Misc*)?
8416 */
8417 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008418 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008419
8420 ctxt->inSubset = 1;
8421 xmlParseDocTypeDecl(ctxt);
8422 if (RAW == '[') {
8423 ctxt->instate = XML_PARSER_DTD;
8424 xmlParseInternalSubset(ctxt);
8425 }
8426
8427 /*
8428 * Create and update the external subset.
8429 */
8430 ctxt->inSubset = 2;
8431 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8432 (!ctxt->disableSAX))
8433 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8434 ctxt->extSubSystem, ctxt->extSubURI);
8435 ctxt->inSubset = 0;
8436
8437
8438 ctxt->instate = XML_PARSER_PROLOG;
8439 xmlParseMisc(ctxt);
8440 }
8441
8442 /*
8443 * Time to start parsing the tree itself
8444 */
8445 GROW;
8446 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008447 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8448 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008449 } else {
8450 ctxt->instate = XML_PARSER_CONTENT;
8451 xmlParseElement(ctxt);
8452 ctxt->instate = XML_PARSER_EPILOG;
8453
8454
8455 /*
8456 * The Misc part at the end
8457 */
8458 xmlParseMisc(ctxt);
8459
Daniel Veillard561b7f82002-03-20 21:55:57 +00008460 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008461 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008462 }
8463 ctxt->instate = XML_PARSER_EOF;
8464 }
8465
8466 /*
8467 * SAX: end of the document processing.
8468 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008469 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008470 ctxt->sax->endDocument(ctxt->userData);
8471
Daniel Veillard5997aca2002-03-18 18:36:20 +00008472 /*
8473 * Remove locally kept entity definitions if the tree was not built
8474 */
8475 if ((ctxt->myDoc != NULL) &&
8476 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8477 xmlFreeDoc(ctxt->myDoc);
8478 ctxt->myDoc = NULL;
8479 }
8480
Daniel Veillardc7612992002-02-17 22:47:37 +00008481 if (! ctxt->wellFormed) {
8482 ctxt->valid = 0;
8483 return(-1);
8484 }
Owen Taylor3473f882001-02-23 17:55:21 +00008485 return(0);
8486}
8487
8488/**
8489 * xmlParseExtParsedEnt:
8490 * @ctxt: an XML parser context
8491 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008492 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008493 * An external general parsed entity is well-formed if it matches the
8494 * production labeled extParsedEnt.
8495 *
8496 * [78] extParsedEnt ::= TextDecl? content
8497 *
8498 * Returns 0, -1 in case of error. the parser context is augmented
8499 * as a result of the parsing.
8500 */
8501
8502int
8503xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8504 xmlChar start[4];
8505 xmlCharEncoding enc;
8506
8507 xmlDefaultSAXHandlerInit();
8508
Daniel Veillard309f81d2003-09-23 09:02:53 +00008509 xmlDetectSAX2(ctxt);
8510
Owen Taylor3473f882001-02-23 17:55:21 +00008511 GROW;
8512
8513 /*
8514 * SAX: beginning of the document processing.
8515 */
8516 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8517 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8518
8519 /*
8520 * Get the 4 first bytes and decode the charset
8521 * if enc != XML_CHAR_ENCODING_NONE
8522 * plug some encoding conversion routines.
8523 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008524 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
8525 start[0] = RAW;
8526 start[1] = NXT(1);
8527 start[2] = NXT(2);
8528 start[3] = NXT(3);
8529 enc = xmlDetectCharEncoding(start, 4);
8530 if (enc != XML_CHAR_ENCODING_NONE) {
8531 xmlSwitchEncoding(ctxt, enc);
8532 }
Owen Taylor3473f882001-02-23 17:55:21 +00008533 }
8534
8535
8536 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008537 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008538 }
8539
8540 /*
8541 * Check for the XMLDecl in the Prolog.
8542 */
8543 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008544 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008545
8546 /*
8547 * Note that we will switch encoding on the fly.
8548 */
8549 xmlParseXMLDecl(ctxt);
8550 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8551 /*
8552 * The XML REC instructs us to stop parsing right here
8553 */
8554 return(-1);
8555 }
8556 SKIP_BLANKS;
8557 } else {
8558 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8559 }
8560 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8561 ctxt->sax->startDocument(ctxt->userData);
8562
8563 /*
8564 * Doing validity checking on chunk doesn't make sense
8565 */
8566 ctxt->instate = XML_PARSER_CONTENT;
8567 ctxt->validate = 0;
8568 ctxt->loadsubset = 0;
8569 ctxt->depth = 0;
8570
8571 xmlParseContent(ctxt);
8572
8573 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008574 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008575 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008576 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008577 }
8578
8579 /*
8580 * SAX: end of the document processing.
8581 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008582 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008583 ctxt->sax->endDocument(ctxt->userData);
8584
8585 if (! ctxt->wellFormed) return(-1);
8586 return(0);
8587}
8588
Daniel Veillard73b013f2003-09-30 12:36:01 +00008589#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008590/************************************************************************
8591 * *
8592 * Progressive parsing interfaces *
8593 * *
8594 ************************************************************************/
8595
8596/**
8597 * xmlParseLookupSequence:
8598 * @ctxt: an XML parser context
8599 * @first: the first char to lookup
8600 * @next: the next char to lookup or zero
8601 * @third: the next char to lookup or zero
8602 *
8603 * Try to find if a sequence (first, next, third) or just (first next) or
8604 * (first) is available in the input stream.
8605 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8606 * to avoid rescanning sequences of bytes, it DOES change the state of the
8607 * parser, do not use liberally.
8608 *
8609 * Returns the index to the current parsing point if the full sequence
8610 * is available, -1 otherwise.
8611 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008612static int
Owen Taylor3473f882001-02-23 17:55:21 +00008613xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8614 xmlChar next, xmlChar third) {
8615 int base, len;
8616 xmlParserInputPtr in;
8617 const xmlChar *buf;
8618
8619 in = ctxt->input;
8620 if (in == NULL) return(-1);
8621 base = in->cur - in->base;
8622 if (base < 0) return(-1);
8623 if (ctxt->checkIndex > base)
8624 base = ctxt->checkIndex;
8625 if (in->buf == NULL) {
8626 buf = in->base;
8627 len = in->length;
8628 } else {
8629 buf = in->buf->buffer->content;
8630 len = in->buf->buffer->use;
8631 }
8632 /* take into account the sequence length */
8633 if (third) len -= 2;
8634 else if (next) len --;
8635 for (;base < len;base++) {
8636 if (buf[base] == first) {
8637 if (third != 0) {
8638 if ((buf[base + 1] != next) ||
8639 (buf[base + 2] != third)) continue;
8640 } else if (next != 0) {
8641 if (buf[base + 1] != next) continue;
8642 }
8643 ctxt->checkIndex = 0;
8644#ifdef DEBUG_PUSH
8645 if (next == 0)
8646 xmlGenericError(xmlGenericErrorContext,
8647 "PP: lookup '%c' found at %d\n",
8648 first, base);
8649 else if (third == 0)
8650 xmlGenericError(xmlGenericErrorContext,
8651 "PP: lookup '%c%c' found at %d\n",
8652 first, next, base);
8653 else
8654 xmlGenericError(xmlGenericErrorContext,
8655 "PP: lookup '%c%c%c' found at %d\n",
8656 first, next, third, base);
8657#endif
8658 return(base - (in->cur - in->base));
8659 }
8660 }
8661 ctxt->checkIndex = base;
8662#ifdef DEBUG_PUSH
8663 if (next == 0)
8664 xmlGenericError(xmlGenericErrorContext,
8665 "PP: lookup '%c' failed\n", first);
8666 else if (third == 0)
8667 xmlGenericError(xmlGenericErrorContext,
8668 "PP: lookup '%c%c' failed\n", first, next);
8669 else
8670 xmlGenericError(xmlGenericErrorContext,
8671 "PP: lookup '%c%c%c' failed\n", first, next, third);
8672#endif
8673 return(-1);
8674}
8675
8676/**
Daniel Veillarda880b122003-04-21 21:36:41 +00008677 * xmlParseGetLasts:
8678 * @ctxt: an XML parser context
8679 * @lastlt: pointer to store the last '<' from the input
8680 * @lastgt: pointer to store the last '>' from the input
8681 *
8682 * Lookup the last < and > in the current chunk
8683 */
8684static void
8685xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
8686 const xmlChar **lastgt) {
8687 const xmlChar *tmp;
8688
8689 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
8690 xmlGenericError(xmlGenericErrorContext,
8691 "Internal error: xmlParseGetLasts\n");
8692 return;
8693 }
8694 if ((ctxt->progressive == 1) && (ctxt->inputNr == 1)) {
8695 tmp = ctxt->input->end;
8696 tmp--;
8697 while ((tmp >= ctxt->input->base) && (*tmp != '<') &&
8698 (*tmp != '>')) tmp--;
8699 if (tmp < ctxt->input->base) {
8700 *lastlt = NULL;
8701 *lastgt = NULL;
8702 } else if (*tmp == '<') {
8703 *lastlt = tmp;
8704 tmp--;
8705 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
8706 if (tmp < ctxt->input->base)
8707 *lastgt = NULL;
8708 else
8709 *lastgt = tmp;
8710 } else {
8711 *lastgt = tmp;
8712 tmp--;
8713 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
8714 if (tmp < ctxt->input->base)
8715 *lastlt = NULL;
8716 else
8717 *lastlt = tmp;
8718 }
8719
8720 } else {
8721 *lastlt = NULL;
8722 *lastgt = NULL;
8723 }
8724}
8725/**
Owen Taylor3473f882001-02-23 17:55:21 +00008726 * xmlParseTryOrFinish:
8727 * @ctxt: an XML parser context
8728 * @terminate: last chunk indicator
8729 *
8730 * Try to progress on parsing
8731 *
8732 * Returns zero if no parsing was possible
8733 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008734static int
Owen Taylor3473f882001-02-23 17:55:21 +00008735xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8736 int ret = 0;
8737 int avail;
8738 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00008739 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00008740
8741#ifdef DEBUG_PUSH
8742 switch (ctxt->instate) {
8743 case XML_PARSER_EOF:
8744 xmlGenericError(xmlGenericErrorContext,
8745 "PP: try EOF\n"); break;
8746 case XML_PARSER_START:
8747 xmlGenericError(xmlGenericErrorContext,
8748 "PP: try START\n"); break;
8749 case XML_PARSER_MISC:
8750 xmlGenericError(xmlGenericErrorContext,
8751 "PP: try MISC\n");break;
8752 case XML_PARSER_COMMENT:
8753 xmlGenericError(xmlGenericErrorContext,
8754 "PP: try COMMENT\n");break;
8755 case XML_PARSER_PROLOG:
8756 xmlGenericError(xmlGenericErrorContext,
8757 "PP: try PROLOG\n");break;
8758 case XML_PARSER_START_TAG:
8759 xmlGenericError(xmlGenericErrorContext,
8760 "PP: try START_TAG\n");break;
8761 case XML_PARSER_CONTENT:
8762 xmlGenericError(xmlGenericErrorContext,
8763 "PP: try CONTENT\n");break;
8764 case XML_PARSER_CDATA_SECTION:
8765 xmlGenericError(xmlGenericErrorContext,
8766 "PP: try CDATA_SECTION\n");break;
8767 case XML_PARSER_END_TAG:
8768 xmlGenericError(xmlGenericErrorContext,
8769 "PP: try END_TAG\n");break;
8770 case XML_PARSER_ENTITY_DECL:
8771 xmlGenericError(xmlGenericErrorContext,
8772 "PP: try ENTITY_DECL\n");break;
8773 case XML_PARSER_ENTITY_VALUE:
8774 xmlGenericError(xmlGenericErrorContext,
8775 "PP: try ENTITY_VALUE\n");break;
8776 case XML_PARSER_ATTRIBUTE_VALUE:
8777 xmlGenericError(xmlGenericErrorContext,
8778 "PP: try ATTRIBUTE_VALUE\n");break;
8779 case XML_PARSER_DTD:
8780 xmlGenericError(xmlGenericErrorContext,
8781 "PP: try DTD\n");break;
8782 case XML_PARSER_EPILOG:
8783 xmlGenericError(xmlGenericErrorContext,
8784 "PP: try EPILOG\n");break;
8785 case XML_PARSER_PI:
8786 xmlGenericError(xmlGenericErrorContext,
8787 "PP: try PI\n");break;
8788 case XML_PARSER_IGNORE:
8789 xmlGenericError(xmlGenericErrorContext,
8790 "PP: try IGNORE\n");break;
8791 }
8792#endif
8793
Daniel Veillard198c1bf2003-10-20 17:07:41 +00008794 if ((ctxt->input != NULL) &&
8795 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00008796 xmlSHRINK(ctxt);
8797 ctxt->checkIndex = 0;
8798 }
8799 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00008800
Daniel Veillarda880b122003-04-21 21:36:41 +00008801 while (1) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008802 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
8803 return(0);
8804
8805
Owen Taylor3473f882001-02-23 17:55:21 +00008806 /*
8807 * Pop-up of finished entities.
8808 */
8809 while ((RAW == 0) && (ctxt->inputNr > 1))
8810 xmlPopInput(ctxt);
8811
Daniel Veillard198c1bf2003-10-20 17:07:41 +00008812 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00008813 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00008814 avail = ctxt->input->length -
8815 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00008816 else {
8817 /*
8818 * If we are operating on converted input, try to flush
8819 * remainng chars to avoid them stalling in the non-converted
8820 * buffer.
8821 */
8822 if ((ctxt->input->buf->raw != NULL) &&
8823 (ctxt->input->buf->raw->use > 0)) {
8824 int base = ctxt->input->base -
8825 ctxt->input->buf->buffer->content;
8826 int current = ctxt->input->cur - ctxt->input->base;
8827
8828 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8829 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8830 ctxt->input->cur = ctxt->input->base + current;
8831 ctxt->input->end =
8832 &ctxt->input->buf->buffer->content[
8833 ctxt->input->buf->buffer->use];
8834 }
8835 avail = ctxt->input->buf->buffer->use -
8836 (ctxt->input->cur - ctxt->input->base);
8837 }
Owen Taylor3473f882001-02-23 17:55:21 +00008838 if (avail < 1)
8839 goto done;
8840 switch (ctxt->instate) {
8841 case XML_PARSER_EOF:
8842 /*
8843 * Document parsing is done !
8844 */
8845 goto done;
8846 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008847 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8848 xmlChar start[4];
8849 xmlCharEncoding enc;
8850
8851 /*
8852 * Very first chars read from the document flow.
8853 */
8854 if (avail < 4)
8855 goto done;
8856
8857 /*
8858 * Get the 4 first bytes and decode the charset
8859 * if enc != XML_CHAR_ENCODING_NONE
8860 * plug some encoding conversion routines.
8861 */
8862 start[0] = RAW;
8863 start[1] = NXT(1);
8864 start[2] = NXT(2);
8865 start[3] = NXT(3);
8866 enc = xmlDetectCharEncoding(start, 4);
8867 if (enc != XML_CHAR_ENCODING_NONE) {
8868 xmlSwitchEncoding(ctxt, enc);
8869 }
8870 break;
8871 }
Owen Taylor3473f882001-02-23 17:55:21 +00008872
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00008873 if (avail < 2)
8874 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00008875 cur = ctxt->input->cur[0];
8876 next = ctxt->input->cur[1];
8877 if (cur == 0) {
8878 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8879 ctxt->sax->setDocumentLocator(ctxt->userData,
8880 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008881 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008882 ctxt->instate = XML_PARSER_EOF;
8883#ifdef DEBUG_PUSH
8884 xmlGenericError(xmlGenericErrorContext,
8885 "PP: entering EOF\n");
8886#endif
8887 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8888 ctxt->sax->endDocument(ctxt->userData);
8889 goto done;
8890 }
8891 if ((cur == '<') && (next == '?')) {
8892 /* PI or XML decl */
8893 if (avail < 5) return(ret);
8894 if ((!terminate) &&
8895 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8896 return(ret);
8897 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8898 ctxt->sax->setDocumentLocator(ctxt->userData,
8899 &xmlDefaultSAXLocator);
8900 if ((ctxt->input->cur[2] == 'x') &&
8901 (ctxt->input->cur[3] == 'm') &&
8902 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00008903 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008904 ret += 5;
8905#ifdef DEBUG_PUSH
8906 xmlGenericError(xmlGenericErrorContext,
8907 "PP: Parsing XML Decl\n");
8908#endif
8909 xmlParseXMLDecl(ctxt);
8910 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8911 /*
8912 * The XML REC instructs us to stop parsing right
8913 * here
8914 */
8915 ctxt->instate = XML_PARSER_EOF;
8916 return(0);
8917 }
8918 ctxt->standalone = ctxt->input->standalone;
8919 if ((ctxt->encoding == NULL) &&
8920 (ctxt->input->encoding != NULL))
8921 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8922 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8923 (!ctxt->disableSAX))
8924 ctxt->sax->startDocument(ctxt->userData);
8925 ctxt->instate = XML_PARSER_MISC;
8926#ifdef DEBUG_PUSH
8927 xmlGenericError(xmlGenericErrorContext,
8928 "PP: entering MISC\n");
8929#endif
8930 } else {
8931 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8932 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8933 (!ctxt->disableSAX))
8934 ctxt->sax->startDocument(ctxt->userData);
8935 ctxt->instate = XML_PARSER_MISC;
8936#ifdef DEBUG_PUSH
8937 xmlGenericError(xmlGenericErrorContext,
8938 "PP: entering MISC\n");
8939#endif
8940 }
8941 } else {
8942 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8943 ctxt->sax->setDocumentLocator(ctxt->userData,
8944 &xmlDefaultSAXLocator);
8945 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8946 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8947 (!ctxt->disableSAX))
8948 ctxt->sax->startDocument(ctxt->userData);
8949 ctxt->instate = XML_PARSER_MISC;
8950#ifdef DEBUG_PUSH
8951 xmlGenericError(xmlGenericErrorContext,
8952 "PP: entering MISC\n");
8953#endif
8954 }
8955 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00008956 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008957 const xmlChar *name;
8958 const xmlChar *prefix;
8959 const xmlChar *URI;
8960 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00008961
8962 if ((avail < 2) && (ctxt->inputNr == 1))
8963 goto done;
8964 cur = ctxt->input->cur[0];
8965 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008966 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00008967 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00008968 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8969 ctxt->sax->endDocument(ctxt->userData);
8970 goto done;
8971 }
8972 if (!terminate) {
8973 if (ctxt->progressive) {
8974 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
8975 goto done;
8976 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
8977 goto done;
8978 }
8979 }
8980 if (ctxt->spaceNr == 0)
8981 spacePush(ctxt, -1);
8982 else
8983 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00008984#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00008985 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008986#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008987 name = xmlParseStartTag2(ctxt, &prefix, &URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008988#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00008989 else
8990 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008991#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00008992 if (name == NULL) {
8993 spacePop(ctxt);
8994 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00008995 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8996 ctxt->sax->endDocument(ctxt->userData);
8997 goto done;
8998 }
Daniel Veillard4432df22003-09-28 18:58:27 +00008999#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009000 /*
9001 * [ VC: Root Element Type ]
9002 * The Name in the document type declaration must match
9003 * the element type of the root element.
9004 */
9005 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9006 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9007 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009008#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009009
9010 /*
9011 * Check for an Empty Element.
9012 */
9013 if ((RAW == '/') && (NXT(1) == '>')) {
9014 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009015
9016 if (ctxt->sax2) {
9017 if ((ctxt->sax != NULL) &&
9018 (ctxt->sax->endElementNs != NULL) &&
9019 (!ctxt->disableSAX))
9020 ctxt->sax->endElementNs(ctxt->userData, name,
9021 prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009022#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009023 } else {
9024 if ((ctxt->sax != NULL) &&
9025 (ctxt->sax->endElement != NULL) &&
9026 (!ctxt->disableSAX))
9027 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009028#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009029 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009030 spacePop(ctxt);
9031 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009032 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009033 } else {
9034 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009035 }
9036 break;
9037 }
9038 if (RAW == '>') {
9039 NEXT;
9040 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009041 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009042 "Couldn't find end of Start Tag %s\n",
9043 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009044 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009045 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009046 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009047 if (ctxt->sax2)
9048 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009049#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009050 else
9051 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009052#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009053
Daniel Veillarda880b122003-04-21 21:36:41 +00009054 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009055 break;
9056 }
9057 case XML_PARSER_CONTENT: {
9058 const xmlChar *test;
9059 unsigned int cons;
9060 if ((avail < 2) && (ctxt->inputNr == 1))
9061 goto done;
9062 cur = ctxt->input->cur[0];
9063 next = ctxt->input->cur[1];
9064
9065 test = CUR_PTR;
9066 cons = ctxt->input->consumed;
9067 if ((cur == '<') && (next == '/')) {
9068 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009069 break;
9070 } else if ((cur == '<') && (next == '?')) {
9071 if ((!terminate) &&
9072 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9073 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009074 xmlParsePI(ctxt);
9075 } else if ((cur == '<') && (next != '!')) {
9076 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009077 break;
9078 } else if ((cur == '<') && (next == '!') &&
9079 (ctxt->input->cur[2] == '-') &&
9080 (ctxt->input->cur[3] == '-')) {
9081 if ((!terminate) &&
9082 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9083 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009084 xmlParseComment(ctxt);
9085 ctxt->instate = XML_PARSER_CONTENT;
9086 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9087 (ctxt->input->cur[2] == '[') &&
9088 (ctxt->input->cur[3] == 'C') &&
9089 (ctxt->input->cur[4] == 'D') &&
9090 (ctxt->input->cur[5] == 'A') &&
9091 (ctxt->input->cur[6] == 'T') &&
9092 (ctxt->input->cur[7] == 'A') &&
9093 (ctxt->input->cur[8] == '[')) {
9094 SKIP(9);
9095 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009096 break;
9097 } else if ((cur == '<') && (next == '!') &&
9098 (avail < 9)) {
9099 goto done;
9100 } else if (cur == '&') {
9101 if ((!terminate) &&
9102 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9103 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009104 xmlParseReference(ctxt);
9105 } else {
9106 /* TODO Avoid the extra copy, handle directly !!! */
9107 /*
9108 * Goal of the following test is:
9109 * - minimize calls to the SAX 'character' callback
9110 * when they are mergeable
9111 * - handle an problem for isBlank when we only parse
9112 * a sequence of blank chars and the next one is
9113 * not available to check against '<' presence.
9114 * - tries to homogenize the differences in SAX
9115 * callbacks between the push and pull versions
9116 * of the parser.
9117 */
9118 if ((ctxt->inputNr == 1) &&
9119 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9120 if (!terminate) {
9121 if (ctxt->progressive) {
9122 if ((lastlt == NULL) ||
9123 (ctxt->input->cur > lastlt))
9124 goto done;
9125 } else if (xmlParseLookupSequence(ctxt,
9126 '<', 0, 0) < 0) {
9127 goto done;
9128 }
9129 }
9130 }
9131 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009132 xmlParseCharData(ctxt, 0);
9133 }
9134 /*
9135 * Pop-up of finished entities.
9136 */
9137 while ((RAW == 0) && (ctxt->inputNr > 1))
9138 xmlPopInput(ctxt);
9139 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009140 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9141 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009142 ctxt->instate = XML_PARSER_EOF;
9143 break;
9144 }
9145 break;
9146 }
9147 case XML_PARSER_END_TAG:
9148 if (avail < 2)
9149 goto done;
9150 if (!terminate) {
9151 if (ctxt->progressive) {
9152 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9153 goto done;
9154 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9155 goto done;
9156 }
9157 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009158 if (ctxt->sax2) {
9159 xmlParseEndTag2(ctxt,
9160 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9161 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
9162 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1]);
9163 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009164 }
9165#ifdef LIBXML_SAX1_ENABLED
9166 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009167 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009168#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009169 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009170 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009171 } else {
9172 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009173 }
9174 break;
9175 case XML_PARSER_CDATA_SECTION: {
9176 /*
9177 * The Push mode need to have the SAX callback for
9178 * cdataBlock merge back contiguous callbacks.
9179 */
9180 int base;
9181
9182 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9183 if (base < 0) {
9184 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
9185 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9186 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009187 ctxt->sax->cdataBlock(ctxt->userData,
9188 ctxt->input->cur,
9189 XML_PARSER_BIG_BUFFER_SIZE);
9190 else if (ctxt->sax->characters != NULL)
9191 ctxt->sax->characters(ctxt->userData,
9192 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00009193 XML_PARSER_BIG_BUFFER_SIZE);
9194 }
9195 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
9196 ctxt->checkIndex = 0;
9197 }
9198 goto done;
9199 } else {
9200 if ((ctxt->sax != NULL) && (base > 0) &&
9201 (!ctxt->disableSAX)) {
9202 if (ctxt->sax->cdataBlock != NULL)
9203 ctxt->sax->cdataBlock(ctxt->userData,
9204 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009205 else if (ctxt->sax->characters != NULL)
9206 ctxt->sax->characters(ctxt->userData,
9207 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009208 }
9209 SKIP(base + 3);
9210 ctxt->checkIndex = 0;
9211 ctxt->instate = XML_PARSER_CONTENT;
9212#ifdef DEBUG_PUSH
9213 xmlGenericError(xmlGenericErrorContext,
9214 "PP: entering CONTENT\n");
9215#endif
9216 }
9217 break;
9218 }
Owen Taylor3473f882001-02-23 17:55:21 +00009219 case XML_PARSER_MISC:
9220 SKIP_BLANKS;
9221 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009222 avail = ctxt->input->length -
9223 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009224 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009225 avail = ctxt->input->buf->buffer->use -
9226 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009227 if (avail < 2)
9228 goto done;
9229 cur = ctxt->input->cur[0];
9230 next = ctxt->input->cur[1];
9231 if ((cur == '<') && (next == '?')) {
9232 if ((!terminate) &&
9233 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9234 goto done;
9235#ifdef DEBUG_PUSH
9236 xmlGenericError(xmlGenericErrorContext,
9237 "PP: Parsing PI\n");
9238#endif
9239 xmlParsePI(ctxt);
9240 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009241 (ctxt->input->cur[2] == '-') &&
9242 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009243 if ((!terminate) &&
9244 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9245 goto done;
9246#ifdef DEBUG_PUSH
9247 xmlGenericError(xmlGenericErrorContext,
9248 "PP: Parsing Comment\n");
9249#endif
9250 xmlParseComment(ctxt);
9251 ctxt->instate = XML_PARSER_MISC;
9252 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009253 (ctxt->input->cur[2] == 'D') &&
9254 (ctxt->input->cur[3] == 'O') &&
9255 (ctxt->input->cur[4] == 'C') &&
9256 (ctxt->input->cur[5] == 'T') &&
9257 (ctxt->input->cur[6] == 'Y') &&
9258 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009259 (ctxt->input->cur[8] == 'E')) {
9260 if ((!terminate) &&
9261 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9262 goto done;
9263#ifdef DEBUG_PUSH
9264 xmlGenericError(xmlGenericErrorContext,
9265 "PP: Parsing internal subset\n");
9266#endif
9267 ctxt->inSubset = 1;
9268 xmlParseDocTypeDecl(ctxt);
9269 if (RAW == '[') {
9270 ctxt->instate = XML_PARSER_DTD;
9271#ifdef DEBUG_PUSH
9272 xmlGenericError(xmlGenericErrorContext,
9273 "PP: entering DTD\n");
9274#endif
9275 } else {
9276 /*
9277 * Create and update the external subset.
9278 */
9279 ctxt->inSubset = 2;
9280 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9281 (ctxt->sax->externalSubset != NULL))
9282 ctxt->sax->externalSubset(ctxt->userData,
9283 ctxt->intSubName, ctxt->extSubSystem,
9284 ctxt->extSubURI);
9285 ctxt->inSubset = 0;
9286 ctxt->instate = XML_PARSER_PROLOG;
9287#ifdef DEBUG_PUSH
9288 xmlGenericError(xmlGenericErrorContext,
9289 "PP: entering PROLOG\n");
9290#endif
9291 }
9292 } else if ((cur == '<') && (next == '!') &&
9293 (avail < 9)) {
9294 goto done;
9295 } else {
9296 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009297 ctxt->progressive = 1;
9298 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009299#ifdef DEBUG_PUSH
9300 xmlGenericError(xmlGenericErrorContext,
9301 "PP: entering START_TAG\n");
9302#endif
9303 }
9304 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009305 case XML_PARSER_PROLOG:
9306 SKIP_BLANKS;
9307 if (ctxt->input->buf == NULL)
9308 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9309 else
9310 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9311 if (avail < 2)
9312 goto done;
9313 cur = ctxt->input->cur[0];
9314 next = ctxt->input->cur[1];
9315 if ((cur == '<') && (next == '?')) {
9316 if ((!terminate) &&
9317 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9318 goto done;
9319#ifdef DEBUG_PUSH
9320 xmlGenericError(xmlGenericErrorContext,
9321 "PP: Parsing PI\n");
9322#endif
9323 xmlParsePI(ctxt);
9324 } else if ((cur == '<') && (next == '!') &&
9325 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9326 if ((!terminate) &&
9327 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9328 goto done;
9329#ifdef DEBUG_PUSH
9330 xmlGenericError(xmlGenericErrorContext,
9331 "PP: Parsing Comment\n");
9332#endif
9333 xmlParseComment(ctxt);
9334 ctxt->instate = XML_PARSER_PROLOG;
9335 } else if ((cur == '<') && (next == '!') &&
9336 (avail < 4)) {
9337 goto done;
9338 } else {
9339 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009340 ctxt->progressive = 1;
9341 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009342#ifdef DEBUG_PUSH
9343 xmlGenericError(xmlGenericErrorContext,
9344 "PP: entering START_TAG\n");
9345#endif
9346 }
9347 break;
9348 case XML_PARSER_EPILOG:
9349 SKIP_BLANKS;
9350 if (ctxt->input->buf == NULL)
9351 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9352 else
9353 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9354 if (avail < 2)
9355 goto done;
9356 cur = ctxt->input->cur[0];
9357 next = ctxt->input->cur[1];
9358 if ((cur == '<') && (next == '?')) {
9359 if ((!terminate) &&
9360 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9361 goto done;
9362#ifdef DEBUG_PUSH
9363 xmlGenericError(xmlGenericErrorContext,
9364 "PP: Parsing PI\n");
9365#endif
9366 xmlParsePI(ctxt);
9367 ctxt->instate = XML_PARSER_EPILOG;
9368 } else if ((cur == '<') && (next == '!') &&
9369 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9370 if ((!terminate) &&
9371 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9372 goto done;
9373#ifdef DEBUG_PUSH
9374 xmlGenericError(xmlGenericErrorContext,
9375 "PP: Parsing Comment\n");
9376#endif
9377 xmlParseComment(ctxt);
9378 ctxt->instate = XML_PARSER_EPILOG;
9379 } else if ((cur == '<') && (next == '!') &&
9380 (avail < 4)) {
9381 goto done;
9382 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009383 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009384 ctxt->instate = XML_PARSER_EOF;
9385#ifdef DEBUG_PUSH
9386 xmlGenericError(xmlGenericErrorContext,
9387 "PP: entering EOF\n");
9388#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009389 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009390 ctxt->sax->endDocument(ctxt->userData);
9391 goto done;
9392 }
9393 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009394 case XML_PARSER_DTD: {
9395 /*
9396 * Sorry but progressive parsing of the internal subset
9397 * is not expected to be supported. We first check that
9398 * the full content of the internal subset is available and
9399 * the parsing is launched only at that point.
9400 * Internal subset ends up with "']' S? '>'" in an unescaped
9401 * section and not in a ']]>' sequence which are conditional
9402 * sections (whoever argued to keep that crap in XML deserve
9403 * a place in hell !).
9404 */
9405 int base, i;
9406 xmlChar *buf;
9407 xmlChar quote = 0;
9408
9409 base = ctxt->input->cur - ctxt->input->base;
9410 if (base < 0) return(0);
9411 if (ctxt->checkIndex > base)
9412 base = ctxt->checkIndex;
9413 buf = ctxt->input->buf->buffer->content;
9414 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9415 base++) {
9416 if (quote != 0) {
9417 if (buf[base] == quote)
9418 quote = 0;
9419 continue;
9420 }
9421 if (buf[base] == '"') {
9422 quote = '"';
9423 continue;
9424 }
9425 if (buf[base] == '\'') {
9426 quote = '\'';
9427 continue;
9428 }
9429 if (buf[base] == ']') {
9430 if ((unsigned int) base +1 >=
9431 ctxt->input->buf->buffer->use)
9432 break;
9433 if (buf[base + 1] == ']') {
9434 /* conditional crap, skip both ']' ! */
9435 base++;
9436 continue;
9437 }
9438 for (i = 0;
9439 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9440 i++) {
9441 if (buf[base + i] == '>')
9442 goto found_end_int_subset;
9443 }
9444 break;
9445 }
9446 }
9447 /*
9448 * We didn't found the end of the Internal subset
9449 */
9450 if (quote == 0)
9451 ctxt->checkIndex = base;
9452#ifdef DEBUG_PUSH
9453 if (next == 0)
9454 xmlGenericError(xmlGenericErrorContext,
9455 "PP: lookup of int subset end filed\n");
9456#endif
9457 goto done;
9458
9459found_end_int_subset:
9460 xmlParseInternalSubset(ctxt);
9461 ctxt->inSubset = 2;
9462 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9463 (ctxt->sax->externalSubset != NULL))
9464 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9465 ctxt->extSubSystem, ctxt->extSubURI);
9466 ctxt->inSubset = 0;
9467 ctxt->instate = XML_PARSER_PROLOG;
9468 ctxt->checkIndex = 0;
9469#ifdef DEBUG_PUSH
9470 xmlGenericError(xmlGenericErrorContext,
9471 "PP: entering PROLOG\n");
9472#endif
9473 break;
9474 }
9475 case XML_PARSER_COMMENT:
9476 xmlGenericError(xmlGenericErrorContext,
9477 "PP: internal error, state == COMMENT\n");
9478 ctxt->instate = XML_PARSER_CONTENT;
9479#ifdef DEBUG_PUSH
9480 xmlGenericError(xmlGenericErrorContext,
9481 "PP: entering CONTENT\n");
9482#endif
9483 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009484 case XML_PARSER_IGNORE:
9485 xmlGenericError(xmlGenericErrorContext,
9486 "PP: internal error, state == IGNORE");
9487 ctxt->instate = XML_PARSER_DTD;
9488#ifdef DEBUG_PUSH
9489 xmlGenericError(xmlGenericErrorContext,
9490 "PP: entering DTD\n");
9491#endif
9492 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009493 case XML_PARSER_PI:
9494 xmlGenericError(xmlGenericErrorContext,
9495 "PP: internal error, state == PI\n");
9496 ctxt->instate = XML_PARSER_CONTENT;
9497#ifdef DEBUG_PUSH
9498 xmlGenericError(xmlGenericErrorContext,
9499 "PP: entering CONTENT\n");
9500#endif
9501 break;
9502 case XML_PARSER_ENTITY_DECL:
9503 xmlGenericError(xmlGenericErrorContext,
9504 "PP: internal error, state == ENTITY_DECL\n");
9505 ctxt->instate = XML_PARSER_DTD;
9506#ifdef DEBUG_PUSH
9507 xmlGenericError(xmlGenericErrorContext,
9508 "PP: entering DTD\n");
9509#endif
9510 break;
9511 case XML_PARSER_ENTITY_VALUE:
9512 xmlGenericError(xmlGenericErrorContext,
9513 "PP: internal error, state == ENTITY_VALUE\n");
9514 ctxt->instate = XML_PARSER_CONTENT;
9515#ifdef DEBUG_PUSH
9516 xmlGenericError(xmlGenericErrorContext,
9517 "PP: entering DTD\n");
9518#endif
9519 break;
9520 case XML_PARSER_ATTRIBUTE_VALUE:
9521 xmlGenericError(xmlGenericErrorContext,
9522 "PP: internal error, state == ATTRIBUTE_VALUE\n");
9523 ctxt->instate = XML_PARSER_START_TAG;
9524#ifdef DEBUG_PUSH
9525 xmlGenericError(xmlGenericErrorContext,
9526 "PP: entering START_TAG\n");
9527#endif
9528 break;
9529 case XML_PARSER_SYSTEM_LITERAL:
9530 xmlGenericError(xmlGenericErrorContext,
9531 "PP: internal error, state == SYSTEM_LITERAL\n");
9532 ctxt->instate = XML_PARSER_START_TAG;
9533#ifdef DEBUG_PUSH
9534 xmlGenericError(xmlGenericErrorContext,
9535 "PP: entering START_TAG\n");
9536#endif
9537 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00009538 case XML_PARSER_PUBLIC_LITERAL:
9539 xmlGenericError(xmlGenericErrorContext,
9540 "PP: internal error, state == PUBLIC_LITERAL\n");
9541 ctxt->instate = XML_PARSER_START_TAG;
9542#ifdef DEBUG_PUSH
9543 xmlGenericError(xmlGenericErrorContext,
9544 "PP: entering START_TAG\n");
9545#endif
9546 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009547 }
9548 }
9549done:
9550#ifdef DEBUG_PUSH
9551 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
9552#endif
9553 return(ret);
9554}
9555
9556/**
Owen Taylor3473f882001-02-23 17:55:21 +00009557 * xmlParseChunk:
9558 * @ctxt: an XML parser context
9559 * @chunk: an char array
9560 * @size: the size in byte of the chunk
9561 * @terminate: last chunk indicator
9562 *
9563 * Parse a Chunk of memory
9564 *
9565 * Returns zero if no error, the xmlParserErrors otherwise.
9566 */
9567int
9568xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9569 int terminate) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009570 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9571 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +00009572 if (ctxt->instate == XML_PARSER_START)
9573 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009574 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9575 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9576 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9577 int cur = ctxt->input->cur - ctxt->input->base;
9578
9579 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9580 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9581 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009582 ctxt->input->end =
9583 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009584#ifdef DEBUG_PUSH
9585 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9586#endif
9587
Owen Taylor3473f882001-02-23 17:55:21 +00009588 } else if (ctxt->instate != XML_PARSER_EOF) {
9589 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
9590 xmlParserInputBufferPtr in = ctxt->input->buf;
9591 if ((in->encoder != NULL) && (in->buffer != NULL) &&
9592 (in->raw != NULL)) {
9593 int nbchars;
9594
9595 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
9596 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00009597 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +00009598 xmlGenericError(xmlGenericErrorContext,
9599 "xmlParseChunk: encoder error\n");
9600 return(XML_ERR_INVALID_ENCODING);
9601 }
9602 }
9603 }
9604 }
9605 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009606 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9607 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +00009608 if (terminate) {
9609 /*
9610 * Check for termination
9611 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009612 int avail = 0;
9613 if (ctxt->input->buf == NULL)
9614 avail = ctxt->input->length -
9615 (ctxt->input->cur - ctxt->input->base);
9616 else
9617 avail = ctxt->input->buf->buffer->use -
9618 (ctxt->input->cur - ctxt->input->base);
9619
Owen Taylor3473f882001-02-23 17:55:21 +00009620 if ((ctxt->instate != XML_PARSER_EOF) &&
9621 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009622 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009623 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009624 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009625 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009626 }
Owen Taylor3473f882001-02-23 17:55:21 +00009627 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009628 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009629 ctxt->sax->endDocument(ctxt->userData);
9630 }
9631 ctxt->instate = XML_PARSER_EOF;
9632 }
9633 return((xmlParserErrors) ctxt->errNo);
9634}
9635
9636/************************************************************************
9637 * *
9638 * I/O front end functions to the parser *
9639 * *
9640 ************************************************************************/
9641
9642/**
9643 * xmlStopParser:
9644 * @ctxt: an XML parser context
9645 *
9646 * Blocks further parser processing
9647 */
9648void
9649xmlStopParser(xmlParserCtxtPtr ctxt) {
Daniel Veillard157fee02003-10-31 10:36:03 +00009650 if (ctxt == NULL)
9651 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009652 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard157fee02003-10-31 10:36:03 +00009653 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009654 if (ctxt->input != NULL)
9655 ctxt->input->cur = BAD_CAST"";
9656}
9657
9658/**
9659 * xmlCreatePushParserCtxt:
9660 * @sax: a SAX handler
9661 * @user_data: The user data returned on SAX callbacks
9662 * @chunk: a pointer to an array of chars
9663 * @size: number of chars in the array
9664 * @filename: an optional file name or URI
9665 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009666 * Create a parser context for using the XML parser in push mode.
9667 * If @buffer and @size are non-NULL, the data is used to detect
9668 * the encoding. The remaining characters will be parsed so they
9669 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009670 * To allow content encoding detection, @size should be >= 4
9671 * The value of @filename is used for fetching external entities
9672 * and error/warning reports.
9673 *
9674 * Returns the new parser context or NULL
9675 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009676
Owen Taylor3473f882001-02-23 17:55:21 +00009677xmlParserCtxtPtr
9678xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9679 const char *chunk, int size, const char *filename) {
9680 xmlParserCtxtPtr ctxt;
9681 xmlParserInputPtr inputStream;
9682 xmlParserInputBufferPtr buf;
9683 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9684
9685 /*
9686 * plug some encoding conversion routines
9687 */
9688 if ((chunk != NULL) && (size >= 4))
9689 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9690
9691 buf = xmlAllocParserInputBuffer(enc);
9692 if (buf == NULL) return(NULL);
9693
9694 ctxt = xmlNewParserCtxt();
9695 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00009696 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009697 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009698 return(NULL);
9699 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009700 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
9701 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009702 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009703 xmlFreeParserInputBuffer(buf);
9704 xmlFreeParserCtxt(ctxt);
9705 return(NULL);
9706 }
Owen Taylor3473f882001-02-23 17:55:21 +00009707 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +00009708#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +00009709 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +00009710#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009711 xmlFree(ctxt->sax);
9712 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9713 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009714 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009715 xmlFreeParserInputBuffer(buf);
9716 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009717 return(NULL);
9718 }
9719 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9720 if (user_data != NULL)
9721 ctxt->userData = user_data;
9722 }
9723 if (filename == NULL) {
9724 ctxt->directory = NULL;
9725 } else {
9726 ctxt->directory = xmlParserGetDirectory(filename);
9727 }
9728
9729 inputStream = xmlNewInputStream(ctxt);
9730 if (inputStream == NULL) {
9731 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009732 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009733 return(NULL);
9734 }
9735
9736 if (filename == NULL)
9737 inputStream->filename = NULL;
9738 else
Daniel Veillardf4862f02002-09-10 11:13:43 +00009739 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +00009740 xmlCanonicPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +00009741 inputStream->buf = buf;
9742 inputStream->base = inputStream->buf->buffer->content;
9743 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009744 inputStream->end =
9745 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009746
9747 inputPush(ctxt, inputStream);
9748
9749 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9750 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009751 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9752 int cur = ctxt->input->cur - ctxt->input->base;
9753
Owen Taylor3473f882001-02-23 17:55:21 +00009754 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00009755
9756 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9757 ctxt->input->cur = ctxt->input->base + cur;
9758 ctxt->input->end =
9759 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009760#ifdef DEBUG_PUSH
9761 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9762#endif
9763 }
9764
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009765 if (enc != XML_CHAR_ENCODING_NONE) {
9766 xmlSwitchEncoding(ctxt, enc);
9767 }
9768
Owen Taylor3473f882001-02-23 17:55:21 +00009769 return(ctxt);
9770}
Daniel Veillard73b013f2003-09-30 12:36:01 +00009771#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009772
9773/**
9774 * xmlCreateIOParserCtxt:
9775 * @sax: a SAX handler
9776 * @user_data: The user data returned on SAX callbacks
9777 * @ioread: an I/O read function
9778 * @ioclose: an I/O close function
9779 * @ioctx: an I/O handler
9780 * @enc: the charset encoding if known
9781 *
9782 * Create a parser context for using the XML parser with an existing
9783 * I/O stream
9784 *
9785 * Returns the new parser context or NULL
9786 */
9787xmlParserCtxtPtr
9788xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9789 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9790 void *ioctx, xmlCharEncoding enc) {
9791 xmlParserCtxtPtr ctxt;
9792 xmlParserInputPtr inputStream;
9793 xmlParserInputBufferPtr buf;
9794
9795 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9796 if (buf == NULL) return(NULL);
9797
9798 ctxt = xmlNewParserCtxt();
9799 if (ctxt == NULL) {
9800 xmlFree(buf);
9801 return(NULL);
9802 }
9803 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +00009804#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +00009805 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +00009806#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009807 xmlFree(ctxt->sax);
9808 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9809 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009810 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009811 xmlFree(ctxt);
9812 return(NULL);
9813 }
9814 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9815 if (user_data != NULL)
9816 ctxt->userData = user_data;
9817 }
9818
9819 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9820 if (inputStream == NULL) {
9821 xmlFreeParserCtxt(ctxt);
9822 return(NULL);
9823 }
9824 inputPush(ctxt, inputStream);
9825
9826 return(ctxt);
9827}
9828
Daniel Veillard4432df22003-09-28 18:58:27 +00009829#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009830/************************************************************************
9831 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009832 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009833 * *
9834 ************************************************************************/
9835
9836/**
9837 * xmlIOParseDTD:
9838 * @sax: the SAX handler block or NULL
9839 * @input: an Input Buffer
9840 * @enc: the charset encoding if known
9841 *
9842 * Load and parse a DTD
9843 *
9844 * Returns the resulting xmlDtdPtr or NULL in case of error.
9845 * @input will be freed at parsing end.
9846 */
9847
9848xmlDtdPtr
9849xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9850 xmlCharEncoding enc) {
9851 xmlDtdPtr ret = NULL;
9852 xmlParserCtxtPtr ctxt;
9853 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009854 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009855
9856 if (input == NULL)
9857 return(NULL);
9858
9859 ctxt = xmlNewParserCtxt();
9860 if (ctxt == NULL) {
9861 return(NULL);
9862 }
9863
9864 /*
9865 * Set-up the SAX context
9866 */
9867 if (sax != NULL) {
9868 if (ctxt->sax != NULL)
9869 xmlFree(ctxt->sax);
9870 ctxt->sax = sax;
9871 ctxt->userData = NULL;
9872 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009873 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009874
9875 /*
9876 * generate a parser input from the I/O handler
9877 */
9878
Daniel Veillard43caefb2003-12-07 19:32:22 +00009879 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +00009880 if (pinput == NULL) {
9881 if (sax != NULL) ctxt->sax = NULL;
9882 xmlFreeParserCtxt(ctxt);
9883 return(NULL);
9884 }
9885
9886 /*
9887 * plug some encoding conversion routines here.
9888 */
9889 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +00009890 if (enc != XML_CHAR_ENCODING_NONE) {
9891 xmlSwitchEncoding(ctxt, enc);
9892 }
Owen Taylor3473f882001-02-23 17:55:21 +00009893
9894 pinput->filename = NULL;
9895 pinput->line = 1;
9896 pinput->col = 1;
9897 pinput->base = ctxt->input->cur;
9898 pinput->cur = ctxt->input->cur;
9899 pinput->free = NULL;
9900
9901 /*
9902 * let's parse that entity knowing it's an external subset.
9903 */
9904 ctxt->inSubset = 2;
9905 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9906 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9907 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009908
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009909 if ((enc == XML_CHAR_ENCODING_NONE) &&
9910 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00009911 /*
9912 * Get the 4 first bytes and decode the charset
9913 * if enc != XML_CHAR_ENCODING_NONE
9914 * plug some encoding conversion routines.
9915 */
9916 start[0] = RAW;
9917 start[1] = NXT(1);
9918 start[2] = NXT(2);
9919 start[3] = NXT(3);
9920 enc = xmlDetectCharEncoding(start, 4);
9921 if (enc != XML_CHAR_ENCODING_NONE) {
9922 xmlSwitchEncoding(ctxt, enc);
9923 }
9924 }
9925
Owen Taylor3473f882001-02-23 17:55:21 +00009926 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9927
9928 if (ctxt->myDoc != NULL) {
9929 if (ctxt->wellFormed) {
9930 ret = ctxt->myDoc->extSubset;
9931 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +00009932 if (ret != NULL) {
9933 xmlNodePtr tmp;
9934
9935 ret->doc = NULL;
9936 tmp = ret->children;
9937 while (tmp != NULL) {
9938 tmp->doc = NULL;
9939 tmp = tmp->next;
9940 }
9941 }
Owen Taylor3473f882001-02-23 17:55:21 +00009942 } else {
9943 ret = NULL;
9944 }
9945 xmlFreeDoc(ctxt->myDoc);
9946 ctxt->myDoc = NULL;
9947 }
9948 if (sax != NULL) ctxt->sax = NULL;
9949 xmlFreeParserCtxt(ctxt);
9950
9951 return(ret);
9952}
9953
9954/**
9955 * xmlSAXParseDTD:
9956 * @sax: the SAX handler block
9957 * @ExternalID: a NAME* containing the External ID of the DTD
9958 * @SystemID: a NAME* containing the URL to the DTD
9959 *
9960 * Load and parse an external subset.
9961 *
9962 * Returns the resulting xmlDtdPtr or NULL in case of error.
9963 */
9964
9965xmlDtdPtr
9966xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9967 const xmlChar *SystemID) {
9968 xmlDtdPtr ret = NULL;
9969 xmlParserCtxtPtr ctxt;
9970 xmlParserInputPtr input = NULL;
9971 xmlCharEncoding enc;
9972
9973 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9974
9975 ctxt = xmlNewParserCtxt();
9976 if (ctxt == NULL) {
9977 return(NULL);
9978 }
9979
9980 /*
9981 * Set-up the SAX context
9982 */
9983 if (sax != NULL) {
9984 if (ctxt->sax != NULL)
9985 xmlFree(ctxt->sax);
9986 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +00009987 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +00009988 }
9989
9990 /*
9991 * Ask the Entity resolver to load the damn thing
9992 */
9993
9994 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillardc6abc3d2003-04-26 13:27:30 +00009995 input = ctxt->sax->resolveEntity(ctxt, ExternalID, SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +00009996 if (input == NULL) {
9997 if (sax != NULL) ctxt->sax = NULL;
9998 xmlFreeParserCtxt(ctxt);
9999 return(NULL);
10000 }
10001
10002 /*
10003 * plug some encoding conversion routines here.
10004 */
10005 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010006 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10007 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10008 xmlSwitchEncoding(ctxt, enc);
10009 }
Owen Taylor3473f882001-02-23 17:55:21 +000010010
10011 if (input->filename == NULL)
Daniel Veillard85095e22003-04-23 13:56:44 +000010012 input->filename = (char *) xmlCanonicPath(SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010013 input->line = 1;
10014 input->col = 1;
10015 input->base = ctxt->input->cur;
10016 input->cur = ctxt->input->cur;
10017 input->free = NULL;
10018
10019 /*
10020 * let's parse that entity knowing it's an external subset.
10021 */
10022 ctxt->inSubset = 2;
10023 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10024 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10025 ExternalID, SystemID);
10026 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10027
10028 if (ctxt->myDoc != NULL) {
10029 if (ctxt->wellFormed) {
10030 ret = ctxt->myDoc->extSubset;
10031 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010032 if (ret != NULL) {
10033 xmlNodePtr tmp;
10034
10035 ret->doc = NULL;
10036 tmp = ret->children;
10037 while (tmp != NULL) {
10038 tmp->doc = NULL;
10039 tmp = tmp->next;
10040 }
10041 }
Owen Taylor3473f882001-02-23 17:55:21 +000010042 } else {
10043 ret = NULL;
10044 }
10045 xmlFreeDoc(ctxt->myDoc);
10046 ctxt->myDoc = NULL;
10047 }
10048 if (sax != NULL) ctxt->sax = NULL;
10049 xmlFreeParserCtxt(ctxt);
10050
10051 return(ret);
10052}
10053
Daniel Veillard4432df22003-09-28 18:58:27 +000010054
Owen Taylor3473f882001-02-23 17:55:21 +000010055/**
10056 * xmlParseDTD:
10057 * @ExternalID: a NAME* containing the External ID of the DTD
10058 * @SystemID: a NAME* containing the URL to the DTD
10059 *
10060 * Load and parse an external subset.
10061 *
10062 * Returns the resulting xmlDtdPtr or NULL in case of error.
10063 */
10064
10065xmlDtdPtr
10066xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10067 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10068}
Daniel Veillard4432df22003-09-28 18:58:27 +000010069#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010070
10071/************************************************************************
10072 * *
10073 * Front ends when parsing an Entity *
10074 * *
10075 ************************************************************************/
10076
10077/**
Owen Taylor3473f882001-02-23 17:55:21 +000010078 * xmlParseCtxtExternalEntity:
10079 * @ctx: the existing parsing context
10080 * @URL: the URL for the entity to load
10081 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010082 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010083 *
10084 * Parse an external general entity within an existing parsing context
10085 * An external general parsed entity is well-formed if it matches the
10086 * production labeled extParsedEnt.
10087 *
10088 * [78] extParsedEnt ::= TextDecl? content
10089 *
10090 * Returns 0 if the entity is well formed, -1 in case of args problem and
10091 * the parser error code otherwise
10092 */
10093
10094int
10095xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010096 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010097 xmlParserCtxtPtr ctxt;
10098 xmlDocPtr newDoc;
10099 xmlSAXHandlerPtr oldsax = NULL;
10100 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010101 xmlChar start[4];
10102 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010103
10104 if (ctx->depth > 40) {
10105 return(XML_ERR_ENTITY_LOOP);
10106 }
10107
Daniel Veillardcda96922001-08-21 10:56:31 +000010108 if (lst != NULL)
10109 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010110 if ((URL == NULL) && (ID == NULL))
10111 return(-1);
10112 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10113 return(-1);
10114
10115
10116 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10117 if (ctxt == NULL) return(-1);
10118 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010119 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010120 oldsax = ctxt->sax;
10121 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010122 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010123 newDoc = xmlNewDoc(BAD_CAST "1.0");
10124 if (newDoc == NULL) {
10125 xmlFreeParserCtxt(ctxt);
10126 return(-1);
10127 }
10128 if (ctx->myDoc != NULL) {
10129 newDoc->intSubset = ctx->myDoc->intSubset;
10130 newDoc->extSubset = ctx->myDoc->extSubset;
10131 }
10132 if (ctx->myDoc->URL != NULL) {
10133 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10134 }
10135 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10136 if (newDoc->children == NULL) {
10137 ctxt->sax = oldsax;
10138 xmlFreeParserCtxt(ctxt);
10139 newDoc->intSubset = NULL;
10140 newDoc->extSubset = NULL;
10141 xmlFreeDoc(newDoc);
10142 return(-1);
10143 }
10144 nodePush(ctxt, newDoc->children);
10145 if (ctx->myDoc == NULL) {
10146 ctxt->myDoc = newDoc;
10147 } else {
10148 ctxt->myDoc = ctx->myDoc;
10149 newDoc->children->doc = ctx->myDoc;
10150 }
10151
Daniel Veillard87a764e2001-06-20 17:41:10 +000010152 /*
10153 * Get the 4 first bytes and decode the charset
10154 * if enc != XML_CHAR_ENCODING_NONE
10155 * plug some encoding conversion routines.
10156 */
10157 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010158 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10159 start[0] = RAW;
10160 start[1] = NXT(1);
10161 start[2] = NXT(2);
10162 start[3] = NXT(3);
10163 enc = xmlDetectCharEncoding(start, 4);
10164 if (enc != XML_CHAR_ENCODING_NONE) {
10165 xmlSwitchEncoding(ctxt, enc);
10166 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010167 }
10168
Owen Taylor3473f882001-02-23 17:55:21 +000010169 /*
10170 * Parse a possible text declaration first
10171 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010172 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010173 xmlParseTextDecl(ctxt);
10174 }
10175
10176 /*
10177 * Doing validity checking on chunk doesn't make sense
10178 */
10179 ctxt->instate = XML_PARSER_CONTENT;
10180 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010181 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010182 ctxt->loadsubset = ctx->loadsubset;
10183 ctxt->depth = ctx->depth + 1;
10184 ctxt->replaceEntities = ctx->replaceEntities;
10185 if (ctxt->validate) {
10186 ctxt->vctxt.error = ctx->vctxt.error;
10187 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010188 } else {
10189 ctxt->vctxt.error = NULL;
10190 ctxt->vctxt.warning = NULL;
10191 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010192 ctxt->vctxt.nodeTab = NULL;
10193 ctxt->vctxt.nodeNr = 0;
10194 ctxt->vctxt.nodeMax = 0;
10195 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010196 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10197 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010198 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10199 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10200 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010201 ctxt->dictNames = ctx->dictNames;
10202 ctxt->attsDefault = ctx->attsDefault;
10203 ctxt->attsSpecial = ctx->attsSpecial;
Owen Taylor3473f882001-02-23 17:55:21 +000010204
10205 xmlParseContent(ctxt);
10206
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010207 ctx->validate = ctxt->validate;
10208 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010209 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010210 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010211 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010212 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010213 }
10214 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010215 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010216 }
10217
10218 if (!ctxt->wellFormed) {
10219 if (ctxt->errNo == 0)
10220 ret = 1;
10221 else
10222 ret = ctxt->errNo;
10223 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010224 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010225 xmlNodePtr cur;
10226
10227 /*
10228 * Return the newly created nodeset after unlinking it from
10229 * they pseudo parent.
10230 */
10231 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010232 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010233 while (cur != NULL) {
10234 cur->parent = NULL;
10235 cur = cur->next;
10236 }
10237 newDoc->children->children = NULL;
10238 }
10239 ret = 0;
10240 }
10241 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010242 ctxt->dict = NULL;
10243 ctxt->attsDefault = NULL;
10244 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010245 xmlFreeParserCtxt(ctxt);
10246 newDoc->intSubset = NULL;
10247 newDoc->extSubset = NULL;
10248 xmlFreeDoc(newDoc);
10249
10250 return(ret);
10251}
10252
10253/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010254 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010255 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010256 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010257 * @sax: the SAX handler bloc (possibly NULL)
10258 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10259 * @depth: Used for loop detection, use 0
10260 * @URL: the URL for the entity to load
10261 * @ID: the System ID for the entity to load
10262 * @list: the return value for the set of parsed nodes
10263 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010264 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010265 *
10266 * Returns 0 if the entity is well formed, -1 in case of args problem and
10267 * the parser error code otherwise
10268 */
10269
Daniel Veillard7d515752003-09-26 19:12:37 +000010270static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010271xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10272 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010273 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010274 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010275 xmlParserCtxtPtr ctxt;
10276 xmlDocPtr newDoc;
10277 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000010278 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010279 xmlChar start[4];
10280 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010281
10282 if (depth > 40) {
10283 return(XML_ERR_ENTITY_LOOP);
10284 }
10285
10286
10287
10288 if (list != NULL)
10289 *list = NULL;
10290 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000010291 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010292 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000010293 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010294
10295
10296 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000010297 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000010298 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010299 if (oldctxt != NULL) {
10300 ctxt->_private = oldctxt->_private;
10301 ctxt->loadsubset = oldctxt->loadsubset;
10302 ctxt->validate = oldctxt->validate;
10303 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010304 ctxt->record_info = oldctxt->record_info;
10305 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10306 ctxt->node_seq.length = oldctxt->node_seq.length;
10307 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010308 } else {
10309 /*
10310 * Doing validity checking on chunk without context
10311 * doesn't make sense
10312 */
10313 ctxt->_private = NULL;
10314 ctxt->validate = 0;
10315 ctxt->external = 2;
10316 ctxt->loadsubset = 0;
10317 }
Owen Taylor3473f882001-02-23 17:55:21 +000010318 if (sax != NULL) {
10319 oldsax = ctxt->sax;
10320 ctxt->sax = sax;
10321 if (user_data != NULL)
10322 ctxt->userData = user_data;
10323 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010324 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010325 newDoc = xmlNewDoc(BAD_CAST "1.0");
10326 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010327 ctxt->node_seq.maximum = 0;
10328 ctxt->node_seq.length = 0;
10329 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010330 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000010331 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010332 }
10333 if (doc != NULL) {
10334 newDoc->intSubset = doc->intSubset;
10335 newDoc->extSubset = doc->extSubset;
10336 }
10337 if (doc->URL != NULL) {
10338 newDoc->URL = xmlStrdup(doc->URL);
10339 }
10340 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10341 if (newDoc->children == NULL) {
10342 if (sax != NULL)
10343 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010344 ctxt->node_seq.maximum = 0;
10345 ctxt->node_seq.length = 0;
10346 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010347 xmlFreeParserCtxt(ctxt);
10348 newDoc->intSubset = NULL;
10349 newDoc->extSubset = NULL;
10350 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000010351 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010352 }
10353 nodePush(ctxt, newDoc->children);
10354 if (doc == NULL) {
10355 ctxt->myDoc = newDoc;
10356 } else {
10357 ctxt->myDoc = doc;
10358 newDoc->children->doc = doc;
10359 }
10360
Daniel Veillard87a764e2001-06-20 17:41:10 +000010361 /*
10362 * Get the 4 first bytes and decode the charset
10363 * if enc != XML_CHAR_ENCODING_NONE
10364 * plug some encoding conversion routines.
10365 */
10366 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010367 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10368 start[0] = RAW;
10369 start[1] = NXT(1);
10370 start[2] = NXT(2);
10371 start[3] = NXT(3);
10372 enc = xmlDetectCharEncoding(start, 4);
10373 if (enc != XML_CHAR_ENCODING_NONE) {
10374 xmlSwitchEncoding(ctxt, enc);
10375 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010376 }
10377
Owen Taylor3473f882001-02-23 17:55:21 +000010378 /*
10379 * Parse a possible text declaration first
10380 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010381 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010382 xmlParseTextDecl(ctxt);
10383 }
10384
Owen Taylor3473f882001-02-23 17:55:21 +000010385 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000010386 ctxt->depth = depth;
10387
10388 xmlParseContent(ctxt);
10389
Daniel Veillard561b7f82002-03-20 21:55:57 +000010390 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010391 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000010392 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010393 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010394 }
10395 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010396 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010397 }
10398
10399 if (!ctxt->wellFormed) {
10400 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010401 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000010402 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010403 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000010404 } else {
10405 if (list != NULL) {
10406 xmlNodePtr cur;
10407
10408 /*
10409 * Return the newly created nodeset after unlinking it from
10410 * they pseudo parent.
10411 */
10412 cur = newDoc->children->children;
10413 *list = cur;
10414 while (cur != NULL) {
10415 cur->parent = NULL;
10416 cur = cur->next;
10417 }
10418 newDoc->children->children = NULL;
10419 }
Daniel Veillard7d515752003-09-26 19:12:37 +000010420 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000010421 }
10422 if (sax != NULL)
10423 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010424 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10425 oldctxt->node_seq.length = ctxt->node_seq.length;
10426 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010427 ctxt->node_seq.maximum = 0;
10428 ctxt->node_seq.length = 0;
10429 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010430 xmlFreeParserCtxt(ctxt);
10431 newDoc->intSubset = NULL;
10432 newDoc->extSubset = NULL;
10433 xmlFreeDoc(newDoc);
10434
10435 return(ret);
10436}
10437
Daniel Veillard81273902003-09-30 00:43:48 +000010438#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010439/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010440 * xmlParseExternalEntity:
10441 * @doc: the document the chunk pertains to
10442 * @sax: the SAX handler bloc (possibly NULL)
10443 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10444 * @depth: Used for loop detection, use 0
10445 * @URL: the URL for the entity to load
10446 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010447 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010448 *
10449 * Parse an external general entity
10450 * An external general parsed entity is well-formed if it matches the
10451 * production labeled extParsedEnt.
10452 *
10453 * [78] extParsedEnt ::= TextDecl? content
10454 *
10455 * Returns 0 if the entity is well formed, -1 in case of args problem and
10456 * the parser error code otherwise
10457 */
10458
10459int
10460xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010461 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010462 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010463 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010464}
10465
10466/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010467 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010468 * @doc: the document the chunk pertains to
10469 * @sax: the SAX handler bloc (possibly NULL)
10470 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10471 * @depth: Used for loop detection, use 0
10472 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010473 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010474 *
10475 * Parse a well-balanced chunk of an XML document
10476 * called by the parser
10477 * The allowed sequence for the Well Balanced Chunk is the one defined by
10478 * the content production in the XML grammar:
10479 *
10480 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10481 *
10482 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10483 * the parser error code otherwise
10484 */
10485
10486int
10487xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010488 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010489 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10490 depth, string, lst, 0 );
10491}
Daniel Veillard81273902003-09-30 00:43:48 +000010492#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000010493
10494/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000010495 * xmlParseBalancedChunkMemoryInternal:
10496 * @oldctxt: the existing parsing context
10497 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10498 * @user_data: the user data field for the parser context
10499 * @lst: the return value for the set of parsed nodes
10500 *
10501 *
10502 * Parse a well-balanced chunk of an XML document
10503 * called by the parser
10504 * The allowed sequence for the Well Balanced Chunk is the one defined by
10505 * the content production in the XML grammar:
10506 *
10507 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10508 *
Daniel Veillard7d515752003-09-26 19:12:37 +000010509 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
10510 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000010511 *
10512 * In case recover is set to 1, the nodelist will not be empty even if
10513 * the parsed chunk is not well balanced.
10514 */
Daniel Veillard7d515752003-09-26 19:12:37 +000010515static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000010516xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
10517 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
10518 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010519 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010520 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010521 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010522 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000010523 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010524
10525 if (oldctxt->depth > 40) {
10526 return(XML_ERR_ENTITY_LOOP);
10527 }
10528
10529
10530 if (lst != NULL)
10531 *lst = NULL;
10532 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000010533 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010534
10535 size = xmlStrlen(string);
10536
10537 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000010538 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010539 if (user_data != NULL)
10540 ctxt->userData = user_data;
10541 else
10542 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010543 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10544 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010545 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10546 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10547 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010548
10549 oldsax = ctxt->sax;
10550 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010551 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000010552 ctxt->replaceEntities = oldctxt->replaceEntities;
10553 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010554
Daniel Veillarde1ca5032002-12-09 14:13:43 +000010555 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010556 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010557 newDoc = xmlNewDoc(BAD_CAST "1.0");
10558 if (newDoc == NULL) {
10559 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010560 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010561 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000010562 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010563 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010564 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010565 } else {
10566 ctxt->myDoc = oldctxt->myDoc;
10567 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010568 }
Daniel Veillard9bc53102002-11-25 13:20:04 +000010569 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +000010570 BAD_CAST "pseudoroot", NULL);
10571 if (ctxt->myDoc->children == NULL) {
10572 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010573 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010574 xmlFreeParserCtxt(ctxt);
10575 if (newDoc != NULL)
10576 xmlFreeDoc(newDoc);
William M. Brack7b9154b2003-09-27 19:23:50 +000010577 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010578 }
10579 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010580 ctxt->instate = XML_PARSER_CONTENT;
10581 ctxt->depth = oldctxt->depth + 1;
10582
Daniel Veillard328f48c2002-11-15 15:24:34 +000010583 ctxt->validate = 0;
10584 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000010585 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
10586 /*
10587 * ID/IDREF registration will be done in xmlValidateElement below
10588 */
10589 ctxt->loadsubset |= XML_SKIP_IDS;
10590 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010591 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000010592 ctxt->attsDefault = oldctxt->attsDefault;
10593 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010594
Daniel Veillard68e9e742002-11-16 15:35:11 +000010595 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010596 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010597 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010598 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010599 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010600 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010601 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010602 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010603 }
10604
10605 if (!ctxt->wellFormed) {
10606 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010607 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010608 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010609 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010610 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000010611 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010612 }
10613
William M. Brack7b9154b2003-09-27 19:23:50 +000010614 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000010615 xmlNodePtr cur;
10616
10617 /*
10618 * Return the newly created nodeset after unlinking it from
10619 * they pseudo parent.
10620 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000010621 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010622 *lst = cur;
10623 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000010624#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8d589042003-02-04 15:07:21 +000010625 if (oldctxt->validate && oldctxt->wellFormed &&
10626 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
10627 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
10628 oldctxt->myDoc, cur);
10629 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010630#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000010631 cur->parent = NULL;
10632 cur = cur->next;
10633 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010634 ctxt->myDoc->children->children = NULL;
10635 }
10636 if (ctxt->myDoc != NULL) {
10637 xmlFreeNode(ctxt->myDoc->children);
10638 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010639 }
10640
10641 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010642 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000010643 ctxt->attsDefault = NULL;
10644 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010645 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010646 if (newDoc != NULL)
10647 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010648
10649 return(ret);
10650}
10651
Daniel Veillard81273902003-09-30 00:43:48 +000010652#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000010653/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000010654 * xmlParseBalancedChunkMemoryRecover:
10655 * @doc: the document the chunk pertains to
10656 * @sax: the SAX handler bloc (possibly NULL)
10657 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10658 * @depth: Used for loop detection, use 0
10659 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10660 * @lst: the return value for the set of parsed nodes
10661 * @recover: return nodes even if the data is broken (use 0)
10662 *
10663 *
10664 * Parse a well-balanced chunk of an XML document
10665 * called by the parser
10666 * The allowed sequence for the Well Balanced Chunk is the one defined by
10667 * the content production in the XML grammar:
10668 *
10669 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10670 *
10671 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10672 * the parser error code otherwise
10673 *
10674 * In case recover is set to 1, the nodelist will not be empty even if
10675 * the parsed chunk is not well balanced.
10676 */
10677int
10678xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
10679 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
10680 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000010681 xmlParserCtxtPtr ctxt;
10682 xmlDocPtr newDoc;
10683 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000010684 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000010685 int size;
10686 int ret = 0;
10687
10688 if (depth > 40) {
10689 return(XML_ERR_ENTITY_LOOP);
10690 }
10691
10692
Daniel Veillardcda96922001-08-21 10:56:31 +000010693 if (lst != NULL)
10694 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010695 if (string == NULL)
10696 return(-1);
10697
10698 size = xmlStrlen(string);
10699
10700 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
10701 if (ctxt == NULL) return(-1);
10702 ctxt->userData = ctxt;
10703 if (sax != NULL) {
10704 oldsax = ctxt->sax;
10705 ctxt->sax = sax;
10706 if (user_data != NULL)
10707 ctxt->userData = user_data;
10708 }
10709 newDoc = xmlNewDoc(BAD_CAST "1.0");
10710 if (newDoc == NULL) {
10711 xmlFreeParserCtxt(ctxt);
10712 return(-1);
10713 }
10714 if (doc != NULL) {
10715 newDoc->intSubset = doc->intSubset;
10716 newDoc->extSubset = doc->extSubset;
10717 }
10718 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10719 if (newDoc->children == NULL) {
10720 if (sax != NULL)
10721 ctxt->sax = oldsax;
10722 xmlFreeParserCtxt(ctxt);
10723 newDoc->intSubset = NULL;
10724 newDoc->extSubset = NULL;
10725 xmlFreeDoc(newDoc);
10726 return(-1);
10727 }
10728 nodePush(ctxt, newDoc->children);
10729 if (doc == NULL) {
10730 ctxt->myDoc = newDoc;
10731 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000010732 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000010733 newDoc->children->doc = doc;
10734 }
10735 ctxt->instate = XML_PARSER_CONTENT;
10736 ctxt->depth = depth;
10737
10738 /*
10739 * Doing validity checking on chunk doesn't make sense
10740 */
10741 ctxt->validate = 0;
10742 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010743 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010744
Daniel Veillardb39bc392002-10-26 19:29:51 +000010745 if ( doc != NULL ){
10746 content = doc->children;
10747 doc->children = NULL;
10748 xmlParseContent(ctxt);
10749 doc->children = content;
10750 }
10751 else {
10752 xmlParseContent(ctxt);
10753 }
Owen Taylor3473f882001-02-23 17:55:21 +000010754 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010755 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010756 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010757 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010758 }
10759 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010760 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010761 }
10762
10763 if (!ctxt->wellFormed) {
10764 if (ctxt->errNo == 0)
10765 ret = 1;
10766 else
10767 ret = ctxt->errNo;
10768 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010769 ret = 0;
10770 }
10771
10772 if (lst != NULL && (ret == 0 || recover == 1)) {
10773 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010774
10775 /*
10776 * Return the newly created nodeset after unlinking it from
10777 * they pseudo parent.
10778 */
10779 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010780 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010781 while (cur != NULL) {
10782 cur->parent = NULL;
10783 cur = cur->next;
10784 }
10785 newDoc->children->children = NULL;
10786 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000010787
Owen Taylor3473f882001-02-23 17:55:21 +000010788 if (sax != NULL)
10789 ctxt->sax = oldsax;
10790 xmlFreeParserCtxt(ctxt);
10791 newDoc->intSubset = NULL;
10792 newDoc->extSubset = NULL;
10793 xmlFreeDoc(newDoc);
10794
10795 return(ret);
10796}
10797
10798/**
10799 * xmlSAXParseEntity:
10800 * @sax: the SAX handler block
10801 * @filename: the filename
10802 *
10803 * parse an XML external entity out of context and build a tree.
10804 * It use the given SAX function block to handle the parsing callback.
10805 * If sax is NULL, fallback to the default DOM tree building routines.
10806 *
10807 * [78] extParsedEnt ::= TextDecl? content
10808 *
10809 * This correspond to a "Well Balanced" chunk
10810 *
10811 * Returns the resulting document tree
10812 */
10813
10814xmlDocPtr
10815xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
10816 xmlDocPtr ret;
10817 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010818
10819 ctxt = xmlCreateFileParserCtxt(filename);
10820 if (ctxt == NULL) {
10821 return(NULL);
10822 }
10823 if (sax != NULL) {
10824 if (ctxt->sax != NULL)
10825 xmlFree(ctxt->sax);
10826 ctxt->sax = sax;
10827 ctxt->userData = NULL;
10828 }
10829
Owen Taylor3473f882001-02-23 17:55:21 +000010830 xmlParseExtParsedEnt(ctxt);
10831
10832 if (ctxt->wellFormed)
10833 ret = ctxt->myDoc;
10834 else {
10835 ret = NULL;
10836 xmlFreeDoc(ctxt->myDoc);
10837 ctxt->myDoc = NULL;
10838 }
10839 if (sax != NULL)
10840 ctxt->sax = NULL;
10841 xmlFreeParserCtxt(ctxt);
10842
10843 return(ret);
10844}
10845
10846/**
10847 * xmlParseEntity:
10848 * @filename: the filename
10849 *
10850 * parse an XML external entity out of context and build a tree.
10851 *
10852 * [78] extParsedEnt ::= TextDecl? content
10853 *
10854 * This correspond to a "Well Balanced" chunk
10855 *
10856 * Returns the resulting document tree
10857 */
10858
10859xmlDocPtr
10860xmlParseEntity(const char *filename) {
10861 return(xmlSAXParseEntity(NULL, filename));
10862}
Daniel Veillard81273902003-09-30 00:43:48 +000010863#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010864
10865/**
10866 * xmlCreateEntityParserCtxt:
10867 * @URL: the entity URL
10868 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010869 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000010870 *
10871 * Create a parser context for an external entity
10872 * Automatic support for ZLIB/Compress compressed document is provided
10873 * by default if found at compile-time.
10874 *
10875 * Returns the new parser context or NULL
10876 */
10877xmlParserCtxtPtr
10878xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10879 const xmlChar *base) {
10880 xmlParserCtxtPtr ctxt;
10881 xmlParserInputPtr inputStream;
10882 char *directory = NULL;
10883 xmlChar *uri;
10884
10885 ctxt = xmlNewParserCtxt();
10886 if (ctxt == NULL) {
10887 return(NULL);
10888 }
10889
10890 uri = xmlBuildURI(URL, base);
10891
10892 if (uri == NULL) {
10893 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10894 if (inputStream == NULL) {
10895 xmlFreeParserCtxt(ctxt);
10896 return(NULL);
10897 }
10898
10899 inputPush(ctxt, inputStream);
10900
10901 if ((ctxt->directory == NULL) && (directory == NULL))
10902 directory = xmlParserGetDirectory((char *)URL);
10903 if ((ctxt->directory == NULL) && (directory != NULL))
10904 ctxt->directory = directory;
10905 } else {
10906 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10907 if (inputStream == NULL) {
10908 xmlFree(uri);
10909 xmlFreeParserCtxt(ctxt);
10910 return(NULL);
10911 }
10912
10913 inputPush(ctxt, inputStream);
10914
10915 if ((ctxt->directory == NULL) && (directory == NULL))
10916 directory = xmlParserGetDirectory((char *)uri);
10917 if ((ctxt->directory == NULL) && (directory != NULL))
10918 ctxt->directory = directory;
10919 xmlFree(uri);
10920 }
Owen Taylor3473f882001-02-23 17:55:21 +000010921 return(ctxt);
10922}
10923
10924/************************************************************************
10925 * *
10926 * Front ends when parsing from a file *
10927 * *
10928 ************************************************************************/
10929
10930/**
Daniel Veillard61b93382003-11-03 14:28:31 +000010931 * xmlCreateURLParserCtxt:
10932 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000010933 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000010934 *
Daniel Veillard61b93382003-11-03 14:28:31 +000010935 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000010936 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000010937 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000010938 *
10939 * Returns the new parser context or NULL
10940 */
10941xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000010942xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000010943{
10944 xmlParserCtxtPtr ctxt;
10945 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000010946 char *directory = NULL;
10947
Owen Taylor3473f882001-02-23 17:55:21 +000010948 ctxt = xmlNewParserCtxt();
10949 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010950 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000010951 return(NULL);
10952 }
10953
Daniel Veillard61b93382003-11-03 14:28:31 +000010954 if (options != 0)
10955 xmlCtxtUseOptions(ctxt, options);
Igor Zlatkovicce076162003-02-23 13:39:39 +000010956
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000010957 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010958 if (inputStream == NULL) {
10959 xmlFreeParserCtxt(ctxt);
10960 return(NULL);
10961 }
10962
Owen Taylor3473f882001-02-23 17:55:21 +000010963 inputPush(ctxt, inputStream);
10964 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010965 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000010966 if ((ctxt->directory == NULL) && (directory != NULL))
10967 ctxt->directory = directory;
10968
10969 return(ctxt);
10970}
10971
Daniel Veillard61b93382003-11-03 14:28:31 +000010972/**
10973 * xmlCreateFileParserCtxt:
10974 * @filename: the filename
10975 *
10976 * Create a parser context for a file content.
10977 * Automatic support for ZLIB/Compress compressed document is provided
10978 * by default if found at compile-time.
10979 *
10980 * Returns the new parser context or NULL
10981 */
10982xmlParserCtxtPtr
10983xmlCreateFileParserCtxt(const char *filename)
10984{
10985 return(xmlCreateURLParserCtxt(filename, 0));
10986}
10987
Daniel Veillard81273902003-09-30 00:43:48 +000010988#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010989/**
Daniel Veillarda293c322001-10-02 13:54:14 +000010990 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000010991 * @sax: the SAX handler block
10992 * @filename: the filename
10993 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10994 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000010995 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000010996 *
10997 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10998 * compressed document is provided by default if found at compile-time.
10999 * It use the given SAX function block to handle the parsing callback.
11000 * If sax is NULL, fallback to the default DOM tree building routines.
11001 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011002 * User data (void *) is stored within the parser context in the
11003 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011004 *
Owen Taylor3473f882001-02-23 17:55:21 +000011005 * Returns the resulting document tree
11006 */
11007
11008xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011009xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11010 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011011 xmlDocPtr ret;
11012 xmlParserCtxtPtr ctxt;
11013 char *directory = NULL;
11014
Daniel Veillard635ef722001-10-29 11:48:19 +000011015 xmlInitParser();
11016
Owen Taylor3473f882001-02-23 17:55:21 +000011017 ctxt = xmlCreateFileParserCtxt(filename);
11018 if (ctxt == NULL) {
11019 return(NULL);
11020 }
11021 if (sax != NULL) {
11022 if (ctxt->sax != NULL)
11023 xmlFree(ctxt->sax);
11024 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011025 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011026 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011027 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000011028 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000011029 }
Owen Taylor3473f882001-02-23 17:55:21 +000011030
11031 if ((ctxt->directory == NULL) && (directory == NULL))
11032 directory = xmlParserGetDirectory(filename);
11033 if ((ctxt->directory == NULL) && (directory != NULL))
11034 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11035
Daniel Veillarddad3f682002-11-17 16:47:27 +000011036 ctxt->recovery = recovery;
11037
Owen Taylor3473f882001-02-23 17:55:21 +000011038 xmlParseDocument(ctxt);
11039
William M. Brackc07329e2003-09-08 01:57:30 +000011040 if ((ctxt->wellFormed) || recovery) {
11041 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000011042 if (ret != NULL) {
11043 if (ctxt->input->buf->compressed > 0)
11044 ret->compression = 9;
11045 else
11046 ret->compression = ctxt->input->buf->compressed;
11047 }
William M. Brackc07329e2003-09-08 01:57:30 +000011048 }
Owen Taylor3473f882001-02-23 17:55:21 +000011049 else {
11050 ret = NULL;
11051 xmlFreeDoc(ctxt->myDoc);
11052 ctxt->myDoc = NULL;
11053 }
11054 if (sax != NULL)
11055 ctxt->sax = NULL;
11056 xmlFreeParserCtxt(ctxt);
11057
11058 return(ret);
11059}
11060
11061/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011062 * xmlSAXParseFile:
11063 * @sax: the SAX handler block
11064 * @filename: the filename
11065 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11066 * documents
11067 *
11068 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11069 * compressed document is provided by default if found at compile-time.
11070 * It use the given SAX function block to handle the parsing callback.
11071 * If sax is NULL, fallback to the default DOM tree building routines.
11072 *
11073 * Returns the resulting document tree
11074 */
11075
11076xmlDocPtr
11077xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11078 int recovery) {
11079 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11080}
11081
11082/**
Owen Taylor3473f882001-02-23 17:55:21 +000011083 * xmlRecoverDoc:
11084 * @cur: a pointer to an array of xmlChar
11085 *
11086 * parse an XML in-memory document and build a tree.
11087 * In the case the document is not Well Formed, a tree is built anyway
11088 *
11089 * Returns the resulting document tree
11090 */
11091
11092xmlDocPtr
11093xmlRecoverDoc(xmlChar *cur) {
11094 return(xmlSAXParseDoc(NULL, cur, 1));
11095}
11096
11097/**
11098 * xmlParseFile:
11099 * @filename: the filename
11100 *
11101 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11102 * compressed document is provided by default if found at compile-time.
11103 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000011104 * Returns the resulting document tree if the file was wellformed,
11105 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000011106 */
11107
11108xmlDocPtr
11109xmlParseFile(const char *filename) {
11110 return(xmlSAXParseFile(NULL, filename, 0));
11111}
11112
11113/**
11114 * xmlRecoverFile:
11115 * @filename: the filename
11116 *
11117 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11118 * compressed document is provided by default if found at compile-time.
11119 * In the case the document is not Well Formed, a tree is built anyway
11120 *
11121 * Returns the resulting document tree
11122 */
11123
11124xmlDocPtr
11125xmlRecoverFile(const char *filename) {
11126 return(xmlSAXParseFile(NULL, filename, 1));
11127}
11128
11129
11130/**
11131 * xmlSetupParserForBuffer:
11132 * @ctxt: an XML parser context
11133 * @buffer: a xmlChar * buffer
11134 * @filename: a file name
11135 *
11136 * Setup the parser context to parse a new buffer; Clears any prior
11137 * contents from the parser context. The buffer parameter must not be
11138 * NULL, but the filename parameter can be
11139 */
11140void
11141xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
11142 const char* filename)
11143{
11144 xmlParserInputPtr input;
11145
11146 input = xmlNewInputStream(ctxt);
11147 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011148 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +000011149 xmlFree(ctxt);
11150 return;
11151 }
11152
11153 xmlClearParserCtxt(ctxt);
11154 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000011155 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011156 input->base = buffer;
11157 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011158 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000011159 inputPush(ctxt, input);
11160}
11161
11162/**
11163 * xmlSAXUserParseFile:
11164 * @sax: a SAX handler
11165 * @user_data: The user data returned on SAX callbacks
11166 * @filename: a file name
11167 *
11168 * parse an XML file and call the given SAX handler routines.
11169 * Automatic support for ZLIB/Compress compressed document is provided
11170 *
11171 * Returns 0 in case of success or a error number otherwise
11172 */
11173int
11174xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11175 const char *filename) {
11176 int ret = 0;
11177 xmlParserCtxtPtr ctxt;
11178
11179 ctxt = xmlCreateFileParserCtxt(filename);
11180 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000011181#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011182 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011183#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011184 xmlFree(ctxt->sax);
11185 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011186 xmlDetectSAX2(ctxt);
11187
Owen Taylor3473f882001-02-23 17:55:21 +000011188 if (user_data != NULL)
11189 ctxt->userData = user_data;
11190
11191 xmlParseDocument(ctxt);
11192
11193 if (ctxt->wellFormed)
11194 ret = 0;
11195 else {
11196 if (ctxt->errNo != 0)
11197 ret = ctxt->errNo;
11198 else
11199 ret = -1;
11200 }
11201 if (sax != NULL)
11202 ctxt->sax = NULL;
11203 xmlFreeParserCtxt(ctxt);
11204
11205 return ret;
11206}
Daniel Veillard81273902003-09-30 00:43:48 +000011207#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011208
11209/************************************************************************
11210 * *
11211 * Front ends when parsing from memory *
11212 * *
11213 ************************************************************************/
11214
11215/**
11216 * xmlCreateMemoryParserCtxt:
11217 * @buffer: a pointer to a char array
11218 * @size: the size of the array
11219 *
11220 * Create a parser context for an XML in-memory document.
11221 *
11222 * Returns the new parser context or NULL
11223 */
11224xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011225xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011226 xmlParserCtxtPtr ctxt;
11227 xmlParserInputPtr input;
11228 xmlParserInputBufferPtr buf;
11229
11230 if (buffer == NULL)
11231 return(NULL);
11232 if (size <= 0)
11233 return(NULL);
11234
11235 ctxt = xmlNewParserCtxt();
11236 if (ctxt == NULL)
11237 return(NULL);
11238
Daniel Veillard53350552003-09-18 13:35:51 +000011239 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000011240 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011241 if (buf == NULL) {
11242 xmlFreeParserCtxt(ctxt);
11243 return(NULL);
11244 }
Owen Taylor3473f882001-02-23 17:55:21 +000011245
11246 input = xmlNewInputStream(ctxt);
11247 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011248 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011249 xmlFreeParserCtxt(ctxt);
11250 return(NULL);
11251 }
11252
11253 input->filename = NULL;
11254 input->buf = buf;
11255 input->base = input->buf->buffer->content;
11256 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011257 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011258
11259 inputPush(ctxt, input);
11260 return(ctxt);
11261}
11262
Daniel Veillard81273902003-09-30 00:43:48 +000011263#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011264/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011265 * xmlSAXParseMemoryWithData:
11266 * @sax: the SAX handler block
11267 * @buffer: an pointer to a char array
11268 * @size: the size of the array
11269 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11270 * documents
11271 * @data: the userdata
11272 *
11273 * parse an XML in-memory block and use the given SAX function block
11274 * to handle the parsing callback. If sax is NULL, fallback to the default
11275 * DOM tree building routines.
11276 *
11277 * User data (void *) is stored within the parser context in the
11278 * context's _private member, so it is available nearly everywhere in libxml
11279 *
11280 * Returns the resulting document tree
11281 */
11282
11283xmlDocPtr
11284xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
11285 int size, int recovery, void *data) {
11286 xmlDocPtr ret;
11287 xmlParserCtxtPtr ctxt;
11288
11289 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11290 if (ctxt == NULL) return(NULL);
11291 if (sax != NULL) {
11292 if (ctxt->sax != NULL)
11293 xmlFree(ctxt->sax);
11294 ctxt->sax = sax;
11295 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011296 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011297 if (data!=NULL) {
11298 ctxt->_private=data;
11299 }
11300
Daniel Veillardadba5f12003-04-04 16:09:01 +000011301 ctxt->recovery = recovery;
11302
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011303 xmlParseDocument(ctxt);
11304
11305 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11306 else {
11307 ret = NULL;
11308 xmlFreeDoc(ctxt->myDoc);
11309 ctxt->myDoc = NULL;
11310 }
11311 if (sax != NULL)
11312 ctxt->sax = NULL;
11313 xmlFreeParserCtxt(ctxt);
11314
11315 return(ret);
11316}
11317
11318/**
Owen Taylor3473f882001-02-23 17:55:21 +000011319 * xmlSAXParseMemory:
11320 * @sax: the SAX handler block
11321 * @buffer: an pointer to a char array
11322 * @size: the size of the array
11323 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
11324 * documents
11325 *
11326 * parse an XML in-memory block and use the given SAX function block
11327 * to handle the parsing callback. If sax is NULL, fallback to the default
11328 * DOM tree building routines.
11329 *
11330 * Returns the resulting document tree
11331 */
11332xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000011333xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
11334 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011335 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011336}
11337
11338/**
11339 * xmlParseMemory:
11340 * @buffer: an pointer to a char array
11341 * @size: the size of the array
11342 *
11343 * parse an XML in-memory block and build a tree.
11344 *
11345 * Returns the resulting document tree
11346 */
11347
Daniel Veillard50822cb2001-07-26 20:05:51 +000011348xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011349 return(xmlSAXParseMemory(NULL, buffer, size, 0));
11350}
11351
11352/**
11353 * xmlRecoverMemory:
11354 * @buffer: an pointer to a char array
11355 * @size: the size of the array
11356 *
11357 * parse an XML in-memory block and build a tree.
11358 * In the case the document is not Well Formed, a tree is built anyway
11359 *
11360 * Returns the resulting document tree
11361 */
11362
Daniel Veillard50822cb2001-07-26 20:05:51 +000011363xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011364 return(xmlSAXParseMemory(NULL, buffer, size, 1));
11365}
11366
11367/**
11368 * xmlSAXUserParseMemory:
11369 * @sax: a SAX handler
11370 * @user_data: The user data returned on SAX callbacks
11371 * @buffer: an in-memory XML document input
11372 * @size: the length of the XML document in bytes
11373 *
11374 * A better SAX parsing routine.
11375 * parse an XML in-memory buffer and call the given SAX handler routines.
11376 *
11377 * Returns 0 in case of success or a error number otherwise
11378 */
11379int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011380 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011381 int ret = 0;
11382 xmlParserCtxtPtr ctxt;
11383 xmlSAXHandlerPtr oldsax = NULL;
11384
Daniel Veillard9e923512002-08-14 08:48:52 +000011385 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000011386 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11387 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000011388 oldsax = ctxt->sax;
11389 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011390 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000011391 if (user_data != NULL)
11392 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000011393
11394 xmlParseDocument(ctxt);
11395
11396 if (ctxt->wellFormed)
11397 ret = 0;
11398 else {
11399 if (ctxt->errNo != 0)
11400 ret = ctxt->errNo;
11401 else
11402 ret = -1;
11403 }
Daniel Veillard9e923512002-08-14 08:48:52 +000011404 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000011405 xmlFreeParserCtxt(ctxt);
11406
11407 return ret;
11408}
Daniel Veillard81273902003-09-30 00:43:48 +000011409#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011410
11411/**
11412 * xmlCreateDocParserCtxt:
11413 * @cur: a pointer to an array of xmlChar
11414 *
11415 * Creates a parser context for an XML in-memory document.
11416 *
11417 * Returns the new parser context or NULL
11418 */
11419xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011420xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000011421 int len;
11422
11423 if (cur == NULL)
11424 return(NULL);
11425 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011426 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000011427}
11428
Daniel Veillard81273902003-09-30 00:43:48 +000011429#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011430/**
11431 * xmlSAXParseDoc:
11432 * @sax: the SAX handler block
11433 * @cur: a pointer to an array of xmlChar
11434 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11435 * documents
11436 *
11437 * parse an XML in-memory document and build a tree.
11438 * It use the given SAX function block to handle the parsing callback.
11439 * If sax is NULL, fallback to the default DOM tree building routines.
11440 *
11441 * Returns the resulting document tree
11442 */
11443
11444xmlDocPtr
11445xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
11446 xmlDocPtr ret;
11447 xmlParserCtxtPtr ctxt;
11448
11449 if (cur == NULL) return(NULL);
11450
11451
11452 ctxt = xmlCreateDocParserCtxt(cur);
11453 if (ctxt == NULL) return(NULL);
11454 if (sax != NULL) {
11455 ctxt->sax = sax;
11456 ctxt->userData = NULL;
11457 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011458 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011459
11460 xmlParseDocument(ctxt);
11461 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11462 else {
11463 ret = NULL;
11464 xmlFreeDoc(ctxt->myDoc);
11465 ctxt->myDoc = NULL;
11466 }
11467 if (sax != NULL)
11468 ctxt->sax = NULL;
11469 xmlFreeParserCtxt(ctxt);
11470
11471 return(ret);
11472}
11473
11474/**
11475 * xmlParseDoc:
11476 * @cur: a pointer to an array of xmlChar
11477 *
11478 * parse an XML in-memory document and build a tree.
11479 *
11480 * Returns the resulting document tree
11481 */
11482
11483xmlDocPtr
11484xmlParseDoc(xmlChar *cur) {
11485 return(xmlSAXParseDoc(NULL, cur, 0));
11486}
Daniel Veillard81273902003-09-30 00:43:48 +000011487#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011488
Daniel Veillard81273902003-09-30 00:43:48 +000011489#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000011490/************************************************************************
11491 * *
11492 * Specific function to keep track of entities references *
11493 * and used by the XSLT debugger *
11494 * *
11495 ************************************************************************/
11496
11497static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
11498
11499/**
11500 * xmlAddEntityReference:
11501 * @ent : A valid entity
11502 * @firstNode : A valid first node for children of entity
11503 * @lastNode : A valid last node of children entity
11504 *
11505 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
11506 */
11507static void
11508xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
11509 xmlNodePtr lastNode)
11510{
11511 if (xmlEntityRefFunc != NULL) {
11512 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
11513 }
11514}
11515
11516
11517/**
11518 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000011519 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000011520 *
11521 * Set the function to call call back when a xml reference has been made
11522 */
11523void
11524xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
11525{
11526 xmlEntityRefFunc = func;
11527}
Daniel Veillard81273902003-09-30 00:43:48 +000011528#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011529
11530/************************************************************************
11531 * *
11532 * Miscellaneous *
11533 * *
11534 ************************************************************************/
11535
11536#ifdef LIBXML_XPATH_ENABLED
11537#include <libxml/xpath.h>
11538#endif
11539
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011540extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000011541static int xmlParserInitialized = 0;
11542
11543/**
11544 * xmlInitParser:
11545 *
11546 * Initialization function for the XML parser.
11547 * This is not reentrant. Call once before processing in case of
11548 * use in multithreaded programs.
11549 */
11550
11551void
11552xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000011553 if (xmlParserInitialized != 0)
11554 return;
Owen Taylor3473f882001-02-23 17:55:21 +000011555
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011556 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
11557 (xmlGenericError == NULL))
11558 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011559 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000011560 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000011561 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000011562 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000011563 xmlDefaultSAXHandlerInit();
11564 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000011565#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011566 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000011567#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011568#ifdef LIBXML_HTML_ENABLED
11569 htmlInitAutoClose();
11570 htmlDefaultSAXHandlerInit();
11571#endif
11572#ifdef LIBXML_XPATH_ENABLED
11573 xmlXPathInit();
11574#endif
11575 xmlParserInitialized = 1;
11576}
11577
11578/**
11579 * xmlCleanupParser:
11580 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000011581 * Cleanup function for the XML library. It tries to reclaim all
11582 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000011583 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000011584 * function should not prevent reusing the library but one should
11585 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000011586 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000011587 */
11588
11589void
11590xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000011591 if (!xmlParserInitialized)
11592 return;
11593
Owen Taylor3473f882001-02-23 17:55:21 +000011594 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000011595#ifdef LIBXML_CATALOG_ENABLED
11596 xmlCatalogCleanup();
11597#endif
Daniel Veillard04054be2003-10-15 10:48:54 +000011598 xmlCleanupInputCallbacks();
11599#ifdef LIBXML_OUTPUT_ENABLED
11600 xmlCleanupOutputCallbacks();
11601#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011602 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011603 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000011604 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000011605 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000011606 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011607}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011608
11609/************************************************************************
11610 * *
11611 * New set (2.6.0) of simpler and more flexible APIs *
11612 * *
11613 ************************************************************************/
11614
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" (possibly NULL) must be
 * visible where the macro is expanded; @str is evaluated more than
 * once, so it must not have side effects.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves
 * as a single statement: it must be followed by a semicolon and can
 * be used safely as the body of an if/else.
 */
#define DICT_FREE(str)                                                  \
    do {                                                                \
        if ((str) && ((!dict) ||                                        \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));                                     \
    } while (0)
11626
11627/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011628 * xmlCtxtReset:
11629 * @ctxt: an XML parser context
11630 *
11631 * Reset a parser context
11632 */
11633void
11634xmlCtxtReset(xmlParserCtxtPtr ctxt)
11635{
11636 xmlParserInputPtr input;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011637 xmlDictPtr dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011638
11639 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
11640 xmlFreeInputStream(input);
11641 }
11642 ctxt->inputNr = 0;
11643 ctxt->input = NULL;
11644
11645 ctxt->spaceNr = 0;
11646 ctxt->spaceTab[0] = -1;
11647 ctxt->space = &ctxt->spaceTab[0];
11648
11649
11650 ctxt->nodeNr = 0;
11651 ctxt->node = NULL;
11652
11653 ctxt->nameNr = 0;
11654 ctxt->name = NULL;
11655
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011656 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011657 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011658 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011659 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011660 DICT_FREE(ctxt->directory);
11661 ctxt->directory = NULL;
11662 DICT_FREE(ctxt->extSubURI);
11663 ctxt->extSubURI = NULL;
11664 DICT_FREE(ctxt->extSubSystem);
11665 ctxt->extSubSystem = NULL;
11666 if (ctxt->myDoc != NULL)
11667 xmlFreeDoc(ctxt->myDoc);
11668 ctxt->myDoc = NULL;
11669
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011670 ctxt->standalone = -1;
11671 ctxt->hasExternalSubset = 0;
11672 ctxt->hasPErefs = 0;
11673 ctxt->html = 0;
11674 ctxt->external = 0;
11675 ctxt->instate = XML_PARSER_START;
11676 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011677
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011678 ctxt->wellFormed = 1;
11679 ctxt->nsWellFormed = 1;
11680 ctxt->valid = 1;
11681 ctxt->vctxt.userData = ctxt;
11682 ctxt->vctxt.error = xmlParserValidityError;
11683 ctxt->vctxt.warning = xmlParserValidityWarning;
11684 ctxt->record_info = 0;
11685 ctxt->nbChars = 0;
11686 ctxt->checkIndex = 0;
11687 ctxt->inSubset = 0;
11688 ctxt->errNo = XML_ERR_OK;
11689 ctxt->depth = 0;
11690 ctxt->charset = XML_CHAR_ENCODING_UTF8;
11691 ctxt->catalogs = NULL;
11692 xmlInitNodeInfoSeq(&ctxt->node_seq);
11693
11694 if (ctxt->attsDefault != NULL) {
11695 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
11696 ctxt->attsDefault = NULL;
11697 }
11698 if (ctxt->attsSpecial != NULL) {
11699 xmlHashFree(ctxt->attsSpecial, NULL);
11700 ctxt->attsSpecial = NULL;
11701 }
11702
Daniel Veillard4432df22003-09-28 18:58:27 +000011703#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011704 if (ctxt->catalogs != NULL)
11705 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000011706#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000011707 if (ctxt->lastError.code != XML_ERR_OK)
11708 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011709}
11710
11711/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000011712 * xmlCtxtResetPush:
11713 * @ctxt: an XML parser context
11714 * @chunk: a pointer to an array of chars
11715 * @size: number of chars in the array
11716 * @filename: an optional file name or URI
11717 * @encoding: the document encoding, or NULL
11718 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011719 * Reset a push parser context
11720 *
11721 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000011722 */
11723int
11724xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
11725 int size, const char *filename, const char *encoding)
11726{
11727 xmlParserInputPtr inputStream;
11728 xmlParserInputBufferPtr buf;
11729 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11730
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011731 if (ctxt == NULL)
11732 return(1);
11733
Daniel Veillard9ba8e382003-10-28 21:31:45 +000011734 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
11735 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11736
11737 buf = xmlAllocParserInputBuffer(enc);
11738 if (buf == NULL)
11739 return(1);
11740
11741 if (ctxt == NULL) {
11742 xmlFreeParserInputBuffer(buf);
11743 return(1);
11744 }
11745
11746 xmlCtxtReset(ctxt);
11747
11748 if (ctxt->pushTab == NULL) {
11749 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
11750 sizeof(xmlChar *));
11751 if (ctxt->pushTab == NULL) {
11752 xmlErrMemory(ctxt, NULL);
11753 xmlFreeParserInputBuffer(buf);
11754 return(1);
11755 }
11756 }
11757
11758 if (filename == NULL) {
11759 ctxt->directory = NULL;
11760 } else {
11761 ctxt->directory = xmlParserGetDirectory(filename);
11762 }
11763
11764 inputStream = xmlNewInputStream(ctxt);
11765 if (inputStream == NULL) {
11766 xmlFreeParserInputBuffer(buf);
11767 return(1);
11768 }
11769
11770 if (filename == NULL)
11771 inputStream->filename = NULL;
11772 else
11773 inputStream->filename = (char *)
11774 xmlCanonicPath((const xmlChar *) filename);
11775 inputStream->buf = buf;
11776 inputStream->base = inputStream->buf->buffer->content;
11777 inputStream->cur = inputStream->buf->buffer->content;
11778 inputStream->end =
11779 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
11780
11781 inputPush(ctxt, inputStream);
11782
11783 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11784 (ctxt->input->buf != NULL)) {
11785 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11786 int cur = ctxt->input->cur - ctxt->input->base;
11787
11788 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11789
11790 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11791 ctxt->input->cur = ctxt->input->base + cur;
11792 ctxt->input->end =
11793 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
11794 use];
11795#ifdef DEBUG_PUSH
11796 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11797#endif
11798 }
11799
11800 if (encoding != NULL) {
11801 xmlCharEncodingHandlerPtr hdlr;
11802
11803 hdlr = xmlFindCharEncodingHandler(encoding);
11804 if (hdlr != NULL) {
11805 xmlSwitchToEncoding(ctxt, hdlr);
11806 } else {
11807 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
11808 "Unsupported encoding %s\n", BAD_CAST encoding);
11809 }
11810 } else if (enc != XML_CHAR_ENCODING_NONE) {
11811 xmlSwitchEncoding(ctxt, enc);
11812 }
11813
11814 return(0);
11815}
11816
11817/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011818 * xmlCtxtUseOptions:
11819 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011820 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011821 *
11822 * Applies the options to the parser context
11823 *
11824 * Returns 0 in case of success, the set of unknown or unimplemented options
11825 * in case of error.
11826 */
11827int
11828xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
11829{
11830 if (options & XML_PARSE_RECOVER) {
11831 ctxt->recovery = 1;
11832 options -= XML_PARSE_RECOVER;
11833 } else
11834 ctxt->recovery = 0;
11835 if (options & XML_PARSE_DTDLOAD) {
11836 ctxt->loadsubset = XML_DETECT_IDS;
11837 options -= XML_PARSE_DTDLOAD;
11838 } else
11839 ctxt->loadsubset = 0;
11840 if (options & XML_PARSE_DTDATTR) {
11841 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
11842 options -= XML_PARSE_DTDATTR;
11843 }
11844 if (options & XML_PARSE_NOENT) {
11845 ctxt->replaceEntities = 1;
11846 /* ctxt->loadsubset |= XML_DETECT_IDS; */
11847 options -= XML_PARSE_NOENT;
11848 } else
11849 ctxt->replaceEntities = 0;
11850 if (options & XML_PARSE_NOWARNING) {
11851 ctxt->sax->warning = NULL;
11852 options -= XML_PARSE_NOWARNING;
11853 }
11854 if (options & XML_PARSE_NOERROR) {
11855 ctxt->sax->error = NULL;
11856 ctxt->sax->fatalError = NULL;
11857 options -= XML_PARSE_NOERROR;
11858 }
11859 if (options & XML_PARSE_PEDANTIC) {
11860 ctxt->pedantic = 1;
11861 options -= XML_PARSE_PEDANTIC;
11862 } else
11863 ctxt->pedantic = 0;
11864 if (options & XML_PARSE_NOBLANKS) {
11865 ctxt->keepBlanks = 0;
11866 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
11867 options -= XML_PARSE_NOBLANKS;
11868 } else
11869 ctxt->keepBlanks = 1;
11870 if (options & XML_PARSE_DTDVALID) {
11871 ctxt->validate = 1;
11872 if (options & XML_PARSE_NOWARNING)
11873 ctxt->vctxt.warning = NULL;
11874 if (options & XML_PARSE_NOERROR)
11875 ctxt->vctxt.error = NULL;
11876 options -= XML_PARSE_DTDVALID;
11877 } else
11878 ctxt->validate = 0;
Daniel Veillard81273902003-09-30 00:43:48 +000011879#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011880 if (options & XML_PARSE_SAX1) {
11881 ctxt->sax->startElement = xmlSAX2StartElement;
11882 ctxt->sax->endElement = xmlSAX2EndElement;
11883 ctxt->sax->startElementNs = NULL;
11884 ctxt->sax->endElementNs = NULL;
11885 ctxt->sax->initialized = 1;
11886 options -= XML_PARSE_SAX1;
11887 }
Daniel Veillard81273902003-09-30 00:43:48 +000011888#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011889 if (options & XML_PARSE_NODICT) {
11890 ctxt->dictNames = 0;
11891 options -= XML_PARSE_NODICT;
11892 } else {
11893 ctxt->dictNames = 1;
11894 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000011895 if (options & XML_PARSE_NOCDATA) {
11896 ctxt->sax->cdataBlock = NULL;
11897 options -= XML_PARSE_NOCDATA;
11898 }
11899 if (options & XML_PARSE_NSCLEAN) {
11900 ctxt->options |= XML_PARSE_NSCLEAN;
11901 options -= XML_PARSE_NSCLEAN;
11902 }
Daniel Veillard61b93382003-11-03 14:28:31 +000011903 if (options & XML_PARSE_NONET) {
11904 ctxt->options |= XML_PARSE_NONET;
11905 options -= XML_PARSE_NONET;
11906 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000011907 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011908 return (options);
11909}
11910
11911/**
11912 * xmlDoRead:
11913 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000011914 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011915 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011916 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011917 * @reuse: keep the context for reuse
11918 *
11919 * Common front-end for the xmlRead functions
11920 *
11921 * Returns the resulting document tree or NULL
11922 */
11923static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000011924xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
11925 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011926{
11927 xmlDocPtr ret;
11928
11929 xmlCtxtUseOptions(ctxt, options);
11930 if (encoding != NULL) {
11931 xmlCharEncodingHandlerPtr hdlr;
11932
11933 hdlr = xmlFindCharEncodingHandler(encoding);
11934 if (hdlr != NULL)
11935 xmlSwitchToEncoding(ctxt, hdlr);
11936 }
Daniel Veillard60942de2003-09-25 21:05:58 +000011937 if ((URL != NULL) && (ctxt->input != NULL) &&
11938 (ctxt->input->filename == NULL))
11939 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011940 xmlParseDocument(ctxt);
11941 if ((ctxt->wellFormed) || ctxt->recovery)
11942 ret = ctxt->myDoc;
11943 else {
11944 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011945 if (ctxt->myDoc != NULL) {
Daniel Veillard9d8c1df2003-09-26 23:27:25 +000011946 if ((ctxt->dictNames) &&
11947 (ctxt->myDoc->dict == ctxt->dict))
11948 xmlDictReference(ctxt->dict);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011949 xmlFreeDoc(ctxt->myDoc);
11950 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011951 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011952 ctxt->myDoc = NULL;
11953 if (!reuse) {
11954 if ((ctxt->dictNames) &&
11955 (ret != NULL) &&
11956 (ret->dict == ctxt->dict))
11957 ctxt->dict = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011958 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011959 } else {
11960 /* Must duplicate the reference to the dictionary */
11961 if ((ctxt->dictNames) &&
11962 (ret != NULL) &&
11963 (ret->dict == ctxt->dict))
11964 xmlDictReference(ctxt->dict);
11965 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011966
11967 return (ret);
11968}
11969
11970/**
11971 * xmlReadDoc:
11972 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000011973 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011974 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011975 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011976 *
11977 * parse an XML in-memory document and build a tree.
11978 *
11979 * Returns the resulting document tree
11980 */
11981xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000011982xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011983{
11984 xmlParserCtxtPtr ctxt;
11985
11986 if (cur == NULL)
11987 return (NULL);
11988
11989 ctxt = xmlCreateDocParserCtxt(cur);
11990 if (ctxt == NULL)
11991 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000011992 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011993}
11994
11995/**
11996 * xmlReadFile:
11997 * @filename: a file or URL
11998 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011999 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012000 *
12001 * parse an XML file from the filesystem or the network.
12002 *
12003 * Returns the resulting document tree
12004 */
12005xmlDocPtr
12006xmlReadFile(const char *filename, const char *encoding, int options)
12007{
12008 xmlParserCtxtPtr ctxt;
12009
Daniel Veillard61b93382003-11-03 14:28:31 +000012010 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012011 if (ctxt == NULL)
12012 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012013 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012014}
12015
12016/**
12017 * xmlReadMemory:
12018 * @buffer: a pointer to a char array
12019 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012020 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012021 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012022 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012023 *
12024 * parse an XML in-memory document and build a tree.
12025 *
12026 * Returns the resulting document tree
12027 */
12028xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012029xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012030{
12031 xmlParserCtxtPtr ctxt;
12032
12033 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12034 if (ctxt == NULL)
12035 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012036 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012037}
12038
12039/**
12040 * xmlReadFd:
12041 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012042 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012043 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012044 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012045 *
12046 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012047 * NOTE that the file descriptor will not be closed when the
12048 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012049 *
12050 * Returns the resulting document tree
12051 */
12052xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012053xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012054{
12055 xmlParserCtxtPtr ctxt;
12056 xmlParserInputBufferPtr input;
12057 xmlParserInputPtr stream;
12058
12059 if (fd < 0)
12060 return (NULL);
12061
12062 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12063 if (input == NULL)
12064 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012065 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012066 ctxt = xmlNewParserCtxt();
12067 if (ctxt == NULL) {
12068 xmlFreeParserInputBuffer(input);
12069 return (NULL);
12070 }
12071 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12072 if (stream == NULL) {
12073 xmlFreeParserInputBuffer(input);
12074 xmlFreeParserCtxt(ctxt);
12075 return (NULL);
12076 }
12077 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012078 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012079}
12080
12081/**
12082 * xmlReadIO:
12083 * @ioread: an I/O read function
12084 * @ioclose: an I/O close function
12085 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012086 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012087 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012088 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012089 *
12090 * parse an XML document from I/O functions and source and build a tree.
12091 *
12092 * Returns the resulting document tree
12093 */
12094xmlDocPtr
12095xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000012096 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012097{
12098 xmlParserCtxtPtr ctxt;
12099 xmlParserInputBufferPtr input;
12100 xmlParserInputPtr stream;
12101
12102 if (ioread == NULL)
12103 return (NULL);
12104
12105 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12106 XML_CHAR_ENCODING_NONE);
12107 if (input == NULL)
12108 return (NULL);
12109 ctxt = xmlNewParserCtxt();
12110 if (ctxt == NULL) {
12111 xmlFreeParserInputBuffer(input);
12112 return (NULL);
12113 }
12114 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12115 if (stream == NULL) {
12116 xmlFreeParserInputBuffer(input);
12117 xmlFreeParserCtxt(ctxt);
12118 return (NULL);
12119 }
12120 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012121 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012122}
12123
12124/**
12125 * xmlCtxtReadDoc:
12126 * @ctxt: an XML parser context
12127 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012128 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012129 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012130 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012131 *
12132 * parse an XML in-memory document and build a tree.
12133 * This reuses the existing @ctxt parser context
12134 *
12135 * Returns the resulting document tree
12136 */
12137xmlDocPtr
12138xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000012139 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012140{
12141 xmlParserInputPtr stream;
12142
12143 if (cur == NULL)
12144 return (NULL);
12145 if (ctxt == NULL)
12146 return (NULL);
12147
12148 xmlCtxtReset(ctxt);
12149
12150 stream = xmlNewStringInputStream(ctxt, cur);
12151 if (stream == NULL) {
12152 return (NULL);
12153 }
12154 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012155 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012156}
12157
12158/**
12159 * xmlCtxtReadFile:
12160 * @ctxt: an XML parser context
12161 * @filename: a file or URL
12162 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012163 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012164 *
12165 * parse an XML file from the filesystem or the network.
12166 * This reuses the existing @ctxt parser context
12167 *
12168 * Returns the resulting document tree
12169 */
12170xmlDocPtr
12171xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
12172 const char *encoding, int options)
12173{
12174 xmlParserInputPtr stream;
12175
12176 if (filename == NULL)
12177 return (NULL);
12178 if (ctxt == NULL)
12179 return (NULL);
12180
12181 xmlCtxtReset(ctxt);
12182
12183 stream = xmlNewInputFromFile(ctxt, filename);
12184 if (stream == NULL) {
12185 return (NULL);
12186 }
12187 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012188 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012189}
12190
12191/**
12192 * xmlCtxtReadMemory:
12193 * @ctxt: an XML parser context
12194 * @buffer: a pointer to a char array
12195 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012196 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012197 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012198 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012199 *
12200 * parse an XML in-memory document and build a tree.
12201 * This reuses the existing @ctxt parser context
12202 *
12203 * Returns the resulting document tree
12204 */
12205xmlDocPtr
12206xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000012207 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012208{
12209 xmlParserInputBufferPtr input;
12210 xmlParserInputPtr stream;
12211
12212 if (ctxt == NULL)
12213 return (NULL);
12214 if (buffer == NULL)
12215 return (NULL);
12216
12217 xmlCtxtReset(ctxt);
12218
12219 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12220 if (input == NULL) {
12221 return(NULL);
12222 }
12223
12224 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12225 if (stream == NULL) {
12226 xmlFreeParserInputBuffer(input);
12227 return(NULL);
12228 }
12229
12230 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012231 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012232}
12233
12234/**
12235 * xmlCtxtReadFd:
12236 * @ctxt: an XML parser context
12237 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012238 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012239 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012240 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012241 *
12242 * parse an XML from a file descriptor and build a tree.
12243 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012244 * NOTE that the file descriptor will not be closed when the
12245 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012246 *
12247 * Returns the resulting document tree
12248 */
12249xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012250xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
12251 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012252{
12253 xmlParserInputBufferPtr input;
12254 xmlParserInputPtr stream;
12255
12256 if (fd < 0)
12257 return (NULL);
12258 if (ctxt == NULL)
12259 return (NULL);
12260
12261 xmlCtxtReset(ctxt);
12262
12263
12264 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12265 if (input == NULL)
12266 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012267 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012268 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12269 if (stream == NULL) {
12270 xmlFreeParserInputBuffer(input);
12271 return (NULL);
12272 }
12273 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012274 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012275}
12276
12277/**
12278 * xmlCtxtReadIO:
12279 * @ctxt: an XML parser context
12280 * @ioread: an I/O read function
12281 * @ioclose: an I/O close function
12282 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012283 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012284 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012285 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012286 *
12287 * parse an XML document from I/O functions and source and build a tree.
12288 * This reuses the existing @ctxt parser context
12289 *
12290 * Returns the resulting document tree
12291 */
12292xmlDocPtr
12293xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
12294 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000012295 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012296 const char *encoding, int options)
12297{
12298 xmlParserInputBufferPtr input;
12299 xmlParserInputPtr stream;
12300
12301 if (ioread == NULL)
12302 return (NULL);
12303 if (ctxt == NULL)
12304 return (NULL);
12305
12306 xmlCtxtReset(ctxt);
12307
12308 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12309 XML_CHAR_ENCODING_NONE);
12310 if (input == NULL)
12311 return (NULL);
12312 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12313 if (stream == NULL) {
12314 xmlFreeParserInputBuffer(input);
12315 return (NULL);
12316 }
12317 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012318 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012319}