blob: eea0d4223678132bb87394f48fea938f214f38a6 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their related
 * production in the XML specification. A few productions defining the
 * different character ranges are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
Owen Taylor3473f882001-02-23 17:55:21 +000060
61#ifdef HAVE_CTYPE_H
62#include <ctype.h>
63#endif
64#ifdef HAVE_STDLIB_H
65#include <stdlib.h>
66#endif
67#ifdef HAVE_SYS_STAT_H
68#include <sys/stat.h>
69#endif
70#ifdef HAVE_FCNTL_H
71#include <fcntl.h>
72#endif
73#ifdef HAVE_UNISTD_H
74#include <unistd.h>
75#endif
76#ifdef HAVE_ZLIB_H
77#include <zlib.h>
78#endif
79
Daniel Veillard3b2e4e12003-02-03 08:52:58 +000080/**
81 * MAX_DEPTH:
82 *
83 * arbitrary depth limit for the XML documents that we allow to
84 * process. This is not a limitation of the parser but a safety
85 * boundary feature.
86 */
87#define MAX_DEPTH 1024
Owen Taylor3473f882001-02-23 17:55:21 +000088
Daniel Veillard0fb18932003-09-07 09:14:37 +000089#define SAX2 1
90
Daniel Veillard21a0f912001-02-25 19:54:14 +000091#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000092#define XML_PARSER_BUFFER_SIZE 100
93
Daniel Veillard5997aca2002-03-18 18:36:20 +000094#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
95
/*
 * Processing-instruction targets that start with "xml" and are
 * nevertheless allowed by W3C specifications.
 * The table is terminated by a NULL entry.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
104
105/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000106xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
107 const xmlChar **str);
108
Daniel Veillard7d515752003-09-26 19:12:37 +0000109static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000110xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
111 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000112 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000113 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000114
Daniel Veillard81273902003-09-30 00:43:48 +0000115#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000116static void
117xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
118 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000119#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000120
Daniel Veillard7d515752003-09-26 19:12:37 +0000121static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000122xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
123 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000124
125/************************************************************************
126 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000127 * Some factorized error routines *
128 * *
129 ************************************************************************/
130
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000131
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000132/**
133 * xmlErrMemory:
134 * @ctxt: an XML parser context
135 * @extra: extra informations
136 *
137 * Handle a redefinition of attribute error
138 */
139static void
140xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
141{
142 if (ctxt != NULL) {
143 ctxt->errNo = XML_ERR_NO_MEMORY;
144 ctxt->instate = XML_PARSER_EOF;
145 ctxt->disableSAX = 1;
146 }
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000147 if (extra)
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000148 __xmlRaiseError(NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
149 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
150 NULL, NULL, 0, 0,
151 "Memory allocation failed : %s\n", extra);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000152 else
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000153 __xmlRaiseError(NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
154 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
155 NULL, NULL, 0, 0, "Memory allocation failed\n");
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000156}
157
158/**
159 * xmlErrAttributeDup:
160 * @ctxt: an XML parser context
161 * @prefix: the attribute prefix
162 * @localname: the attribute localname
163 *
164 * Handle a redefinition of attribute error
165 */
166static void
167xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
168 const xmlChar * localname)
169{
170 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000171 if (prefix == NULL)
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000172 __xmlRaiseError(NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
173 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
174 (const char *) localname, NULL, NULL, 0, 0,
175 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000176 else
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000177 __xmlRaiseError(NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
178 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
179 (const char *) prefix, (const char *) localname,
180 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
181 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000182 ctxt->wellFormed = 0;
183 if (ctxt->recovery == 0)
184 ctxt->disableSAX = 1;
185}
186
187/**
188 * xmlFatalErr:
189 * @ctxt: an XML parser context
190 * @error: the error number
191 * @extra: extra information string
192 *
193 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
194 */
195static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000196xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000197{
198 const char *errmsg;
199
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000200 switch (error) {
201 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000202 errmsg = "CharRef: invalid hexadecimal value\n";
203 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000204 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000205 errmsg = "CharRef: invalid decimal value\n";
206 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000207 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000208 errmsg = "CharRef: invalid value\n";
209 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000210 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000211 errmsg = "internal error";
212 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000213 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000214 errmsg = "PEReference at end of document\n";
215 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000216 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000217 errmsg = "PEReference in prolog\n";
218 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000219 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000220 errmsg = "PEReference in epilog\n";
221 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000222 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000223 errmsg = "PEReference: no name\n";
224 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000225 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000226 errmsg = "PEReference: expecting ';'\n";
227 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000228 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000229 errmsg = "Detected an entity reference loop\n";
230 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000231 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000232 errmsg = "EntityValue: \" or ' expected\n";
233 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000234 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000235 errmsg = "PEReferences forbidden in internal subset\n";
236 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000237 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000238 errmsg = "EntityValue: \" or ' expected\n";
239 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000240 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000241 errmsg = "AttValue: \" or ' expected\n";
242 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000243 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000244 errmsg = "Unescaped '<' not allowed in attributes values\n";
245 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000246 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000247 errmsg = "SystemLiteral \" or ' expected\n";
248 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000249 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000250 errmsg = "Unfinished System or Public ID \" or ' expected\n";
251 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000252 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000253 errmsg = "Sequence ']]>' not allowed in content\n";
254 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000255 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000256 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
257 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000258 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000259 errmsg = "PUBLIC, the Public Identifier is missing\n";
260 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000261 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000262 errmsg = "Comment must not contain '--' (double-hyphen)\n";
263 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000264 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000265 errmsg = "xmlParsePI : no target name\n";
266 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000267 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000268 errmsg = "Invalid PI name\n";
269 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000270 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000271 errmsg = "NOTATION: Name expected here\n";
272 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000273 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000274 errmsg = "'>' required to close NOTATION declaration\n";
275 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000276 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000277 errmsg = "Entity value required\n";
278 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000279 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000280 errmsg = "Fragment not allowed";
281 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000282 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000283 errmsg = "'(' required to start ATTLIST enumeration\n";
284 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000285 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000286 errmsg = "NmToken expected in ATTLIST enumeration\n";
287 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000288 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000289 errmsg = "')' required to finish ATTLIST enumeration\n";
290 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000291 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000292 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
293 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000294 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000295 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
296 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000297 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000298 errmsg = "ContentDecl : Name or '(' expected\n";
299 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000300 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000301 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg =
305 "PEReference: forbidden within markup decl in internal subset\n";
306 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000307 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000308 errmsg = "expected '>'\n";
309 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000310 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000311 errmsg = "XML conditional section '[' expected\n";
312 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000313 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000314 errmsg = "Content error in the external subset\n";
315 break;
316 case XML_ERR_CONDSEC_INVALID_KEYWORD:
317 errmsg =
318 "conditional section INCLUDE or IGNORE keyword expected\n";
319 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000320 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000321 errmsg = "XML conditional section not closed\n";
322 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000323 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000324 errmsg = "Text declaration '<?xml' required\n";
325 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000326 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000327 errmsg = "parsing XML declaration: '?>' expected\n";
328 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000329 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000330 errmsg = "external parsed entities cannot be standalone\n";
331 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000332 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000333 errmsg = "EntityRef: expecting ';'\n";
334 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000335 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000336 errmsg = "DOCTYPE improperly terminated\n";
337 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000338 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000339 errmsg = "EndTag: '</' not found\n";
340 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000341 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000342 errmsg = "expected '='\n";
343 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000344 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000345 errmsg = "String not closed expecting \" or '\n";
346 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000347 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000348 errmsg = "String not started expecting ' or \"\n";
349 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000350 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000351 errmsg = "Invalid XML encoding name\n";
352 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000353 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000354 errmsg = "standalone accepts only 'yes' or 'no'\n";
355 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000356 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000357 errmsg = "Document is empty\n";
358 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000359 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000360 errmsg = "Extra content at the end of the document\n";
361 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000362 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000363 errmsg = "chunk is not well balanced\n";
364 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000365 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000366 errmsg = "extra content at the end of well balanced chunk\n";
367 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000368 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000369 errmsg = "Malformed declaration expecting version\n";
370 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000371#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000372 case:
373 errmsg = "\n";
374 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000375#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000376 default:
377 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000378 }
379 ctxt->errNo = error;
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000380 __xmlRaiseError(NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
381 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
382 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000383 ctxt->wellFormed = 0;
384 if (ctxt->recovery == 0)
385 ctxt->disableSAX = 1;
386}
387
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000388/**
389 * xmlFatalErrMsg:
390 * @ctxt: an XML parser context
391 * @error: the error number
392 * @msg: the error message
393 *
394 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
395 */
396static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000397xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
398 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000399{
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000400 ctxt->errNo = error;
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000401 __xmlRaiseError(NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
402 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000403 ctxt->wellFormed = 0;
404 if (ctxt->recovery == 0)
405 ctxt->disableSAX = 1;
406}
407
408/**
409 * xmlFatalErrMsgInt:
410 * @ctxt: an XML parser context
411 * @error: the error number
412 * @msg: the error message
413 * @val: an integer value
414 *
415 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
416 */
417static void
418xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000419 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000420{
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000421 ctxt->errNo = error;
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000422 __xmlRaiseError(NULL, NULL,
423 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
424 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000425 ctxt->wellFormed = 0;
426 if (ctxt->recovery == 0)
427 ctxt->disableSAX = 1;
428}
429
430/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000431 * xmlFatalErrMsgStr:
432 * @ctxt: an XML parser context
433 * @error: the error number
434 * @msg: the error message
435 * @val: a string value
436 *
437 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
438 */
439static void
440xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000441 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000442{
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000443 ctxt->errNo = error;
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000444 __xmlRaiseError(NULL, NULL, ctxt, NULL,
445 XML_FROM_PARSER, error, XML_ERR_FATAL,
446 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
447 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000448 ctxt->wellFormed = 0;
449 if (ctxt->recovery == 0)
450 ctxt->disableSAX = 1;
451}
452
453/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000454 * xmlNsErr:
455 * @ctxt: an XML parser context
456 * @error: the error number
457 * @msg: the message
458 * @info1: extra information string
459 * @info2: extra information string
460 *
461 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462 */
463static void
464xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
465 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000466 const xmlChar * info1, const xmlChar * info2,
467 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000468{
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000469 ctxt->errNo = error;
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 __xmlRaiseError(NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
471 XML_ERR_ERROR, NULL, 0, (const char *) info1,
472 (const char *) info2, (const char *) info3, 0, 0, msg,
473 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000474 ctxt->nsWellFormed = 0;
475}
476
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000477/************************************************************************
478 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000479 * SAX2 defaulted attributes handling *
480 * *
481 ************************************************************************/
482
483/**
484 * xmlDetectSAX2:
485 * @ctxt: an XML parser context
486 *
487 * Do the SAX2 detection and specific intialization
488 */
489static void
490xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
491 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000492#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000493 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
494 ((ctxt->sax->startElementNs != NULL) ||
495 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000496#else
497 ctxt->sax2 = 1;
498#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000499
500 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
501 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
502 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
503}
504
Daniel Veillarde57ec792003-09-10 10:50:59 +0000505typedef struct _xmlDefAttrs xmlDefAttrs;
506typedef xmlDefAttrs *xmlDefAttrsPtr;
507struct _xmlDefAttrs {
508 int nbAttrs; /* number of defaulted attributes on that element */
509 int maxAttrs; /* the size of the array */
510 const xmlChar *values[4]; /* array of localname/prefix/values */
511};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000512
513/**
514 * xmlAddDefAttrs:
515 * @ctxt: an XML parser context
516 * @fullname: the element fullname
517 * @fullattr: the attribute fullname
518 * @value: the attribute value
519 *
520 * Add a defaulted attribute for an element
521 */
522static void
523xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
524 const xmlChar *fullname,
525 const xmlChar *fullattr,
526 const xmlChar *value) {
527 xmlDefAttrsPtr defaults;
528 int len;
529 const xmlChar *name;
530 const xmlChar *prefix;
531
532 if (ctxt->attsDefault == NULL) {
533 ctxt->attsDefault = xmlHashCreate(10);
534 if (ctxt->attsDefault == NULL)
535 goto mem_error;
536 }
537
538 /*
539 * plit the element name into prefix:localname , the string found
540 * are within the DTD and hen not associated to namespace names.
541 */
542 name = xmlSplitQName3(fullname, &len);
543 if (name == NULL) {
544 name = xmlDictLookup(ctxt->dict, fullname, -1);
545 prefix = NULL;
546 } else {
547 name = xmlDictLookup(ctxt->dict, name, -1);
548 prefix = xmlDictLookup(ctxt->dict, fullname, len);
549 }
550
551 /*
552 * make sure there is some storage
553 */
554 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
555 if (defaults == NULL) {
556 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
557 12 * sizeof(const xmlChar *));
558 if (defaults == NULL)
559 goto mem_error;
560 defaults->maxAttrs = 4;
561 defaults->nbAttrs = 0;
562 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
563 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
564 defaults = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
565 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
566 if (defaults == NULL)
567 goto mem_error;
568 defaults->maxAttrs *= 2;
569 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
570 }
571
572 /*
573 * plit the element name into prefix:localname , the string found
574 * are within the DTD and hen not associated to namespace names.
575 */
576 name = xmlSplitQName3(fullattr, &len);
577 if (name == NULL) {
578 name = xmlDictLookup(ctxt->dict, fullattr, -1);
579 prefix = NULL;
580 } else {
581 name = xmlDictLookup(ctxt->dict, name, -1);
582 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
583 }
584
585 defaults->values[4 * defaults->nbAttrs] = name;
586 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
587 /* intern the string and precompute the end */
588 len = xmlStrlen(value);
589 value = xmlDictLookup(ctxt->dict, value, len);
590 defaults->values[4 * defaults->nbAttrs + 2] = value;
591 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
592 defaults->nbAttrs++;
593
594 return;
595
596mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000597 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000598 return;
599}
600
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000601/**
602 * xmlAddSpecialAttr:
603 * @ctxt: an XML parser context
604 * @fullname: the element fullname
605 * @fullattr: the attribute fullname
606 * @type: the attribute type
607 *
608 * Register that this attribute is not CDATA
609 */
610static void
611xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
612 const xmlChar *fullname,
613 const xmlChar *fullattr,
614 int type)
615{
616 if (ctxt->attsSpecial == NULL) {
617 ctxt->attsSpecial = xmlHashCreate(10);
618 if (ctxt->attsSpecial == NULL)
619 goto mem_error;
620 }
621
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000622 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
623 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000624 return;
625
626mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000627 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000628 return;
629}
630
Daniel Veillard4432df22003-09-28 18:58:27 +0000631/**
632 * xmlCheckLanguageID:
633 * @lang: pointer to the string value
634 *
635 * Checks that the value conforms to the LanguageID production:
636 *
637 * NOTE: this is somewhat deprecated, those productions were removed from
638 * the XML Second edition.
639 *
640 * [33] LanguageID ::= Langcode ('-' Subcode)*
641 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
642 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
643 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
644 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
645 * [38] Subcode ::= ([a-z] | [A-Z])+
646 *
647 * Returns 1 if correct 0 otherwise
648 **/
649int
650xmlCheckLanguageID(const xmlChar * lang)
651{
652 const xmlChar *cur = lang;
653
654 if (cur == NULL)
655 return (0);
656 if (((cur[0] == 'i') && (cur[1] == '-')) ||
657 ((cur[0] == 'I') && (cur[1] == '-'))) {
658 /*
659 * IANA code
660 */
661 cur += 2;
662 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
663 ((cur[0] >= 'a') && (cur[0] <= 'z')))
664 cur++;
665 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
666 ((cur[0] == 'X') && (cur[1] == '-'))) {
667 /*
668 * User code
669 */
670 cur += 2;
671 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
672 ((cur[0] >= 'a') && (cur[0] <= 'z')))
673 cur++;
674 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
675 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
676 /*
677 * ISO639
678 */
679 cur++;
680 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
681 ((cur[0] >= 'a') && (cur[0] <= 'z')))
682 cur++;
683 else
684 return (0);
685 } else
686 return (0);
687 while (cur[0] != 0) { /* non input consuming */
688 if (cur[0] != '-')
689 return (0);
690 cur++;
691 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
692 ((cur[0] >= 'a') && (cur[0] <= 'z')))
693 cur++;
694 else
695 return (0);
696 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
697 ((cur[0] >= 'a') && (cur[0] <= 'z')))
698 cur++;
699 }
700 return (1);
701}
702
Owen Taylor3473f882001-02-23 17:55:21 +0000703/************************************************************************
704 * *
705 * Parser stacks related functions and macros *
706 * *
707 ************************************************************************/
708
709xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
710 const xmlChar ** str);
711
Daniel Veillard0fb18932003-09-07 09:14:37 +0000712#ifdef SAX2
713/**
714 * nsPush:
715 * @ctxt: an XML parser context
716 * @prefix: the namespace prefix or NULL
717 * @URL: the namespace name
718 *
719 * Pushes a new parser namespace on top of the ns stack
720 *
William M. Brack7b9154b2003-09-27 19:23:50 +0000721 * Returns -1 in case of error, -2 if the namespace should be discarded
722 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +0000723 */
724static int
725nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
726{
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000727 if (ctxt->options & XML_PARSE_NSCLEAN) {
728 int i;
729 for (i = 0;i < ctxt->nsNr;i += 2) {
730 if (ctxt->nsTab[i] == prefix) {
731 /* in scope */
732 if (ctxt->nsTab[i + 1] == URL)
733 return(-2);
734 /* out of scope keep it */
735 break;
736 }
737 }
738 }
Daniel Veillard0fb18932003-09-07 09:14:37 +0000739 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
740 ctxt->nsMax = 10;
741 ctxt->nsNr = 0;
742 ctxt->nsTab = (const xmlChar **)
743 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
744 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000745 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000746 ctxt->nsMax = 0;
747 return (-1);
748 }
749 } else if (ctxt->nsNr >= ctxt->nsMax) {
750 ctxt->nsMax *= 2;
751 ctxt->nsTab = (const xmlChar **)
752 xmlRealloc(ctxt->nsTab,
753 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
754 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000755 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000756 ctxt->nsMax /= 2;
757 return (-1);
758 }
759 }
760 ctxt->nsTab[ctxt->nsNr++] = prefix;
761 ctxt->nsTab[ctxt->nsNr++] = URL;
762 return (ctxt->nsNr);
763}
764/**
765 * nsPop:
766 * @ctxt: an XML parser context
767 * @nr: the number to pop
768 *
769 * Pops the top @nr parser prefix/namespace from the ns stack
770 *
771 * Returns the number of namespaces removed
772 */
773static int
774nsPop(xmlParserCtxtPtr ctxt, int nr)
775{
776 int i;
777
778 if (ctxt->nsTab == NULL) return(0);
779 if (ctxt->nsNr < nr) {
780 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
781 nr = ctxt->nsNr;
782 }
783 if (ctxt->nsNr <= 0)
784 return (0);
785
786 for (i = 0;i < nr;i++) {
787 ctxt->nsNr--;
788 ctxt->nsTab[ctxt->nsNr] = NULL;
789 }
790 return(nr);
791}
792#endif
793
794static int
795xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
796 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000797 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000798 int maxatts;
799
800 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000801 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000802 atts = (const xmlChar **)
803 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000804 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000805 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000806 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
807 if (attallocs == NULL) goto mem_error;
808 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000809 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000810 } else if (nr + 5 > ctxt->maxatts) {
811 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000812 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
813 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000814 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000815 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000816 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
817 (maxatts / 5) * sizeof(int));
818 if (attallocs == NULL) goto mem_error;
819 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000820 ctxt->maxatts = maxatts;
821 }
822 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000823mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000824 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000825 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000826}
827
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000828/**
829 * inputPush:
830 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000831 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000832 *
833 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000834 *
835 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000836 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000837extern int
838inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
839{
840 if (ctxt->inputNr >= ctxt->inputMax) {
841 ctxt->inputMax *= 2;
842 ctxt->inputTab =
843 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
844 ctxt->inputMax *
845 sizeof(ctxt->inputTab[0]));
846 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000847 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000848 return (0);
849 }
850 }
851 ctxt->inputTab[ctxt->inputNr] = value;
852 ctxt->input = value;
853 return (ctxt->inputNr++);
854}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000855/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000856 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000857 * @ctxt: an XML parser context
858 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000859 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000860 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000861 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000862 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000863extern xmlParserInputPtr
864inputPop(xmlParserCtxtPtr ctxt)
865{
866 xmlParserInputPtr ret;
867
868 if (ctxt->inputNr <= 0)
869 return (0);
870 ctxt->inputNr--;
871 if (ctxt->inputNr > 0)
872 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
873 else
874 ctxt->input = NULL;
875 ret = ctxt->inputTab[ctxt->inputNr];
876 ctxt->inputTab[ctxt->inputNr] = 0;
877 return (ret);
878}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000879/**
880 * nodePush:
881 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000882 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000883 *
884 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000885 *
886 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000887 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000888extern int
889nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
890{
891 if (ctxt->nodeNr >= ctxt->nodeMax) {
892 ctxt->nodeMax *= 2;
893 ctxt->nodeTab =
894 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
895 ctxt->nodeMax *
896 sizeof(ctxt->nodeTab[0]));
897 if (ctxt->nodeTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000898 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000899 return (0);
900 }
901 }
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000902#ifdef MAX_DEPTH
903 if (ctxt->nodeNr > MAX_DEPTH) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000904 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000905 "Excessive depth in document: change MAX_DEPTH = %d\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000906 MAX_DEPTH);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000907 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000908 return(0);
909 }
910#endif
Daniel Veillard1c732d22002-11-30 11:22:59 +0000911 ctxt->nodeTab[ctxt->nodeNr] = value;
912 ctxt->node = value;
913 return (ctxt->nodeNr++);
914}
915/**
916 * nodePop:
917 * @ctxt: an XML parser context
918 *
919 * Pops the top element node from the node stack
920 *
921 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +0000922 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000923extern xmlNodePtr
924nodePop(xmlParserCtxtPtr ctxt)
925{
926 xmlNodePtr ret;
927
928 if (ctxt->nodeNr <= 0)
929 return (0);
930 ctxt->nodeNr--;
931 if (ctxt->nodeNr > 0)
932 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
933 else
934 ctxt->node = NULL;
935 ret = ctxt->nodeTab[ctxt->nodeNr];
936 ctxt->nodeTab[ctxt->nodeNr] = 0;
937 return (ret);
938}
939/**
Daniel Veillarde57ec792003-09-10 10:50:59 +0000940 * nameNsPush:
941 * @ctxt: an XML parser context
942 * @value: the element name
943 * @prefix: the element prefix
944 * @URI: the element namespace name
945 *
946 * Pushes a new element name/prefix/URL on top of the name stack
947 *
948 * Returns -1 in case of error, the index in the stack otherwise
949 */
950static int
951nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
952 const xmlChar *prefix, const xmlChar *URI, int nsNr)
953{
954 if (ctxt->nameNr >= ctxt->nameMax) {
955 const xmlChar * *tmp;
956 void **tmp2;
957 ctxt->nameMax *= 2;
958 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
959 ctxt->nameMax *
960 sizeof(ctxt->nameTab[0]));
961 if (tmp == NULL) {
962 ctxt->nameMax /= 2;
963 goto mem_error;
964 }
965 ctxt->nameTab = tmp;
966 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
967 ctxt->nameMax * 3 *
968 sizeof(ctxt->pushTab[0]));
969 if (tmp2 == NULL) {
970 ctxt->nameMax /= 2;
971 goto mem_error;
972 }
973 ctxt->pushTab = tmp2;
974 }
975 ctxt->nameTab[ctxt->nameNr] = value;
976 ctxt->name = value;
977 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
978 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000979 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000980 return (ctxt->nameNr++);
981mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000982 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000983 return (-1);
984}
985/**
986 * nameNsPop:
987 * @ctxt: an XML parser context
988 *
989 * Pops the top element/prefix/URI name from the name stack
990 *
991 * Returns the name just removed
992 */
993static const xmlChar *
994nameNsPop(xmlParserCtxtPtr ctxt)
995{
996 const xmlChar *ret;
997
998 if (ctxt->nameNr <= 0)
999 return (0);
1000 ctxt->nameNr--;
1001 if (ctxt->nameNr > 0)
1002 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1003 else
1004 ctxt->name = NULL;
1005 ret = ctxt->nameTab[ctxt->nameNr];
1006 ctxt->nameTab[ctxt->nameNr] = NULL;
1007 return (ret);
1008}
1009
1010/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001011 * namePush:
1012 * @ctxt: an XML parser context
1013 * @value: the element name
1014 *
1015 * Pushes a new element name on top of the name stack
1016 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001017 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001018 */
1019extern int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001020namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001021{
1022 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001023 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001024 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001025 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001026 ctxt->nameMax *
1027 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001028 if (tmp == NULL) {
1029 ctxt->nameMax /= 2;
1030 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001031 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001032 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001033 }
1034 ctxt->nameTab[ctxt->nameNr] = value;
1035 ctxt->name = value;
1036 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001037mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001038 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001039 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001040}
1041/**
1042 * namePop:
1043 * @ctxt: an XML parser context
1044 *
1045 * Pops the top element name from the name stack
1046 *
1047 * Returns the name just removed
1048 */
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001049extern const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001050namePop(xmlParserCtxtPtr ctxt)
1051{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001052 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001053
1054 if (ctxt->nameNr <= 0)
1055 return (0);
1056 ctxt->nameNr--;
1057 if (ctxt->nameNr > 0)
1058 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1059 else
1060 ctxt->name = NULL;
1061 ret = ctxt->nameTab[ctxt->nameNr];
1062 ctxt->nameTab[ctxt->nameNr] = 0;
1063 return (ret);
1064}
Owen Taylor3473f882001-02-23 17:55:21 +00001065
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001066static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001067 if (ctxt->spaceNr >= ctxt->spaceMax) {
1068 ctxt->spaceMax *= 2;
1069 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1070 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1071 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001072 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001073 return(0);
1074 }
1075 }
1076 ctxt->spaceTab[ctxt->spaceNr] = val;
1077 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1078 return(ctxt->spaceNr++);
1079}
1080
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001081static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001082 int ret;
1083 if (ctxt->spaceNr <= 0) return(0);
1084 ctxt->spaceNr--;
1085 if (ctxt->spaceNr > 0)
1086 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1087 else
1088 ctxt->space = NULL;
1089 ret = ctxt->spaceTab[ctxt->spaceNr];
1090 ctxt->spaceTab[ctxt->spaceNr] = -1;
1091 return(ret);
1092}
1093
1094/*
1095 * Macros for accessing the content. Those should be used only by the parser,
1096 * and not exported.
1097 *
1098 * Dirty macros, i.e. one often need to make assumption on the context to
1099 * use them
1100 *
1101 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1102 * To be used with extreme caution since operations consuming
1103 * characters may move the input buffer to a different location !
1104 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1105 * This should be used internally by the parser
1106 * only to compare to ASCII values otherwise it would break when
1107 * running with UTF-8 encoding.
1108 * RAW same as CUR but in the input buffer, bypass any token
1109 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare on ASCII based substring.
1112 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001113 * strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser. It takes no argument.
Owen Taylor3473f882001-02-23 17:55:21 +00001116 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1117 *
1118 * NEXT Skip to the next character, this does the proper decoding
1119 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001120 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001121 * CUR_CHAR(l) returns the current unicode character (int), set l
1122 * to the number of xmlChars used for the encoding [0-5].
1123 * CUR_SCHAR same but operate on a string instead of the context
1124 * COPY_BUF copy the current unicode char to the target buffer, increment
1125 * the index
1126 * GROW, SHRINK handling of input buffers
1127 */
1128
/*
 * Direct accessors on the current input of the parser context. They all
 * expect a variable named ctxt to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * SKIP(val): advance the character count, the input cursor and the
 * column by val, then hand a leading '%' to xmlParserHandlePEReference()
 * and pop the input if it is exhausted and cannot be grown.
 * val is evaluated three times.
 */
#define SKIP(val) do { \
    ctxt->nbChars += (val); \
    ctxt->input->cur += (val); \
    ctxt->input->col += (val); \
    if (*ctxt->input->cur == '%') \
        xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)
1141
Daniel Veillarda880b122003-04-21 21:36:41 +00001142#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001143 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1144 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001145 xmlSHRINK (ctxt);
1146
1147static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1148 xmlParserInputShrink(ctxt->input);
1149 if ((*ctxt->input->cur == 0) &&
1150 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1151 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001152 }
Owen Taylor3473f882001-02-23 17:55:21 +00001153
Daniel Veillarda880b122003-04-21 21:36:41 +00001154#define GROW if ((ctxt->progressive == 0) && \
1155 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001156 xmlGROW (ctxt);
1157
1158static void xmlGROW (xmlParserCtxtPtr ctxt) {
1159 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1160 if ((*ctxt->input->cur == 0) &&
1161 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1162 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001163}
Owen Taylor3473f882001-02-23 17:55:21 +00001164
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over one xmlChar, updating column and character count,
 * and try to read more input when the cursor lands on a 0 byte.
 * The expansion is a plain brace block, not a do/while(0) statement.
 */
#define NEXT1 { \
        ctxt->input->col++; \
        ctxt->input->cur++; \
        ctxt->nbChars++; \
        if (*ctxt->input->cur == 0) \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
    }

/*
 * NEXTL(l): update line/column for the current character, advance the
 * cursor by l xmlChars, then hand a leading '%' to
 * xmlParserHandlePEReference().
 */
#define NEXTL(l) do { \
    if (*(ctxt->input->cur) == '\n') { \
        ctxt->input->line++; ctxt->input->col = 1; \
    } else ctxt->input->col++; \
    ctxt->input->cur += (l); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
  } while (0)

/* l must be an lvalue: its address is passed to receive the length */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i, with a direct store when l is 1 and through
 * xmlCopyCharMultiByte() otherwise.
 * The expansion is an if/else statement without its final semicolon.
 */
#define COPY_BUF(l,b,i,v) \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v); \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
1192/**
1193 * xmlSkipBlankChars:
1194 * @ctxt: the XML parser context
1195 *
1196 * skip all blanks character found at that point in the input streams.
1197 * It pops up finished entities in the process if allowable at that point.
1198 *
1199 * Returns the number of space chars skipped
1200 */
1201
1202int
1203xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001204 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001205
1206 /*
1207 * It's Okay to use CUR/NEXT here since all the blanks are on
1208 * the ASCII range.
1209 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001210 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1211 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001212 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001213 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001214 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001215 cur = ctxt->input->cur;
1216 while (IS_BLANK(*cur)) {
1217 if (*cur == '\n') {
1218 ctxt->input->line++; ctxt->input->col = 1;
1219 }
1220 cur++;
1221 res++;
1222 if (*cur == 0) {
1223 ctxt->input->cur = cur;
1224 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1225 cur = ctxt->input->cur;
1226 }
1227 }
1228 ctxt->input->cur = cur;
1229 } else {
1230 int cur;
1231 do {
1232 cur = CUR;
1233 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
1234 NEXT;
1235 cur = CUR;
1236 res++;
1237 }
1238 while ((cur == 0) && (ctxt->inputNr > 1) &&
1239 (ctxt->instate != XML_PARSER_COMMENT)) {
1240 xmlPopInput(ctxt);
1241 cur = CUR;
1242 }
1243 /*
1244 * Need to handle support of entities branching here
1245 */
1246 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1247 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1248 }
Owen Taylor3473f882001-02-23 17:55:21 +00001249 return(res);
1250}
1251
1252/************************************************************************
1253 * *
1254 * Commodity functions to handle entities *
1255 * *
1256 ************************************************************************/
1257
1258/**
1259 * xmlPopInput:
1260 * @ctxt: an XML parser context
1261 *
1262 * xmlPopInput: the current input pointed by ctxt->input came to an end
1263 * pop it and return the next char.
1264 *
1265 * Returns the current xmlChar in the parser context
1266 */
1267xmlChar
1268xmlPopInput(xmlParserCtxtPtr ctxt) {
1269 if (ctxt->inputNr == 1) return(0); /* End of main Input */
1270 if (xmlParserDebugEntities)
1271 xmlGenericError(xmlGenericErrorContext,
1272 "Popping input %d\n", ctxt->inputNr);
1273 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001274 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001275 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1276 return(xmlPopInput(ctxt));
1277 return(CUR);
1278}
1279
1280/**
1281 * xmlPushInput:
1282 * @ctxt: an XML parser context
1283 * @input: an XML parser input fragment (entity, XML fragment ...).
1284 *
1285 * xmlPushInput: switch to a new input stream which is stacked on top
1286 * of the previous one(s).
1287 */
1288void
1289xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1290 if (input == NULL) return;
1291
1292 if (xmlParserDebugEntities) {
1293 if ((ctxt->input != NULL) && (ctxt->input->filename))
1294 xmlGenericError(xmlGenericErrorContext,
1295 "%s(%d): ", ctxt->input->filename,
1296 ctxt->input->line);
1297 xmlGenericError(xmlGenericErrorContext,
1298 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1299 }
1300 inputPush(ctxt, input);
1301 GROW;
1302}
1303
1304/**
1305 * xmlParseCharRef:
1306 * @ctxt: an XML parser context
1307 *
1308 * parse Reference declarations
1309 *
1310 * [66] CharRef ::= '&#' [0-9]+ ';' |
1311 * '&#x' [0-9a-fA-F]+ ';'
1312 *
1313 * [ WFC: Legal Character ]
1314 * Characters referred to using character references must match the
1315 * production for Char.
1316 *
1317 * Returns the value parsed (as an int), 0 in case of error
1318 */
1319int
1320xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001321 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001322 int count = 0;
1323
Owen Taylor3473f882001-02-23 17:55:21 +00001324 /*
1325 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1326 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001327 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001328 (NXT(2) == 'x')) {
1329 SKIP(3);
1330 GROW;
1331 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001332 if (count++ > 20) {
1333 count = 0;
1334 GROW;
1335 }
1336 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001337 val = val * 16 + (CUR - '0');
1338 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1339 val = val * 16 + (CUR - 'a') + 10;
1340 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1341 val = val * 16 + (CUR - 'A') + 10;
1342 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001343 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001344 val = 0;
1345 break;
1346 }
1347 NEXT;
1348 count++;
1349 }
1350 if (RAW == ';') {
1351 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001352 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001353 ctxt->nbChars ++;
1354 ctxt->input->cur++;
1355 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001356 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001357 SKIP(2);
1358 GROW;
1359 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001360 if (count++ > 20) {
1361 count = 0;
1362 GROW;
1363 }
1364 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001365 val = val * 10 + (CUR - '0');
1366 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001367 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001368 val = 0;
1369 break;
1370 }
1371 NEXT;
1372 count++;
1373 }
1374 if (RAW == ';') {
1375 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001376 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001377 ctxt->nbChars ++;
1378 ctxt->input->cur++;
1379 }
1380 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001381 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001382 }
1383
1384 /*
1385 * [ WFC: Legal Character ]
1386 * Characters referred to using character references must match the
1387 * production for Char.
1388 */
Daniel Veillard73b013f2003-09-30 12:36:01 +00001389 if (xmlIsChar(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001390 return(val);
1391 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001392 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1393 "xmlParseCharRef: invalid xmlChar value %d\n",
1394 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001395 }
1396 return(0);
1397}
1398
1399/**
1400 * xmlParseStringCharRef:
1401 * @ctxt: an XML parser context
1402 * @str: a pointer to an index in the string
1403 *
1404 * parse Reference declarations, variant parsing from a string rather
1405 * than an an input flow.
1406 *
1407 * [66] CharRef ::= '&#' [0-9]+ ';' |
1408 * '&#x' [0-9a-fA-F]+ ';'
1409 *
1410 * [ WFC: Legal Character ]
1411 * Characters referred to using character references must match the
1412 * production for Char.
1413 *
1414 * Returns the value parsed (as an int), 0 in case of error, str will be
1415 * updated to the current value of the index
1416 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001417static int
Owen Taylor3473f882001-02-23 17:55:21 +00001418xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1419 const xmlChar *ptr;
1420 xmlChar cur;
1421 int val = 0;
1422
1423 if ((str == NULL) || (*str == NULL)) return(0);
1424 ptr = *str;
1425 cur = *ptr;
1426 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1427 ptr += 3;
1428 cur = *ptr;
1429 while (cur != ';') { /* Non input consuming loop */
1430 if ((cur >= '0') && (cur <= '9'))
1431 val = val * 16 + (cur - '0');
1432 else if ((cur >= 'a') && (cur <= 'f'))
1433 val = val * 16 + (cur - 'a') + 10;
1434 else if ((cur >= 'A') && (cur <= 'F'))
1435 val = val * 16 + (cur - 'A') + 10;
1436 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001437 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001438 val = 0;
1439 break;
1440 }
1441 ptr++;
1442 cur = *ptr;
1443 }
1444 if (cur == ';')
1445 ptr++;
1446 } else if ((cur == '&') && (ptr[1] == '#')){
1447 ptr += 2;
1448 cur = *ptr;
1449 while (cur != ';') { /* Non input consuming loops */
1450 if ((cur >= '0') && (cur <= '9'))
1451 val = val * 10 + (cur - '0');
1452 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001453 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001454 val = 0;
1455 break;
1456 }
1457 ptr++;
1458 cur = *ptr;
1459 }
1460 if (cur == ';')
1461 ptr++;
1462 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001463 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001464 return(0);
1465 }
1466 *str = ptr;
1467
1468 /*
1469 * [ WFC: Legal Character ]
1470 * Characters referred to using character references must match the
1471 * production for Char.
1472 */
Daniel Veillard73b013f2003-09-30 12:36:01 +00001473 if (xmlIsChar(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001474 return(val);
1475 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001476 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1477 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1478 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001479 }
1480 return(0);
1481}
1482
1483/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001484 * xmlNewBlanksWrapperInputStream:
1485 * @ctxt: an XML parser context
1486 * @entity: an Entity pointer
1487 *
1488 * Create a new input stream for wrapping
1489 * blanks around a PEReference
1490 *
1491 * Returns the new input stream or NULL
1492 */
1493
1494static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1495
Daniel Veillardf4862f02002-09-10 11:13:43 +00001496static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001497xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1498 xmlParserInputPtr input;
1499 xmlChar *buffer;
1500 size_t length;
1501 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001502 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1503 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001504 return(NULL);
1505 }
1506 if (xmlParserDebugEntities)
1507 xmlGenericError(xmlGenericErrorContext,
1508 "new blanks wrapper for entity: %s\n", entity->name);
1509 input = xmlNewInputStream(ctxt);
1510 if (input == NULL) {
1511 return(NULL);
1512 }
1513 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001514 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001515 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001516 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001517 return(NULL);
1518 }
1519 buffer [0] = ' ';
1520 buffer [1] = '%';
1521 buffer [length-3] = ';';
1522 buffer [length-2] = ' ';
1523 buffer [length-1] = 0;
1524 memcpy(buffer + 2, entity->name, length - 5);
1525 input->free = deallocblankswrapper;
1526 input->base = buffer;
1527 input->cur = buffer;
1528 input->length = length;
1529 input->end = &buffer[length];
1530 return(input);
1531}
1532
1533/**
Owen Taylor3473f882001-02-23 17:55:21 +00001534 * xmlParserHandlePEReference:
1535 * @ctxt: the parser context
1536 *
1537 * [69] PEReference ::= '%' Name ';'
1538 *
1539 * [ WFC: No Recursion ]
1540 * A parsed entity must not contain a recursive
1541 * reference to itself, either directly or indirectly.
1542 *
1543 * [ WFC: Entity Declared ]
1544 * In a document without any DTD, a document with only an internal DTD
1545 * subset which contains no parameter entity references, or a document
1546 * with "standalone='yes'", ... ... The declaration of a parameter
1547 * entity must precede any reference to it...
1548 *
1549 * [ VC: Entity Declared ]
1550 * In a document with an external subset or external parameter entities
1551 * with "standalone='no'", ... ... The declaration of a parameter entity
1552 * must precede any reference to it...
1553 *
1554 * [ WFC: In DTD ]
1555 * Parameter-entity references may only appear in the DTD.
1556 * NOTE: misleading but this is handled.
1557 *
1558 * A PEReference may have been detected in the current input stream
1559 * the handling is done accordingly to
1560 * http://www.w3.org/TR/REC-xml#entproc
1561 * i.e.
1562 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001563 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001564 */
1565void
1566xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001567 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001568 xmlEntityPtr entity = NULL;
1569 xmlParserInputPtr input;
1570
Owen Taylor3473f882001-02-23 17:55:21 +00001571 if (RAW != '%') return;
1572 switch(ctxt->instate) {
1573 case XML_PARSER_CDATA_SECTION:
1574 return;
1575 case XML_PARSER_COMMENT:
1576 return;
1577 case XML_PARSER_START_TAG:
1578 return;
1579 case XML_PARSER_END_TAG:
1580 return;
1581 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001582 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001583 return;
1584 case XML_PARSER_PROLOG:
1585 case XML_PARSER_START:
1586 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001587 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001588 return;
1589 case XML_PARSER_ENTITY_DECL:
1590 case XML_PARSER_CONTENT:
1591 case XML_PARSER_ATTRIBUTE_VALUE:
1592 case XML_PARSER_PI:
1593 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001594 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001595 /* we just ignore it there */
1596 return;
1597 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001598 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001599 return;
1600 case XML_PARSER_ENTITY_VALUE:
1601 /*
1602 * NOTE: in the case of entity values, we don't do the
1603 * substitution here since we need the literal
1604 * entity value to be able to save the internal
1605 * subset of the document.
1606 * This will be handled by xmlStringDecodeEntities
1607 */
1608 return;
1609 case XML_PARSER_DTD:
1610 /*
1611 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1612 * In the internal DTD subset, parameter-entity references
1613 * can occur only where markup declarations can occur, not
1614 * within markup declarations.
1615 * In that case this is handled in xmlParseMarkupDecl
1616 */
1617 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1618 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +00001619 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
1620 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001621 break;
1622 case XML_PARSER_IGNORE:
1623 return;
1624 }
1625
1626 NEXT;
1627 name = xmlParseName(ctxt);
1628 if (xmlParserDebugEntities)
1629 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001630 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001631 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001632 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001633 } else {
1634 if (RAW == ';') {
1635 NEXT;
1636 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1637 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1638 if (entity == NULL) {
1639
1640 /*
1641 * [ WFC: Entity Declared ]
1642 * In a document without any DTD, a document with only an
1643 * internal DTD subset which contains no parameter entity
1644 * references, or a document with "standalone='yes'", ...
1645 * ... The declaration of a parameter entity must precede
1646 * any reference to it...
1647 */
1648 if ((ctxt->standalone == 1) ||
1649 ((ctxt->hasExternalSubset == 0) &&
1650 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001651 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00001652 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001653 } else {
1654 /*
1655 * [ VC: Entity Declared ]
1656 * In a document with an external subset or external
1657 * parameter entities with "standalone='no'", ...
1658 * ... The declaration of a parameter entity must precede
1659 * any reference to it...
1660 */
1661 if ((!ctxt->disableSAX) &&
1662 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1663 ctxt->vctxt.error(ctxt->vctxt.userData,
1664 "PEReference: %%%s; not found\n", name);
1665 } else if ((!ctxt->disableSAX) &&
1666 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1667 ctxt->sax->warning(ctxt->userData,
1668 "PEReference: %%%s; not found\n", name);
1669 ctxt->valid = 0;
1670 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00001671 } else if (ctxt->input->free != deallocblankswrapper) {
1672 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
1673 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001674 } else {
1675 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1676 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001677 xmlChar start[4];
1678 xmlCharEncoding enc;
1679
Owen Taylor3473f882001-02-23 17:55:21 +00001680 /*
1681 * handle the extra spaces added before and after
1682 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001683 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00001684 */
1685 input = xmlNewEntityInputStream(ctxt, entity);
1686 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00001687
1688 /*
1689 * Get the 4 first bytes and decode the charset
1690 * if enc != XML_CHAR_ENCODING_NONE
1691 * plug some encoding conversion routines.
1692 */
1693 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +00001694 if (entity->length >= 4) {
1695 start[0] = RAW;
1696 start[1] = NXT(1);
1697 start[2] = NXT(2);
1698 start[3] = NXT(3);
1699 enc = xmlDetectCharEncoding(start, 4);
1700 if (enc != XML_CHAR_ENCODING_NONE) {
1701 xmlSwitchEncoding(ctxt, enc);
1702 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001703 }
1704
Owen Taylor3473f882001-02-23 17:55:21 +00001705 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
1706 (RAW == '<') && (NXT(1) == '?') &&
1707 (NXT(2) == 'x') && (NXT(3) == 'm') &&
1708 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1709 xmlParseTextDecl(ctxt);
1710 }
Owen Taylor3473f882001-02-23 17:55:21 +00001711 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001712 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
1713 "PEReference: %s is not a parameter entity\n",
1714 name);
Owen Taylor3473f882001-02-23 17:55:21 +00001715 }
1716 }
1717 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001718 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001719 }
Owen Taylor3473f882001-02-23 17:55:21 +00001720 }
1721}
1722
/*
 * Macro used to grow the current buffer.
 *
 * Doubles the allocation of @buffer; the companion variable
 * <buffer>_size holds its size in xmlChar units. When the reallocation
 * fails the macro jumps to the enclosing function's mem_error label,
 * leaving both @buffer and <buffer>_size untouched so that the old
 * block is still reachable (and can be released) from there.
 */
#define growBuffer(buffer) {						\
    xmlChar *growBuffer_tmp;						\
    growBuffer_tmp = (xmlChar *)					\
        xmlRealloc(buffer, 2 * buffer##_size * sizeof(xmlChar));	\
    if (growBuffer_tmp == NULL) goto mem_error;				\
    buffer = growBuffer_tmp;						\
    buffer##_size *= 2;							\
}
1732
1733/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00001734 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00001735 * @ctxt: the parser context
1736 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001737 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001738 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1739 * @end: an end marker xmlChar, 0 if none
1740 * @end2: an end marker xmlChar, 0 if none
1741 * @end3: an end marker xmlChar, 0 if none
1742 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001743 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001744 *
1745 * [67] Reference ::= EntityRef | CharRef
1746 *
1747 * [69] PEReference ::= '%' Name ';'
1748 *
1749 * Returns A newly allocated string with the substitution done. The caller
1750 * must deallocate it !
1751 */
1752xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001753xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1754 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001755 xmlChar *buffer = NULL;
1756 int buffer_size = 0;
1757
1758 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001759 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001760 xmlEntityPtr ent;
1761 int c,l;
1762 int nbchars = 0;
1763
Daniel Veillarde57ec792003-09-10 10:50:59 +00001764 if ((str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001765 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001766 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001767
1768 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001769 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001770 return(NULL);
1771 }
1772
1773 /*
1774 * allocate a translation buffer.
1775 */
1776 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001777 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001778 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001779
1780 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001781 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001782 * we are operating on already parsed values.
1783 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001784 if (str < last)
1785 c = CUR_SCHAR(str, l);
1786 else
1787 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001788 while ((c != 0) && (c != end) && /* non input consuming loop */
1789 (c != end2) && (c != end3)) {
1790
1791 if (c == 0) break;
1792 if ((c == '&') && (str[1] == '#')) {
1793 int val = xmlParseStringCharRef(ctxt, &str);
1794 if (val != 0) {
1795 COPY_BUF(0,buffer,nbchars,val);
1796 }
1797 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1798 if (xmlParserDebugEntities)
1799 xmlGenericError(xmlGenericErrorContext,
1800 "String decoding Entity Reference: %.30s\n",
1801 str);
1802 ent = xmlParseStringEntityRef(ctxt, &str);
1803 if ((ent != NULL) &&
1804 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1805 if (ent->content != NULL) {
1806 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1807 } else {
1808 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1809 ctxt->sax->error(ctxt->userData,
1810 "internal error entity has no content\n");
1811 }
1812 } else if ((ent != NULL) && (ent->content != NULL)) {
1813 xmlChar *rep;
1814
1815 ctxt->depth++;
1816 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1817 0, 0, 0);
1818 ctxt->depth--;
1819 if (rep != NULL) {
1820 current = rep;
1821 while (*current != 0) { /* non input consuming loop */
1822 buffer[nbchars++] = *current++;
1823 if (nbchars >
1824 buffer_size - XML_PARSER_BUFFER_SIZE) {
1825 growBuffer(buffer);
1826 }
1827 }
1828 xmlFree(rep);
1829 }
1830 } else if (ent != NULL) {
1831 int i = xmlStrlen(ent->name);
1832 const xmlChar *cur = ent->name;
1833
1834 buffer[nbchars++] = '&';
1835 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1836 growBuffer(buffer);
1837 }
1838 for (;i > 0;i--)
1839 buffer[nbchars++] = *cur++;
1840 buffer[nbchars++] = ';';
1841 }
1842 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1843 if (xmlParserDebugEntities)
1844 xmlGenericError(xmlGenericErrorContext,
1845 "String decoding PE Reference: %.30s\n", str);
1846 ent = xmlParseStringPEReference(ctxt, &str);
1847 if (ent != NULL) {
1848 xmlChar *rep;
1849
1850 ctxt->depth++;
1851 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1852 0, 0, 0);
1853 ctxt->depth--;
1854 if (rep != NULL) {
1855 current = rep;
1856 while (*current != 0) { /* non input consuming loop */
1857 buffer[nbchars++] = *current++;
1858 if (nbchars >
1859 buffer_size - XML_PARSER_BUFFER_SIZE) {
1860 growBuffer(buffer);
1861 }
1862 }
1863 xmlFree(rep);
1864 }
1865 }
1866 } else {
1867 COPY_BUF(l,buffer,nbchars,c);
1868 str += l;
1869 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1870 growBuffer(buffer);
1871 }
1872 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001873 if (str < last)
1874 c = CUR_SCHAR(str, l);
1875 else
1876 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001877 }
1878 buffer[nbchars++] = 0;
1879 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001880
1881mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001882 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001883 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001884}
1885
Daniel Veillarde57ec792003-09-10 10:50:59 +00001886/**
1887 * xmlStringDecodeEntities:
1888 * @ctxt: the parser context
1889 * @str: the input string
1890 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1891 * @end: an end marker xmlChar, 0 if none
1892 * @end2: an end marker xmlChar, 0 if none
1893 * @end3: an end marker xmlChar, 0 if none
1894 *
1895 * Takes a entity string content and process to do the adequate substitutions.
1896 *
1897 * [67] Reference ::= EntityRef | CharRef
1898 *
1899 * [69] PEReference ::= '%' Name ';'
1900 *
1901 * Returns A newly allocated string with the substitution done. The caller
1902 * must deallocate it !
1903 */
1904xmlChar *
1905xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1906 xmlChar end, xmlChar end2, xmlChar end3) {
1907 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
1908 end, end2, end3));
1909}
Owen Taylor3473f882001-02-23 17:55:21 +00001910
1911/************************************************************************
1912 * *
1913 * Commodity functions to handle xmlChars *
1914 * *
1915 ************************************************************************/
1916
1917/**
1918 * xmlStrndup:
1919 * @cur: the input xmlChar *
1920 * @len: the len of @cur
1921 *
1922 * a strndup for array of xmlChar's
1923 *
1924 * Returns a new xmlChar * or NULL
1925 */
1926xmlChar *
1927xmlStrndup(const xmlChar *cur, int len) {
1928 xmlChar *ret;
1929
1930 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001931 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001932 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001933 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001934 return(NULL);
1935 }
1936 memcpy(ret, cur, len * sizeof(xmlChar));
1937 ret[len] = 0;
1938 return(ret);
1939}
1940
1941/**
1942 * xmlStrdup:
1943 * @cur: the input xmlChar *
1944 *
1945 * a strdup for array of xmlChar's. Since they are supposed to be
1946 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1947 * a termination mark of '0'.
1948 *
1949 * Returns a new xmlChar * or NULL
1950 */
1951xmlChar *
1952xmlStrdup(const xmlChar *cur) {
1953 const xmlChar *p = cur;
1954
1955 if (cur == NULL) return(NULL);
1956 while (*p != 0) p++; /* non input consuming */
1957 return(xmlStrndup(cur, p - cur));
1958}
1959
1960/**
1961 * xmlCharStrndup:
1962 * @cur: the input char *
1963 * @len: the len of @cur
1964 *
1965 * a strndup for char's to xmlChar's
1966 *
1967 * Returns a new xmlChar * or NULL
1968 */
1969
1970xmlChar *
1971xmlCharStrndup(const char *cur, int len) {
1972 int i;
1973 xmlChar *ret;
1974
1975 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001976 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001977 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001978 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001979 return(NULL);
1980 }
1981 for (i = 0;i < len;i++)
1982 ret[i] = (xmlChar) cur[i];
1983 ret[len] = 0;
1984 return(ret);
1985}
1986
1987/**
1988 * xmlCharStrdup:
1989 * @cur: the input char *
Owen Taylor3473f882001-02-23 17:55:21 +00001990 *
1991 * a strdup for char's to xmlChar's
1992 *
1993 * Returns a new xmlChar * or NULL
1994 */
1995
1996xmlChar *
1997xmlCharStrdup(const char *cur) {
1998 const char *p = cur;
1999
2000 if (cur == NULL) return(NULL);
2001 while (*p != '\0') p++; /* non input consuming */
2002 return(xmlCharStrndup(cur, p - cur));
2003}
2004
2005/**
2006 * xmlStrcmp:
2007 * @str1: the first xmlChar *
2008 * @str2: the second xmlChar *
2009 *
2010 * a strcmp for xmlChar's
2011 *
2012 * Returns the integer result of the comparison
2013 */
2014
2015int
2016xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
2017 register int tmp;
2018
2019 if (str1 == str2) return(0);
2020 if (str1 == NULL) return(-1);
2021 if (str2 == NULL) return(1);
2022 do {
2023 tmp = *str1++ - *str2;
2024 if (tmp != 0) return(tmp);
2025 } while (*str2++ != 0);
2026 return 0;
2027}
2028
2029/**
2030 * xmlStrEqual:
2031 * @str1: the first xmlChar *
2032 * @str2: the second xmlChar *
2033 *
2034 * Check if both string are equal of have same content
2035 * Should be a bit more readable and faster than xmlStrEqual()
2036 *
2037 * Returns 1 if they are equal, 0 if they are different
2038 */
2039
2040int
2041xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
2042 if (str1 == str2) return(1);
2043 if (str1 == NULL) return(0);
2044 if (str2 == NULL) return(0);
2045 do {
2046 if (*str1++ != *str2) return(0);
2047 } while (*str2++);
2048 return(1);
2049}
2050
2051/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00002052 * xmlStrQEqual:
2053 * @pref: the prefix of the QName
2054 * @name: the localname of the QName
2055 * @str: the second xmlChar *
2056 *
2057 * Check if a QName is Equal to a given string
2058 *
2059 * Returns 1 if they are equal, 0 if they are different
2060 */
2061
2062int
2063xmlStrQEqual(const xmlChar *pref, const xmlChar *name, const xmlChar *str) {
2064 if (pref == NULL) return(xmlStrEqual(name, str));
2065 if (name == NULL) return(0);
2066 if (str == NULL) return(0);
2067
2068 do {
2069 if (*pref++ != *str) return(0);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002070 } while ((*str++) && (*pref));
Daniel Veillarde57ec792003-09-10 10:50:59 +00002071 if (*str++ != ':') return(0);
2072 do {
2073 if (*name++ != *str) return(0);
2074 } while (*str++);
2075 return(1);
2076}
2077
2078/**
Owen Taylor3473f882001-02-23 17:55:21 +00002079 * xmlStrncmp:
2080 * @str1: the first xmlChar *
2081 * @str2: the second xmlChar *
2082 * @len: the max comparison length
2083 *
2084 * a strncmp for xmlChar's
2085 *
2086 * Returns the integer result of the comparison
2087 */
2088
2089int
2090xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
2091 register int tmp;
2092
2093 if (len <= 0) return(0);
2094 if (str1 == str2) return(0);
2095 if (str1 == NULL) return(-1);
2096 if (str2 == NULL) return(1);
2097 do {
2098 tmp = *str1++ - *str2;
2099 if (tmp != 0 || --len == 0) return(tmp);
2100 } while (*str2++ != 0);
2101 return 0;
2102}
2103
Daniel Veillardb44025c2001-10-11 22:55:55 +00002104static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00002105 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
2106 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
2107 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
2108 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
2109 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
2110 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
2111 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
2112 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
2113 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
2114 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
2115 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
2116 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
2117 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
2118 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
2119 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
2120 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
2121 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
2122 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
2123 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
2124 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
2125 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
2126 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
2127 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
2128 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
2129 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
2130 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
2131 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
2132 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
2133 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
2134 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
2135 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
2136 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
2137};
2138
2139/**
2140 * xmlStrcasecmp:
2141 * @str1: the first xmlChar *
2142 * @str2: the second xmlChar *
2143 *
2144 * a strcasecmp for xmlChar's
2145 *
2146 * Returns the integer result of the comparison
2147 */
2148
2149int
2150xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
2151 register int tmp;
2152
2153 if (str1 == str2) return(0);
2154 if (str1 == NULL) return(-1);
2155 if (str2 == NULL) return(1);
2156 do {
2157 tmp = casemap[*str1++] - casemap[*str2];
2158 if (tmp != 0) return(tmp);
2159 } while (*str2++ != 0);
2160 return 0;
2161}
2162
2163/**
2164 * xmlStrncasecmp:
2165 * @str1: the first xmlChar *
2166 * @str2: the second xmlChar *
2167 * @len: the max comparison length
2168 *
2169 * a strncasecmp for xmlChar's
2170 *
2171 * Returns the integer result of the comparison
2172 */
2173
2174int
2175xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
2176 register int tmp;
2177
2178 if (len <= 0) return(0);
2179 if (str1 == str2) return(0);
2180 if (str1 == NULL) return(-1);
2181 if (str2 == NULL) return(1);
2182 do {
2183 tmp = casemap[*str1++] - casemap[*str2];
2184 if (tmp != 0 || --len == 0) return(tmp);
2185 } while (*str2++ != 0);
2186 return 0;
2187}
2188
2189/**
2190 * xmlStrchr:
2191 * @str: the xmlChar * array
2192 * @val: the xmlChar to search
2193 *
2194 * a strchr for xmlChar's
2195 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002196 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002197 */
2198
2199const xmlChar *
2200xmlStrchr(const xmlChar *str, xmlChar val) {
2201 if (str == NULL) return(NULL);
2202 while (*str != 0) { /* non input consuming */
2203 if (*str == val) return((xmlChar *) str);
2204 str++;
2205 }
2206 return(NULL);
2207}
2208
2209/**
2210 * xmlStrstr:
2211 * @str: the xmlChar * array (haystack)
2212 * @val: the xmlChar to search (needle)
2213 *
2214 * a strstr for xmlChar's
2215 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002216 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002217 */
2218
2219const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00002220xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00002221 int n;
2222
2223 if (str == NULL) return(NULL);
2224 if (val == NULL) return(NULL);
2225 n = xmlStrlen(val);
2226
2227 if (n == 0) return(str);
2228 while (*str != 0) { /* non input consuming */
2229 if (*str == *val) {
2230 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
2231 }
2232 str++;
2233 }
2234 return(NULL);
2235}
2236
2237/**
2238 * xmlStrcasestr:
2239 * @str: the xmlChar * array (haystack)
2240 * @val: the xmlChar to search (needle)
2241 *
2242 * a case-ignoring strstr for xmlChar's
2243 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002244 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002245 */
2246
2247const xmlChar *
2248xmlStrcasestr(const xmlChar *str, xmlChar *val) {
2249 int n;
2250
2251 if (str == NULL) return(NULL);
2252 if (val == NULL) return(NULL);
2253 n = xmlStrlen(val);
2254
2255 if (n == 0) return(str);
2256 while (*str != 0) { /* non input consuming */
2257 if (casemap[*str] == casemap[*val])
2258 if (!xmlStrncasecmp(str, val, n)) return(str);
2259 str++;
2260 }
2261 return(NULL);
2262}
2263
2264/**
2265 * xmlStrsub:
2266 * @str: the xmlChar * array (haystack)
2267 * @start: the index of the first char (zero based)
2268 * @len: the length of the substring
2269 *
2270 * Extract a substring of a given string
2271 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002272 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002273 */
2274
2275xmlChar *
2276xmlStrsub(const xmlChar *str, int start, int len) {
2277 int i;
2278
2279 if (str == NULL) return(NULL);
2280 if (start < 0) return(NULL);
2281 if (len < 0) return(NULL);
2282
2283 for (i = 0;i < start;i++) {
2284 if (*str == 0) return(NULL);
2285 str++;
2286 }
2287 if (*str == 0) return(NULL);
2288 return(xmlStrndup(str, len));
2289}
2290
2291/**
2292 * xmlStrlen:
2293 * @str: the xmlChar * array
2294 *
2295 * length of a xmlChar's string
2296 *
2297 * Returns the number of xmlChar contained in the ARRAY.
2298 */
2299
2300int
2301xmlStrlen(const xmlChar *str) {
2302 int len = 0;
2303
2304 if (str == NULL) return(0);
2305 while (*str != 0) { /* non input consuming */
2306 str++;
2307 len++;
2308 }
2309 return(len);
2310}
2311
2312/**
2313 * xmlStrncat:
2314 * @cur: the original xmlChar * array
2315 * @add: the xmlChar * array added
2316 * @len: the length of @add
2317 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002318 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00002319 * first bytes of @add.
2320 *
2321 * Returns a new xmlChar *, the original @cur is reallocated if needed
2322 * and should not be freed
2323 */
2324
2325xmlChar *
2326xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
2327 int size;
2328 xmlChar *ret;
2329
2330 if ((add == NULL) || (len == 0))
2331 return(cur);
2332 if (cur == NULL)
2333 return(xmlStrndup(add, len));
2334
2335 size = xmlStrlen(cur);
2336 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
2337 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002338 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002339 return(cur);
2340 }
2341 memcpy(&ret[size], add, len * sizeof(xmlChar));
2342 ret[size + len] = 0;
2343 return(ret);
2344}
2345
2346/**
2347 * xmlStrcat:
2348 * @cur: the original xmlChar * array
2349 * @add: the xmlChar * array added
2350 *
2351 * a strcat for array of xmlChar's. Since they are supposed to be
2352 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2353 * a termination mark of '0'.
2354 *
2355 * Returns a new xmlChar * containing the concatenated string.
2356 */
2357xmlChar *
2358xmlStrcat(xmlChar *cur, const xmlChar *add) {
2359 const xmlChar *p = add;
2360
2361 if (add == NULL) return(cur);
2362 if (cur == NULL)
2363 return(xmlStrdup(add));
2364
2365 while (*p != 0) p++; /* non input consuming */
2366 return(xmlStrncat(cur, add, p - add));
2367}
2368
Aleksey Sanine7acf432003-10-02 20:05:27 +00002369/**
2370 * xmlStrPrintf:
2371 * @buf: the result buffer.
2372 * @len: the result buffer length.
2373 * @msg: the message with printf formatting.
2374 * @...: extra parameters for the message.
2375 *
2376 * Formats @msg and places result into @buf.
2377 *
2378 * Returns the number of characters written to @buf or -1 if an error occurs.
2379 */
2380int
2381xmlStrPrintf(xmlChar *buf, int len, const xmlChar *msg, ...) {
2382 va_list args;
2383 int ret;
2384
2385 if((buf == NULL) || (msg == NULL)) {
2386 return(-1);
2387 }
2388
2389 va_start(args, msg);
Daniel Veillardbb5abab2003-10-03 22:21:51 +00002390 ret = vsnprintf((char *) buf, len, (const char *) msg, args);
Aleksey Sanine7acf432003-10-02 20:05:27 +00002391 va_end(args);
2392
2393 return(ret);
2394}
2395
Owen Taylor3473f882001-02-23 17:55:21 +00002396/************************************************************************
2397 * *
2398 * Commodity functions, cleanup needed ? *
2399 * *
2400 ************************************************************************/
2401
2402/**
2403 * areBlanks:
2404 * @ctxt: an XML parser context
2405 * @str: a xmlChar *
2406 * @len: the size of @str
2407 *
2408 * Is this a sequence of blank chars that one can ignore ?
2409 *
2410 * Returns 1 if ignorable 0 otherwise.
2411 */
2412
2413static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
2414 int i, ret;
2415 xmlNodePtr lastChild;
2416
Daniel Veillard05c13a22001-09-09 08:38:09 +00002417 /*
2418 * Don't spend time trying to differentiate them, the same callback is
2419 * used !
2420 */
2421 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002422 return(0);
2423
Owen Taylor3473f882001-02-23 17:55:21 +00002424 /*
2425 * Check for xml:space value.
2426 */
2427 if (*(ctxt->space) == 1)
2428 return(0);
2429
2430 /*
2431 * Check that the string is made of blanks
2432 */
2433 for (i = 0;i < len;i++)
2434 if (!(IS_BLANK(str[i]))) return(0);
2435
2436 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002437 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002438 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002439 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002440 if (ctxt->myDoc != NULL) {
2441 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2442 if (ret == 0) return(1);
2443 if (ret == 1) return(0);
2444 }
2445
2446 /*
2447 * Otherwise, heuristic :-\
2448 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002449 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002450 if ((ctxt->node->children == NULL) &&
2451 (RAW == '<') && (NXT(1) == '/')) return(0);
2452
2453 lastChild = xmlGetLastChild(ctxt->node);
2454 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002455 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2456 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002457 } else if (xmlNodeIsText(lastChild))
2458 return(0);
2459 else if ((ctxt->node->children != NULL) &&
2460 (xmlNodeIsText(ctxt->node->children)))
2461 return(0);
2462 return(1);
2463}
2464
Owen Taylor3473f882001-02-23 17:55:21 +00002465/************************************************************************
2466 * *
2467 * Extra stuff for namespace support *
2468 * Relates to http://www.w3.org/TR/WD-xml-names *
2469 * *
2470 ************************************************************************/
2471
2472/**
2473 * xmlSplitQName:
2474 * @ctxt: an XML parser context
2475 * @name: an XML parser context
2476 * @prefix: a xmlChar **
2477 *
2478 * parse an UTF8 encoded XML qualified name string
2479 *
2480 * [NS 5] QName ::= (Prefix ':')? LocalPart
2481 *
2482 * [NS 6] Prefix ::= NCName
2483 *
2484 * [NS 7] LocalPart ::= NCName
2485 *
2486 * Returns the local part, and prefix is updated
2487 * to get the Prefix if any.
2488 */
2489
2490xmlChar *
2491xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2492 xmlChar buf[XML_MAX_NAMELEN + 5];
2493 xmlChar *buffer = NULL;
2494 int len = 0;
2495 int max = XML_MAX_NAMELEN;
2496 xmlChar *ret = NULL;
2497 const xmlChar *cur = name;
2498 int c;
2499
2500 *prefix = NULL;
2501
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002502 if (cur == NULL) return(NULL);
2503
Owen Taylor3473f882001-02-23 17:55:21 +00002504#ifndef XML_XML_NAMESPACE
2505 /* xml: prefix is not really a namespace */
2506 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2507 (cur[2] == 'l') && (cur[3] == ':'))
2508 return(xmlStrdup(name));
2509#endif
2510
Daniel Veillard597bc482003-07-24 16:08:28 +00002511 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002512 if (cur[0] == ':')
2513 return(xmlStrdup(name));
2514
2515 c = *cur++;
2516 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2517 buf[len++] = c;
2518 c = *cur++;
2519 }
2520 if (len >= max) {
2521 /*
2522 * Okay someone managed to make a huge name, so he's ready to pay
2523 * for the processing speed.
2524 */
2525 max = len * 2;
2526
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002527 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002528 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002529 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002530 return(NULL);
2531 }
2532 memcpy(buffer, buf, len);
2533 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2534 if (len + 10 > max) {
2535 max *= 2;
2536 buffer = (xmlChar *) xmlRealloc(buffer,
2537 max * sizeof(xmlChar));
2538 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002539 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002540 return(NULL);
2541 }
2542 }
2543 buffer[len++] = c;
2544 c = *cur++;
2545 }
2546 buffer[len] = 0;
2547 }
2548
Daniel Veillard597bc482003-07-24 16:08:28 +00002549 /* nasty but well=formed
2550 if ((c == ':') && (*cur == 0)) {
2551 return(xmlStrdup(name));
2552 } */
2553
Owen Taylor3473f882001-02-23 17:55:21 +00002554 if (buffer == NULL)
2555 ret = xmlStrndup(buf, len);
2556 else {
2557 ret = buffer;
2558 buffer = NULL;
2559 max = XML_MAX_NAMELEN;
2560 }
2561
2562
2563 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002564 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002565 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002566 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002567 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002568 }
Owen Taylor3473f882001-02-23 17:55:21 +00002569 len = 0;
2570
Daniel Veillardbb284f42002-10-16 18:02:47 +00002571 /*
2572 * Check that the first character is proper to start
2573 * a new name
2574 */
2575 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2576 ((c >= 0x41) && (c <= 0x5A)) ||
2577 (c == '_') || (c == ':'))) {
2578 int l;
2579 int first = CUR_SCHAR(cur, l);
2580
2581 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002582 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002583 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002584 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002585 }
2586 }
2587 cur++;
2588
Owen Taylor3473f882001-02-23 17:55:21 +00002589 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2590 buf[len++] = c;
2591 c = *cur++;
2592 }
2593 if (len >= max) {
2594 /*
2595 * Okay someone managed to make a huge name, so he's ready to pay
2596 * for the processing speed.
2597 */
2598 max = len * 2;
2599
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002600 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002601 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002602 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002603 return(NULL);
2604 }
2605 memcpy(buffer, buf, len);
2606 while (c != 0) { /* tested bigname2.xml */
2607 if (len + 10 > max) {
2608 max *= 2;
2609 buffer = (xmlChar *) xmlRealloc(buffer,
2610 max * sizeof(xmlChar));
2611 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002612 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002613 return(NULL);
2614 }
2615 }
2616 buffer[len++] = c;
2617 c = *cur++;
2618 }
2619 buffer[len] = 0;
2620 }
2621
2622 if (buffer == NULL)
2623 ret = xmlStrndup(buf, len);
2624 else {
2625 ret = buffer;
2626 }
2627 }
2628
2629 return(ret);
2630}
2631
2632/************************************************************************
2633 * *
2634 * The parser itself *
2635 * Relates to http://www.w3.org/TR/REC-xml *
2636 * *
2637 ************************************************************************/
2638
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002639static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002640static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002641 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002642
Owen Taylor3473f882001-02-23 17:55:21 +00002643/**
2644 * xmlParseName:
2645 * @ctxt: an XML parser context
2646 *
2647 * parse an XML name.
2648 *
2649 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2650 * CombiningChar | Extender
2651 *
2652 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2653 *
2654 * [6] Names ::= Name (S Name)*
2655 *
2656 * Returns the Name parsed or NULL
2657 */
2658
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002659const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002660xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002661 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002662 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002663 int count = 0;
2664
2665 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002666
2667 /*
2668 * Accelerator for simple ASCII names
2669 */
2670 in = ctxt->input->cur;
2671 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2672 ((*in >= 0x41) && (*in <= 0x5A)) ||
2673 (*in == '_') || (*in == ':')) {
2674 in++;
2675 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2676 ((*in >= 0x41) && (*in <= 0x5A)) ||
2677 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002678 (*in == '_') || (*in == '-') ||
2679 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002680 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002681 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002682 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002683 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002684 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002685 ctxt->nbChars += count;
2686 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002687 if (ret == NULL)
2688 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002689 return(ret);
2690 }
2691 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002692 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002693}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002694
Daniel Veillard46de64e2002-05-29 08:21:33 +00002695/**
2696 * xmlParseNameAndCompare:
2697 * @ctxt: an XML parser context
2698 *
2699 * parse an XML name and compares for match
2700 * (specialized for endtag parsing)
2701 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002702 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2703 * and the name for mismatch
2704 */
2705
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002706static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002707xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
2708 const xmlChar *cmp = other;
2709 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002710 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002711
2712 GROW;
2713
2714 in = ctxt->input->cur;
2715 while (*in != 0 && *in == *cmp) {
2716 ++in;
2717 ++cmp;
2718 }
2719 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
2720 /* success */
2721 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002722 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002723 }
2724 /* failure (or end of input buffer), check with full function */
2725 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002726 /* strings coming from the dictionnary direct compare possible */
2727 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002728 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002729 }
2730 return ret;
2731}
2732
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002733static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002734xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002735 int len = 0, l;
2736 int c;
2737 int count = 0;
2738
2739 /*
2740 * Handler for more complex cases
2741 */
2742 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002743 c = CUR_CHAR(l);
2744 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2745 (!IS_LETTER(c) && (c != '_') &&
2746 (c != ':'))) {
2747 return(NULL);
2748 }
2749
2750 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
Daniel Veillard73b013f2003-09-30 12:36:01 +00002751 ((xmlIsLetter(c)) || (xmlIsDigit(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002752 (c == '.') || (c == '-') ||
2753 (c == '_') || (c == ':') ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002754 (xmlIsCombining(c)) ||
2755 (xmlIsExtender(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002756 if (count++ > 100) {
2757 count = 0;
2758 GROW;
2759 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002760 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002761 NEXTL(l);
2762 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002763 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002764 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002765}
2766
2767/**
2768 * xmlParseStringName:
2769 * @ctxt: an XML parser context
2770 * @str: a pointer to the string pointer (IN/OUT)
2771 *
2772 * parse an XML name.
2773 *
2774 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2775 * CombiningChar | Extender
2776 *
2777 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2778 *
2779 * [6] Names ::= Name (S Name)*
2780 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002781 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002782 * is updated to the current location in the string.
2783 */
2784
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002785static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002786xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2787 xmlChar buf[XML_MAX_NAMELEN + 5];
2788 const xmlChar *cur = *str;
2789 int len = 0, l;
2790 int c;
2791
2792 c = CUR_SCHAR(cur, l);
Daniel Veillard73b013f2003-09-30 12:36:01 +00002793 if (!xmlIsLetter(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002794 (c != ':')) {
2795 return(NULL);
2796 }
2797
Daniel Veillard73b013f2003-09-30 12:36:01 +00002798 while ((xmlIsLetter(c)) || (xmlIsDigit(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002799 (c == '.') || (c == '-') ||
2800 (c == '_') || (c == ':') ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002801 (xmlIsCombining(c)) ||
2802 (xmlIsExtender(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002803 COPY_BUF(l,buf,len,c);
2804 cur += l;
2805 c = CUR_SCHAR(cur, l);
2806 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2807 /*
2808 * Okay someone managed to make a huge name, so he's ready to pay
2809 * for the processing speed.
2810 */
2811 xmlChar *buffer;
2812 int max = len * 2;
2813
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002814 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002815 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002816 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002817 return(NULL);
2818 }
2819 memcpy(buffer, buf, len);
Daniel Veillard73b013f2003-09-30 12:36:01 +00002820 while ((xmlIsLetter(c)) || (xmlIsDigit(c)) ||
2821 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002822 (c == '.') || (c == '-') ||
2823 (c == '_') || (c == ':') ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002824 (xmlIsCombining(c)) ||
2825 (xmlIsExtender(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002826 if (len + 10 > max) {
2827 max *= 2;
2828 buffer = (xmlChar *) xmlRealloc(buffer,
2829 max * sizeof(xmlChar));
2830 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002831 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002832 return(NULL);
2833 }
2834 }
2835 COPY_BUF(l,buffer,len,c);
2836 cur += l;
2837 c = CUR_SCHAR(cur, l);
2838 }
2839 buffer[len] = 0;
2840 *str = cur;
2841 return(buffer);
2842 }
2843 }
2844 *str = cur;
2845 return(xmlStrndup(buf, len));
2846}
2847
2848/**
2849 * xmlParseNmtoken:
2850 * @ctxt: an XML parser context
2851 *
2852 * parse an XML Nmtoken.
2853 *
2854 * [7] Nmtoken ::= (NameChar)+
2855 *
2856 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2857 *
2858 * Returns the Nmtoken parsed or NULL
2859 */
2860
2861xmlChar *
2862xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2863 xmlChar buf[XML_MAX_NAMELEN + 5];
2864 int len = 0, l;
2865 int c;
2866 int count = 0;
2867
2868 GROW;
2869 c = CUR_CHAR(l);
2870
Daniel Veillard73b013f2003-09-30 12:36:01 +00002871 while ((xmlIsLetter(c)) || (xmlIsDigit(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002872 (c == '.') || (c == '-') ||
2873 (c == '_') || (c == ':') ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002874 (xmlIsCombining(c)) ||
2875 (xmlIsExtender(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002876 if (count++ > 100) {
2877 count = 0;
2878 GROW;
2879 }
2880 COPY_BUF(l,buf,len,c);
2881 NEXTL(l);
2882 c = CUR_CHAR(l);
2883 if (len >= XML_MAX_NAMELEN) {
2884 /*
2885 * Okay someone managed to make a huge token, so he's ready to pay
2886 * for the processing speed.
2887 */
2888 xmlChar *buffer;
2889 int max = len * 2;
2890
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002891 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002892 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002893 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002894 return(NULL);
2895 }
2896 memcpy(buffer, buf, len);
Daniel Veillard73b013f2003-09-30 12:36:01 +00002897 while ((xmlIsLetter(c)) || (xmlIsDigit(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002898 (c == '.') || (c == '-') ||
2899 (c == '_') || (c == ':') ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002900 (xmlIsCombining(c)) ||
2901 (xmlIsExtender(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002902 if (count++ > 100) {
2903 count = 0;
2904 GROW;
2905 }
2906 if (len + 10 > max) {
2907 max *= 2;
2908 buffer = (xmlChar *) xmlRealloc(buffer,
2909 max * sizeof(xmlChar));
2910 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002911 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002912 return(NULL);
2913 }
2914 }
2915 COPY_BUF(l,buffer,len,c);
2916 NEXTL(l);
2917 c = CUR_CHAR(l);
2918 }
2919 buffer[len] = 0;
2920 return(buffer);
2921 }
2922 }
2923 if (len == 0)
2924 return(NULL);
2925 return(xmlStrndup(buf, len));
2926}
2927
2928/**
2929 * xmlParseEntityValue:
2930 * @ctxt: an XML parser context
2931 * @orig: if non-NULL store a copy of the original entity value
2932 *
2933 * parse a value for ENTITY declarations
2934 *
2935 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2936 * "'" ([^%&'] | PEReference | Reference)* "'"
2937 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002938 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002939 */
2940
2941xmlChar *
2942xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2943 xmlChar *buf = NULL;
2944 int len = 0;
2945 int size = XML_PARSER_BUFFER_SIZE;
2946 int c, l;
2947 xmlChar stop;
2948 xmlChar *ret = NULL;
2949 const xmlChar *cur = NULL;
2950 xmlParserInputPtr input;
2951
2952 if (RAW == '"') stop = '"';
2953 else if (RAW == '\'') stop = '\'';
2954 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002955 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002956 return(NULL);
2957 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002958 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002959 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002960 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002961 return(NULL);
2962 }
2963
2964 /*
2965 * The content of the entity definition is copied in a buffer.
2966 */
2967
2968 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2969 input = ctxt->input;
2970 GROW;
2971 NEXT;
2972 c = CUR_CHAR(l);
2973 /*
2974 * NOTE: 4.4.5 Included in Literal
2975 * When a parameter entity reference appears in a literal entity
2976 * value, ... a single or double quote character in the replacement
2977 * text is always treated as a normal data character and will not
2978 * terminate the literal.
2979 * In practice it means we stop the loop only when back at parsing
2980 * the initial entity and the quote is found
2981 */
Daniel Veillard73b013f2003-09-30 12:36:01 +00002982 while ((xmlIsChar(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002983 (ctxt->input != input))) {
2984 if (len + 5 >= size) {
2985 size *= 2;
2986 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2987 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002988 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002989 return(NULL);
2990 }
2991 }
2992 COPY_BUF(l,buf,len,c);
2993 NEXTL(l);
2994 /*
2995 * Pop-up of finished entities.
2996 */
2997 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2998 xmlPopInput(ctxt);
2999
3000 GROW;
3001 c = CUR_CHAR(l);
3002 if (c == 0) {
3003 GROW;
3004 c = CUR_CHAR(l);
3005 }
3006 }
3007 buf[len] = 0;
3008
3009 /*
3010 * Raise problem w.r.t. '&' and '%' being used in non-entities
3011 * reference constructs. Note Charref will be handled in
3012 * xmlStringDecodeEntities()
3013 */
3014 cur = buf;
3015 while (*cur != 0) { /* non input consuming */
3016 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3017 xmlChar *name;
3018 xmlChar tmp = *cur;
3019
3020 cur++;
3021 name = xmlParseStringName(ctxt, &cur);
3022 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003023 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003024 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003025 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003026 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003027 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3028 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003029 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003030 }
3031 if (name != NULL)
3032 xmlFree(name);
3033 }
3034 cur++;
3035 }
3036
3037 /*
3038 * Then PEReference entities are substituted.
3039 */
3040 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003041 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003042 xmlFree(buf);
3043 } else {
3044 NEXT;
3045 /*
3046 * NOTE: 4.4.7 Bypassed
3047 * When a general entity reference appears in the EntityValue in
3048 * an entity declaration, it is bypassed and left as is.
3049 * so XML_SUBSTITUTE_REF is not set here.
3050 */
3051 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3052 0, 0, 0);
3053 if (orig != NULL)
3054 *orig = buf;
3055 else
3056 xmlFree(buf);
3057 }
3058
3059 return(ret);
3060}
3061
3062/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003063 * xmlParseAttValueComplex:
3064 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003065 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003066 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003067 *
3068 * parse a value for an attribute, this is the fallback function
3069 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003070 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003071 *
3072 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3073 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003074static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003075xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003076 xmlChar limit = 0;
3077 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003078 int len = 0;
3079 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003080 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003081 xmlChar *current = NULL;
3082 xmlEntityPtr ent;
3083
Owen Taylor3473f882001-02-23 17:55:21 +00003084 if (NXT(0) == '"') {
3085 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3086 limit = '"';
3087 NEXT;
3088 } else if (NXT(0) == '\'') {
3089 limit = '\'';
3090 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3091 NEXT;
3092 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003093 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003094 return(NULL);
3095 }
3096
3097 /*
3098 * allocate a translation buffer.
3099 */
3100 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003101 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003102 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003103
3104 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003105 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003106 */
3107 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003108 while ((NXT(0) != limit) && /* checked */
3109 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003110 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003111 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003112 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003113 if (NXT(1) == '#') {
3114 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003115
Owen Taylor3473f882001-02-23 17:55:21 +00003116 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003117 if (ctxt->replaceEntities) {
3118 if (len > buf_size - 10) {
3119 growBuffer(buf);
3120 }
3121 buf[len++] = '&';
3122 } else {
3123 /*
3124 * The reparsing will be done in xmlStringGetNodeList()
3125 * called by the attribute() function in SAX.c
3126 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003127 if (len > buf_size - 10) {
3128 growBuffer(buf);
3129 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003130 buf[len++] = '&';
3131 buf[len++] = '#';
3132 buf[len++] = '3';
3133 buf[len++] = '8';
3134 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003135 }
3136 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003137 if (len > buf_size - 10) {
3138 growBuffer(buf);
3139 }
Owen Taylor3473f882001-02-23 17:55:21 +00003140 len += xmlCopyChar(0, &buf[len], val);
3141 }
3142 } else {
3143 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003144 if ((ent != NULL) &&
3145 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3146 if (len > buf_size - 10) {
3147 growBuffer(buf);
3148 }
3149 if ((ctxt->replaceEntities == 0) &&
3150 (ent->content[0] == '&')) {
3151 buf[len++] = '&';
3152 buf[len++] = '#';
3153 buf[len++] = '3';
3154 buf[len++] = '8';
3155 buf[len++] = ';';
3156 } else {
3157 buf[len++] = ent->content[0];
3158 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003159 } else if ((ent != NULL) &&
3160 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003161 xmlChar *rep;
3162
3163 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3164 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003165 XML_SUBSTITUTE_REF,
3166 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003167 if (rep != NULL) {
3168 current = rep;
3169 while (*current != 0) { /* non input consuming */
3170 buf[len++] = *current++;
3171 if (len > buf_size - 10) {
3172 growBuffer(buf);
3173 }
3174 }
3175 xmlFree(rep);
3176 }
3177 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003178 if (len > buf_size - 10) {
3179 growBuffer(buf);
3180 }
Owen Taylor3473f882001-02-23 17:55:21 +00003181 if (ent->content != NULL)
3182 buf[len++] = ent->content[0];
3183 }
3184 } else if (ent != NULL) {
3185 int i = xmlStrlen(ent->name);
3186 const xmlChar *cur = ent->name;
3187
3188 /*
3189 * This may look absurd but is needed to detect
3190 * entities problems
3191 */
3192 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3193 (ent->content != NULL)) {
3194 xmlChar *rep;
3195 rep = xmlStringDecodeEntities(ctxt, ent->content,
3196 XML_SUBSTITUTE_REF, 0, 0, 0);
3197 if (rep != NULL)
3198 xmlFree(rep);
3199 }
3200
3201 /*
3202 * Just output the reference
3203 */
3204 buf[len++] = '&';
3205 if (len > buf_size - i - 10) {
3206 growBuffer(buf);
3207 }
3208 for (;i > 0;i--)
3209 buf[len++] = *cur++;
3210 buf[len++] = ';';
3211 }
3212 }
3213 } else {
3214 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003215 if ((len != 0) || (!normalize)) {
3216 if ((!normalize) || (!in_space)) {
3217 COPY_BUF(l,buf,len,0x20);
3218 if (len > buf_size - 10) {
3219 growBuffer(buf);
3220 }
3221 }
3222 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003223 }
3224 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003225 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003226 COPY_BUF(l,buf,len,c);
3227 if (len > buf_size - 10) {
3228 growBuffer(buf);
3229 }
3230 }
3231 NEXTL(l);
3232 }
3233 GROW;
3234 c = CUR_CHAR(l);
3235 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003236 if ((in_space) && (normalize)) {
3237 while (buf[len - 1] == 0x20) len--;
3238 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003239 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003240 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003241 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003242 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003243 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3244 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003245 } else
3246 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003247 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003248 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003249
3250mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003251 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003252 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003253}
3254
3255/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003256 * xmlParseAttValue:
3257 * @ctxt: an XML parser context
3258 *
3259 * parse a value for an attribute
3260 * Note: the parser won't do substitution of entities here, this
3261 * will be handled later in xmlStringGetNodeList
3262 *
3263 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3264 * "'" ([^<&'] | Reference)* "'"
3265 *
3266 * 3.3.3 Attribute-Value Normalization:
3267 * Before the value of an attribute is passed to the application or
3268 * checked for validity, the XML processor must normalize it as follows:
3269 * - a character reference is processed by appending the referenced
3270 * character to the attribute value
3271 * - an entity reference is processed by recursively processing the
3272 * replacement text of the entity
3273 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3274 * appending #x20 to the normalized value, except that only a single
3275 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3276 * parsed entity or the literal entity value of an internal parsed entity
3277 * - other characters are processed by appending them to the normalized value
3278 * If the declared value is not CDATA, then the XML processor must further
3279 * process the normalized attribute value by discarding any leading and
3280 * trailing space (#x20) characters, and by replacing sequences of space
3281 * (#x20) characters by a single space (#x20) character.
3282 * All attributes for which no declaration has been read should be treated
3283 * by a non-validating parser as if declared CDATA.
3284 *
3285 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3286 */
3287
3288
3289xmlChar *
3290xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003291 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003292}
3293
3294/**
Owen Taylor3473f882001-02-23 17:55:21 +00003295 * xmlParseSystemLiteral:
3296 * @ctxt: an XML parser context
3297 *
3298 * parse an XML Literal
3299 *
3300 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3301 *
3302 * Returns the SystemLiteral parsed or NULL
3303 */
3304
3305xmlChar *
3306xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3307 xmlChar *buf = NULL;
3308 int len = 0;
3309 int size = XML_PARSER_BUFFER_SIZE;
3310 int cur, l;
3311 xmlChar stop;
3312 int state = ctxt->instate;
3313 int count = 0;
3314
3315 SHRINK;
3316 if (RAW == '"') {
3317 NEXT;
3318 stop = '"';
3319 } else if (RAW == '\'') {
3320 NEXT;
3321 stop = '\'';
3322 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003323 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003324 return(NULL);
3325 }
3326
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003327 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003328 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003329 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003330 return(NULL);
3331 }
3332 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3333 cur = CUR_CHAR(l);
Daniel Veillard73b013f2003-09-30 12:36:01 +00003334 while ((xmlIsChar(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003335 if (len + 5 >= size) {
3336 size *= 2;
3337 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3338 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003339 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003340 ctxt->instate = (xmlParserInputState) state;
3341 return(NULL);
3342 }
3343 }
3344 count++;
3345 if (count > 50) {
3346 GROW;
3347 count = 0;
3348 }
3349 COPY_BUF(l,buf,len,cur);
3350 NEXTL(l);
3351 cur = CUR_CHAR(l);
3352 if (cur == 0) {
3353 GROW;
3354 SHRINK;
3355 cur = CUR_CHAR(l);
3356 }
3357 }
3358 buf[len] = 0;
3359 ctxt->instate = (xmlParserInputState) state;
Daniel Veillard73b013f2003-09-30 12:36:01 +00003360 if (!xmlIsChar(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003361 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003362 } else {
3363 NEXT;
3364 }
3365 return(buf);
3366}
3367
3368/**
3369 * xmlParsePubidLiteral:
3370 * @ctxt: an XML parser context
3371 *
3372 * parse an XML public literal
3373 *
3374 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3375 *
3376 * Returns the PubidLiteral parsed or NULL.
3377 */
3378
3379xmlChar *
3380xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3381 xmlChar *buf = NULL;
3382 int len = 0;
3383 int size = XML_PARSER_BUFFER_SIZE;
3384 xmlChar cur;
3385 xmlChar stop;
3386 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003387 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003388
3389 SHRINK;
3390 if (RAW == '"') {
3391 NEXT;
3392 stop = '"';
3393 } else if (RAW == '\'') {
3394 NEXT;
3395 stop = '\'';
3396 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003397 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003398 return(NULL);
3399 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003400 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003401 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003402 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003403 return(NULL);
3404 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003405 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003406 cur = CUR;
3407 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
3408 if (len + 1 >= size) {
3409 size *= 2;
3410 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3411 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003412 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003413 return(NULL);
3414 }
3415 }
3416 buf[len++] = cur;
3417 count++;
3418 if (count > 50) {
3419 GROW;
3420 count = 0;
3421 }
3422 NEXT;
3423 cur = CUR;
3424 if (cur == 0) {
3425 GROW;
3426 SHRINK;
3427 cur = CUR;
3428 }
3429 }
3430 buf[len] = 0;
3431 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003432 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003433 } else {
3434 NEXT;
3435 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003436 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003437 return(buf);
3438}
3439
Daniel Veillard48b2f892001-02-25 16:11:03 +00003440void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00003441/**
3442 * xmlParseCharData:
3443 * @ctxt: an XML parser context
3444 * @cdata: int indicating whether we are within a CDATA section
3445 *
3446 * parse a CharData section.
3447 * if we are within a CDATA section ']]>' marks an end of section.
3448 *
3449 * The right angle bracket (>) may be represented using the string "&gt;",
3450 * and must, for compatibility, be escaped using "&gt;" or a character
3451 * reference when it appears in the string "]]>" in content, when that
3452 * string is not marking the end of a CDATA section.
3453 *
3454 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3455 */
3456
3457void
3458xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003459 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003460 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003461 int line = ctxt->input->line;
3462 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003463
3464 SHRINK;
3465 GROW;
3466 /*
3467 * Accelerated common case where input don't need to be
3468 * modified before passing it to the handler.
3469 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003470 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003471 in = ctxt->input->cur;
3472 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003473get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00003474 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
3475 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003476 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003477 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003478 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003479 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003480 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003481 ctxt->input->line++;
3482 in++;
3483 }
3484 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003485 }
3486 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003487 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003488 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003489 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003490 return;
3491 }
3492 in++;
3493 goto get_more;
3494 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003495 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003496 if (nbchar > 0) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003497 if ((ctxt->sax->ignorableWhitespace !=
3498 ctxt->sax->characters) &&
3499 (IS_BLANK(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003500 const xmlChar *tmp = ctxt->input->cur;
3501 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003502
Daniel Veillarda7374592001-05-10 14:17:55 +00003503 if (areBlanks(ctxt, tmp, nbchar)) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003504 ctxt->sax->ignorableWhitespace(ctxt->userData,
3505 tmp, nbchar);
3506 } else if (ctxt->sax->characters != NULL)
3507 ctxt->sax->characters(ctxt->userData,
3508 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003509 line = ctxt->input->line;
3510 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003511 } else {
3512 if (ctxt->sax->characters != NULL)
3513 ctxt->sax->characters(ctxt->userData,
3514 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003515 line = ctxt->input->line;
3516 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003517 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003518 }
3519 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003520 if (*in == 0xD) {
3521 in++;
3522 if (*in == 0xA) {
3523 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003524 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003525 ctxt->input->line++;
3526 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003527 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003528 in--;
3529 }
3530 if (*in == '<') {
3531 return;
3532 }
3533 if (*in == '&') {
3534 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003535 }
3536 SHRINK;
3537 GROW;
3538 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003539 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003540 nbchar = 0;
3541 }
Daniel Veillard50582112001-03-26 22:52:16 +00003542 ctxt->input->line = line;
3543 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003544 xmlParseCharDataComplex(ctxt, cdata);
3545}
3546
Daniel Veillard01c13b52002-12-10 15:19:08 +00003547/**
3548 * xmlParseCharDataComplex:
3549 * @ctxt: an XML parser context
3550 * @cdata: int indicating whether we are within a CDATA section
3551 *
3552 * parse a CharData section.this is the fallback function
3553 * of xmlParseCharData() when the parsing requires handling
3554 * of non-ASCII characters.
3555 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003556void
3557xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003558 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3559 int nbchar = 0;
3560 int cur, l;
3561 int count = 0;
3562
3563 SHRINK;
3564 GROW;
3565 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003566 while ((cur != '<') && /* checked */
3567 (cur != '&') &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00003568 (xmlIsChar(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003569 if ((cur == ']') && (NXT(1) == ']') &&
3570 (NXT(2) == '>')) {
3571 if (cdata) break;
3572 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003573 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003574 }
3575 }
3576 COPY_BUF(l,buf,nbchar,cur);
3577 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003578 buf[nbchar] = 0;
3579
Owen Taylor3473f882001-02-23 17:55:21 +00003580 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003581 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003582 */
3583 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3584 if (areBlanks(ctxt, buf, nbchar)) {
3585 if (ctxt->sax->ignorableWhitespace != NULL)
3586 ctxt->sax->ignorableWhitespace(ctxt->userData,
3587 buf, nbchar);
3588 } else {
3589 if (ctxt->sax->characters != NULL)
3590 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3591 }
3592 }
3593 nbchar = 0;
3594 }
3595 count++;
3596 if (count > 50) {
3597 GROW;
3598 count = 0;
3599 }
3600 NEXTL(l);
3601 cur = CUR_CHAR(l);
3602 }
3603 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003604 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003605 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003606 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003607 */
3608 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3609 if (areBlanks(ctxt, buf, nbchar)) {
3610 if (ctxt->sax->ignorableWhitespace != NULL)
3611 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3612 } else {
3613 if (ctxt->sax->characters != NULL)
3614 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3615 }
3616 }
3617 }
3618}
3619
3620/**
3621 * xmlParseExternalID:
3622 * @ctxt: an XML parser context
3623 * @publicID: a xmlChar** receiving PubidLiteral
3624 * @strict: indicate whether we should restrict parsing to only
3625 * production [75], see NOTE below
3626 *
3627 * Parse an External ID or a Public ID
3628 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003629 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003630 * 'PUBLIC' S PubidLiteral S SystemLiteral
3631 *
3632 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3633 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3634 *
3635 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3636 *
3637 * Returns the function returns SystemLiteral and in the second
3638 * case publicID receives PubidLiteral, is strict is off
3639 * it is possible to return NULL and have publicID set.
3640 */
3641
3642xmlChar *
3643xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3644 xmlChar *URI = NULL;
3645
3646 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003647
3648 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003649 if ((RAW == 'S') && (NXT(1) == 'Y') &&
3650 (NXT(2) == 'S') && (NXT(3) == 'T') &&
3651 (NXT(4) == 'E') && (NXT(5) == 'M')) {
3652 SKIP(6);
3653 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003654 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3655 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003656 }
3657 SKIP_BLANKS;
3658 URI = xmlParseSystemLiteral(ctxt);
3659 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003660 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003661 }
3662 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
3663 (NXT(2) == 'B') && (NXT(3) == 'L') &&
3664 (NXT(4) == 'I') && (NXT(5) == 'C')) {
3665 SKIP(6);
3666 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003667 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003668 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003669 }
3670 SKIP_BLANKS;
3671 *publicID = xmlParsePubidLiteral(ctxt);
3672 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003673 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003674 }
3675 if (strict) {
3676 /*
3677 * We don't handle [83] so "S SystemLiteral" is required.
3678 */
3679 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003680 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003681 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003682 }
3683 } else {
3684 /*
3685 * We handle [83] so we return immediately, if
3686 * "S SystemLiteral" is not detected. From a purely parsing
3687 * point of view that's a nice mess.
3688 */
3689 const xmlChar *ptr;
3690 GROW;
3691
3692 ptr = CUR_PTR;
3693 if (!IS_BLANK(*ptr)) return(NULL);
3694
3695 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3696 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3697 }
3698 SKIP_BLANKS;
3699 URI = xmlParseSystemLiteral(ctxt);
3700 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003701 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003702 }
3703 }
3704 return(URI);
3705}
3706
3707/**
3708 * xmlParseComment:
3709 * @ctxt: an XML parser context
3710 *
3711 * Skip an XML (SGML) comment <!-- .... -->
3712 * The spec says that "For compatibility, the string "--" (double-hyphen)
3713 * must not occur within comments. "
3714 *
3715 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3716 */
3717void
3718xmlParseComment(xmlParserCtxtPtr ctxt) {
3719 xmlChar *buf = NULL;
3720 int len;
3721 int size = XML_PARSER_BUFFER_SIZE;
3722 int q, ql;
3723 int r, rl;
3724 int cur, l;
3725 xmlParserInputState state;
3726 xmlParserInputPtr input = ctxt->input;
3727 int count = 0;
3728
3729 /*
3730 * Check that there is a comment right here.
3731 */
3732 if ((RAW != '<') || (NXT(1) != '!') ||
3733 (NXT(2) != '-') || (NXT(3) != '-')) return;
3734
3735 state = ctxt->instate;
3736 ctxt->instate = XML_PARSER_COMMENT;
3737 SHRINK;
3738 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003739 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003740 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003741 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003742 ctxt->instate = state;
3743 return;
3744 }
3745 q = CUR_CHAR(ql);
3746 NEXTL(ql);
3747 r = CUR_CHAR(rl);
3748 NEXTL(rl);
3749 cur = CUR_CHAR(l);
3750 len = 0;
Daniel Veillard73b013f2003-09-30 12:36:01 +00003751 while (xmlIsChar(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003752 ((cur != '>') ||
3753 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003754 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003755 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003756 }
3757 if (len + 5 >= size) {
3758 size *= 2;
3759 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3760 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003761 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003762 ctxt->instate = state;
3763 return;
3764 }
3765 }
3766 COPY_BUF(ql,buf,len,q);
3767 q = r;
3768 ql = rl;
3769 r = cur;
3770 rl = l;
3771
3772 count++;
3773 if (count > 50) {
3774 GROW;
3775 count = 0;
3776 }
3777 NEXTL(l);
3778 cur = CUR_CHAR(l);
3779 if (cur == 0) {
3780 SHRINK;
3781 GROW;
3782 cur = CUR_CHAR(l);
3783 }
3784 }
3785 buf[len] = 0;
Daniel Veillard73b013f2003-09-30 12:36:01 +00003786 if (!xmlIsChar(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003787 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003788 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003789 xmlFree(buf);
3790 } else {
3791 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003792 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3793 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003794 }
3795 NEXT;
3796 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3797 (!ctxt->disableSAX))
3798 ctxt->sax->comment(ctxt->userData, buf);
3799 xmlFree(buf);
3800 }
3801 ctxt->instate = state;
3802}
3803
3804/**
3805 * xmlParsePITarget:
3806 * @ctxt: an XML parser context
3807 *
3808 * parse the name of a PI
3809 *
3810 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3811 *
3812 * Returns the PITarget name or NULL
3813 */
3814
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003815const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003816xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003817 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003818
3819 name = xmlParseName(ctxt);
3820 if ((name != NULL) &&
3821 ((name[0] == 'x') || (name[0] == 'X')) &&
3822 ((name[1] == 'm') || (name[1] == 'M')) &&
3823 ((name[2] == 'l') || (name[2] == 'L'))) {
3824 int i;
3825 if ((name[0] == 'x') && (name[1] == 'm') &&
3826 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003827 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003828 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003829 return(name);
3830 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003831 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003832 return(name);
3833 }
3834 for (i = 0;;i++) {
3835 if (xmlW3CPIs[i] == NULL) break;
3836 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3837 return(name);
3838 }
3839 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
3840 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
3841 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003842 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003843 }
3844 }
3845 return(name);
3846}
3847
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value has to be of the form  catalog = "URL"  or  catalog = 'URL'
 * with optional blanks around each part and nothing after the closing
 * quote. Syntax errors are reported as an XML_WAR_CATALOG_PI warning,
 * except a missing '=' after the keyword which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *url = NULL;
    xmlChar quote;

    /* the 'catalog' keyword */
    for (; IS_BLANK(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* the '=' sign */
    for (; IS_BLANK(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    /* the quoted URL */
    for (; IS_BLANK(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    url = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (url != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, url);
        xmlFree(url);
    }
    return;

error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
                 "Catalog PI syntax error: %s\n", catalog);
    if (url != NULL)
        xmlFree(url);
}
#endif
3910
Owen Taylor3473f882001-02-23 17:55:21 +00003911/**
3912 * xmlParsePI:
3913 * @ctxt: an XML parser context
3914 *
3915 * parse an XML Processing Instruction.
3916 *
3917 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3918 *
3919 * The processing is transfered to SAX once parsed.
3920 */
3921
3922void
3923xmlParsePI(xmlParserCtxtPtr ctxt) {
3924 xmlChar *buf = NULL;
3925 int len = 0;
3926 int size = XML_PARSER_BUFFER_SIZE;
3927 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003928 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00003929 xmlParserInputState state;
3930 int count = 0;
3931
3932 if ((RAW == '<') && (NXT(1) == '?')) {
3933 xmlParserInputPtr input = ctxt->input;
3934 state = ctxt->instate;
3935 ctxt->instate = XML_PARSER_PI;
3936 /*
3937 * this is a Processing Instruction.
3938 */
3939 SKIP(2);
3940 SHRINK;
3941
3942 /*
3943 * Parse the target name and check for special support like
3944 * namespace.
3945 */
3946 target = xmlParsePITarget(ctxt);
3947 if (target != NULL) {
3948 if ((RAW == '?') && (NXT(1) == '>')) {
3949 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003950 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3951 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003952 }
3953 SKIP(2);
3954
3955 /*
3956 * SAX: PI detected.
3957 */
3958 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3959 (ctxt->sax->processingInstruction != NULL))
3960 ctxt->sax->processingInstruction(ctxt->userData,
3961 target, NULL);
3962 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00003963 return;
3964 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003965 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003966 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003967 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003968 ctxt->instate = state;
3969 return;
3970 }
3971 cur = CUR;
3972 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003973 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
3974 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003975 }
3976 SKIP_BLANKS;
3977 cur = CUR_CHAR(l);
Daniel Veillard73b013f2003-09-30 12:36:01 +00003978 while (xmlIsChar(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003979 ((cur != '?') || (NXT(1) != '>'))) {
3980 if (len + 5 >= size) {
3981 size *= 2;
3982 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3983 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003984 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003985 ctxt->instate = state;
3986 return;
3987 }
3988 }
3989 count++;
3990 if (count > 50) {
3991 GROW;
3992 count = 0;
3993 }
3994 COPY_BUF(l,buf,len,cur);
3995 NEXTL(l);
3996 cur = CUR_CHAR(l);
3997 if (cur == 0) {
3998 SHRINK;
3999 GROW;
4000 cur = CUR_CHAR(l);
4001 }
4002 }
4003 buf[len] = 0;
4004 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004005 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4006 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004007 } else {
4008 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004009 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4010 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004011 }
4012 SKIP(2);
4013
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004014#ifdef LIBXML_CATALOG_ENABLED
4015 if (((state == XML_PARSER_MISC) ||
4016 (state == XML_PARSER_START)) &&
4017 (xmlStrEqual(target, XML_CATALOG_PI))) {
4018 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4019 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4020 (allow == XML_CATA_ALLOW_ALL))
4021 xmlParseCatalogPI(ctxt, buf);
4022 }
4023#endif
4024
4025
Owen Taylor3473f882001-02-23 17:55:21 +00004026 /*
4027 * SAX: PI detected.
4028 */
4029 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4030 (ctxt->sax->processingInstruction != NULL))
4031 ctxt->sax->processingInstruction(ctxt->userData,
4032 target, buf);
4033 }
4034 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004035 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004036 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004037 }
4038 ctxt->instate = state;
4039 }
4040}
4041
4042/**
4043 * xmlParseNotationDecl:
4044 * @ctxt: an XML parser context
4045 *
4046 * parse a notation declaration
4047 *
4048 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4049 *
4050 * Hence there is actually 3 choices:
4051 * 'PUBLIC' S PubidLiteral
4052 * 'PUBLIC' S PubidLiteral S SystemLiteral
4053 * and 'SYSTEM' S SystemLiteral
4054 *
4055 * See the NOTE on xmlParseExternalID().
4056 */
4057
4058void
4059xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004060 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004061 xmlChar *Pubid;
4062 xmlChar *Systemid;
4063
4064 if ((RAW == '<') && (NXT(1) == '!') &&
4065 (NXT(2) == 'N') && (NXT(3) == 'O') &&
4066 (NXT(4) == 'T') && (NXT(5) == 'A') &&
4067 (NXT(6) == 'T') && (NXT(7) == 'I') &&
4068 (NXT(8) == 'O') && (NXT(9) == 'N')) {
4069 xmlParserInputPtr input = ctxt->input;
4070 SHRINK;
4071 SKIP(10);
4072 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004073 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4074 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004075 return;
4076 }
4077 SKIP_BLANKS;
4078
Daniel Veillard76d66f42001-05-16 21:05:17 +00004079 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004080 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004081 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004082 return;
4083 }
4084 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004085 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004086 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004087 return;
4088 }
4089 SKIP_BLANKS;
4090
4091 /*
4092 * Parse the IDs.
4093 */
4094 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4095 SKIP_BLANKS;
4096
4097 if (RAW == '>') {
4098 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004099 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4100 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004101 }
4102 NEXT;
4103 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4104 (ctxt->sax->notationDecl != NULL))
4105 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4106 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004107 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004108 }
Owen Taylor3473f882001-02-23 17:55:21 +00004109 if (Systemid != NULL) xmlFree(Systemid);
4110 if (Pubid != NULL) xmlFree(Pubid);
4111 }
4112}
4113
4114/**
4115 * xmlParseEntityDecl:
4116 * @ctxt: an XML parser context
4117 *
4118 * parse <!ENTITY declarations
4119 *
4120 * [70] EntityDecl ::= GEDecl | PEDecl
4121 *
4122 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4123 *
4124 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4125 *
4126 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4127 *
4128 * [74] PEDef ::= EntityValue | ExternalID
4129 *
4130 * [76] NDataDecl ::= S 'NDATA' S Name
4131 *
4132 * [ VC: Notation Declared ]
4133 * The Name must match the declared name of a notation.
4134 */
4135
4136void
4137xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004138 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004139 xmlChar *value = NULL;
4140 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004141 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004142 int isParameter = 0;
4143 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004144 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004145
4146 GROW;
4147 if ((RAW == '<') && (NXT(1) == '!') &&
4148 (NXT(2) == 'E') && (NXT(3) == 'N') &&
4149 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4150 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
4151 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004152 SHRINK;
4153 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004154 skipped = SKIP_BLANKS;
4155 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004156 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4157 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004158 }
Owen Taylor3473f882001-02-23 17:55:21 +00004159
4160 if (RAW == '%') {
4161 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004162 skipped = SKIP_BLANKS;
4163 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004164 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4165 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004166 }
Owen Taylor3473f882001-02-23 17:55:21 +00004167 isParameter = 1;
4168 }
4169
Daniel Veillard76d66f42001-05-16 21:05:17 +00004170 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004171 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004172 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4173 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004174 return;
4175 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004176 skipped = SKIP_BLANKS;
4177 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004178 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4179 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004180 }
Owen Taylor3473f882001-02-23 17:55:21 +00004181
Daniel Veillardf5582f12002-06-11 10:08:16 +00004182 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004183 /*
4184 * handle the various case of definitions...
4185 */
4186 if (isParameter) {
4187 if ((RAW == '"') || (RAW == '\'')) {
4188 value = xmlParseEntityValue(ctxt, &orig);
4189 if (value) {
4190 if ((ctxt->sax != NULL) &&
4191 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4192 ctxt->sax->entityDecl(ctxt->userData, name,
4193 XML_INTERNAL_PARAMETER_ENTITY,
4194 NULL, NULL, value);
4195 }
4196 } else {
4197 URI = xmlParseExternalID(ctxt, &literal, 1);
4198 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004199 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004200 }
4201 if (URI) {
4202 xmlURIPtr uri;
4203
4204 uri = xmlParseURI((const char *) URI);
4205 if (uri == NULL) {
4206 ctxt->errNo = XML_ERR_INVALID_URI;
4207 if ((ctxt->sax != NULL) &&
4208 (!ctxt->disableSAX) &&
4209 (ctxt->sax->error != NULL))
4210 ctxt->sax->error(ctxt->userData,
4211 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004212 /*
4213 * This really ought to be a well formedness error
4214 * but the XML Core WG decided otherwise c.f. issue
4215 * E26 of the XML erratas.
4216 */
Owen Taylor3473f882001-02-23 17:55:21 +00004217 } else {
4218 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004219 /*
4220 * Okay this is foolish to block those but not
4221 * invalid URIs.
4222 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004223 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004224 } else {
4225 if ((ctxt->sax != NULL) &&
4226 (!ctxt->disableSAX) &&
4227 (ctxt->sax->entityDecl != NULL))
4228 ctxt->sax->entityDecl(ctxt->userData, name,
4229 XML_EXTERNAL_PARAMETER_ENTITY,
4230 literal, URI, NULL);
4231 }
4232 xmlFreeURI(uri);
4233 }
4234 }
4235 }
4236 } else {
4237 if ((RAW == '"') || (RAW == '\'')) {
4238 value = xmlParseEntityValue(ctxt, &orig);
4239 if ((ctxt->sax != NULL) &&
4240 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4241 ctxt->sax->entityDecl(ctxt->userData, name,
4242 XML_INTERNAL_GENERAL_ENTITY,
4243 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004244 /*
4245 * For expat compatibility in SAX mode.
4246 */
4247 if ((ctxt->myDoc == NULL) ||
4248 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4249 if (ctxt->myDoc == NULL) {
4250 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4251 }
4252 if (ctxt->myDoc->intSubset == NULL)
4253 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4254 BAD_CAST "fake", NULL, NULL);
4255
Daniel Veillard1af9a412003-08-20 22:54:39 +00004256 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4257 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004258 }
Owen Taylor3473f882001-02-23 17:55:21 +00004259 } else {
4260 URI = xmlParseExternalID(ctxt, &literal, 1);
4261 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004262 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004263 }
4264 if (URI) {
4265 xmlURIPtr uri;
4266
4267 uri = xmlParseURI((const char *)URI);
4268 if (uri == NULL) {
4269 ctxt->errNo = XML_ERR_INVALID_URI;
4270 if ((ctxt->sax != NULL) &&
4271 (!ctxt->disableSAX) &&
4272 (ctxt->sax->error != NULL))
4273 ctxt->sax->error(ctxt->userData,
4274 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004275 /*
4276 * This really ought to be a well formedness error
4277 * but the XML Core WG decided otherwise c.f. issue
4278 * E26 of the XML erratas.
4279 */
Owen Taylor3473f882001-02-23 17:55:21 +00004280 } else {
4281 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004282 /*
4283 * Okay this is foolish to block those but not
4284 * invalid URIs.
4285 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004286 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004287 }
4288 xmlFreeURI(uri);
4289 }
4290 }
4291 if ((RAW != '>') && (!IS_BLANK(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004292 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4293 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004294 }
4295 SKIP_BLANKS;
4296 if ((RAW == 'N') && (NXT(1) == 'D') &&
4297 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4298 (NXT(4) == 'A')) {
4299 SKIP(5);
4300 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004301 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4302 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004303 }
4304 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004305 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004306 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4307 (ctxt->sax->unparsedEntityDecl != NULL))
4308 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4309 literal, URI, ndata);
4310 } else {
4311 if ((ctxt->sax != NULL) &&
4312 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4313 ctxt->sax->entityDecl(ctxt->userData, name,
4314 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4315 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004316 /*
4317 * For expat compatibility in SAX mode.
4318 * assuming the entity repalcement was asked for
4319 */
4320 if ((ctxt->replaceEntities != 0) &&
4321 ((ctxt->myDoc == NULL) ||
4322 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4323 if (ctxt->myDoc == NULL) {
4324 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4325 }
4326
4327 if (ctxt->myDoc->intSubset == NULL)
4328 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4329 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004330 xmlSAX2EntityDecl(ctxt, name,
4331 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4332 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004333 }
Owen Taylor3473f882001-02-23 17:55:21 +00004334 }
4335 }
4336 }
4337 SKIP_BLANKS;
4338 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004339 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004340 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004341 } else {
4342 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004343 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4344 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004345 }
4346 NEXT;
4347 }
4348 if (orig != NULL) {
4349 /*
4350 * Ugly mechanism to save the raw entity value.
4351 */
4352 xmlEntityPtr cur = NULL;
4353
4354 if (isParameter) {
4355 if ((ctxt->sax != NULL) &&
4356 (ctxt->sax->getParameterEntity != NULL))
4357 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4358 } else {
4359 if ((ctxt->sax != NULL) &&
4360 (ctxt->sax->getEntity != NULL))
4361 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004362 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004363 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004364 }
Owen Taylor3473f882001-02-23 17:55:21 +00004365 }
4366 if (cur != NULL) {
4367 if (cur->orig != NULL)
4368 xmlFree(orig);
4369 else
4370 cur->orig = orig;
4371 } else
4372 xmlFree(orig);
4373 }
Owen Taylor3473f882001-02-23 17:55:21 +00004374 if (value != NULL) xmlFree(value);
4375 if (URI != NULL) xmlFree(URI);
4376 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004377 }
4378}
4379
4380/**
4381 * xmlParseDefaultDecl:
4382 * @ctxt: an XML parser context
4383 * @value: Receive a possible fixed default value for the attribute
4384 *
4385 * Parse an attribute default declaration
4386 *
4387 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4388 *
4389 * [ VC: Required Attribute ]
4390 * if the default declaration is the keyword #REQUIRED, then the
4391 * attribute must be specified for all elements of the type in the
4392 * attribute-list declaration.
4393 *
4394 * [ VC: Attribute Default Legal ]
4395 * The declared default value must meet the lexical constraints of
4396 * the declared attribute type c.f. xmlValidateAttributeDecl()
4397 *
4398 * [ VC: Fixed Attribute Default ]
4399 * if an attribute has a default value declared with the #FIXED
4400 * keyword, instances of that attribute must match the default value.
4401 *
4402 * [ WFC: No < in Attribute Values ]
4403 * handled in xmlParseAttValue()
4404 *
4405 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4406 * or XML_ATTRIBUTE_FIXED.
4407 */
4408
4409int
4410xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4411 int val;
4412 xmlChar *ret;
4413
4414 *value = NULL;
4415 if ((RAW == '#') && (NXT(1) == 'R') &&
4416 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
4417 (NXT(4) == 'U') && (NXT(5) == 'I') &&
4418 (NXT(6) == 'R') && (NXT(7) == 'E') &&
4419 (NXT(8) == 'D')) {
4420 SKIP(9);
4421 return(XML_ATTRIBUTE_REQUIRED);
4422 }
4423 if ((RAW == '#') && (NXT(1) == 'I') &&
4424 (NXT(2) == 'M') && (NXT(3) == 'P') &&
4425 (NXT(4) == 'L') && (NXT(5) == 'I') &&
4426 (NXT(6) == 'E') && (NXT(7) == 'D')) {
4427 SKIP(8);
4428 return(XML_ATTRIBUTE_IMPLIED);
4429 }
4430 val = XML_ATTRIBUTE_NONE;
4431 if ((RAW == '#') && (NXT(1) == 'F') &&
4432 (NXT(2) == 'I') && (NXT(3) == 'X') &&
4433 (NXT(4) == 'E') && (NXT(5) == 'D')) {
4434 SKIP(6);
4435 val = XML_ATTRIBUTE_FIXED;
4436 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004437 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4438 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004439 }
4440 SKIP_BLANKS;
4441 }
4442 ret = xmlParseAttValue(ctxt);
4443 ctxt->instate = XML_PARSER_DTD;
4444 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004445 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004446 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004447 } else
4448 *value = ret;
4449 return(val);
4450}
4451
4452/**
4453 * xmlParseNotationType:
4454 * @ctxt: an XML parser context
4455 *
4456 * parse an Notation attribute type.
4457 *
4458 * Note: the leading 'NOTATION' S part has already being parsed...
4459 *
4460 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4461 *
4462 * [ VC: Notation Attributes ]
4463 * Values of this type must match one of the notation names included
4464 * in the declaration; all notation names in the declaration must be declared.
4465 *
4466 * Returns: the notation attribute tree built while parsing
4467 */
4468
4469xmlEnumerationPtr
4470xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004471 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004472 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4473
4474 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004475 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004476 return(NULL);
4477 }
4478 SHRINK;
4479 do {
4480 NEXT;
4481 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004482 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004483 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004484 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4485 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004486 return(ret);
4487 }
4488 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004489 if (cur == NULL) return(ret);
4490 if (last == NULL) ret = last = cur;
4491 else {
4492 last->next = cur;
4493 last = cur;
4494 }
4495 SKIP_BLANKS;
4496 } while (RAW == '|');
4497 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004498 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004499 if ((last != NULL) && (last != ret))
4500 xmlFreeEnumeration(last);
4501 return(ret);
4502 }
4503 NEXT;
4504 return(ret);
4505}
4506
4507/**
4508 * xmlParseEnumerationType:
4509 * @ctxt: an XML parser context
4510 *
4511 * parse an Enumeration attribute type.
4512 *
4513 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4514 *
4515 * [ VC: Enumeration ]
4516 * Values of this type must match one of the Nmtoken tokens in
4517 * the declaration
4518 *
4519 * Returns: the enumeration attribute tree built while parsing
4520 */
4521
4522xmlEnumerationPtr
4523xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4524 xmlChar *name;
4525 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4526
4527 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004528 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004529 return(NULL);
4530 }
4531 SHRINK;
4532 do {
4533 NEXT;
4534 SKIP_BLANKS;
4535 name = xmlParseNmtoken(ctxt);
4536 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004537 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004538 return(ret);
4539 }
4540 cur = xmlCreateEnumeration(name);
4541 xmlFree(name);
4542 if (cur == NULL) return(ret);
4543 if (last == NULL) ret = last = cur;
4544 else {
4545 last->next = cur;
4546 last = cur;
4547 }
4548 SKIP_BLANKS;
4549 } while (RAW == '|');
4550 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004551 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004552 return(ret);
4553 }
4554 NEXT;
4555 return(ret);
4556}
4557
4558/**
4559 * xmlParseEnumeratedType:
4560 * @ctxt: an XML parser context
4561 * @tree: the enumeration tree built while parsing
4562 *
4563 * parse an Enumerated attribute type.
4564 *
4565 * [57] EnumeratedType ::= NotationType | Enumeration
4566 *
4567 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4568 *
4569 *
4570 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4571 */
4572
4573int
4574xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4575 if ((RAW == 'N') && (NXT(1) == 'O') &&
4576 (NXT(2) == 'T') && (NXT(3) == 'A') &&
4577 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4578 (NXT(6) == 'O') && (NXT(7) == 'N')) {
4579 SKIP(8);
4580 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004581 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4582 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004583 return(0);
4584 }
4585 SKIP_BLANKS;
4586 *tree = xmlParseNotationType(ctxt);
4587 if (*tree == NULL) return(0);
4588 return(XML_ATTRIBUTE_NOTATION);
4589 }
4590 *tree = xmlParseEnumerationType(ctxt);
4591 if (*tree == NULL) return(0);
4592 return(XML_ATTRIBUTE_ENUMERATION);
4593}
4594
4595/**
4596 * xmlParseAttributeType:
4597 * @ctxt: an XML parser context
4598 * @tree: the enumeration tree built while parsing
4599 *
4600 * parse the Attribute list def for an element
4601 *
4602 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4603 *
4604 * [55] StringType ::= 'CDATA'
4605 *
4606 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4607 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4608 *
4609 * Validity constraints for attribute values syntax are checked in
4610 * xmlValidateAttributeValue()
4611 *
4612 * [ VC: ID ]
4613 * Values of type ID must match the Name production. A name must not
4614 * appear more than once in an XML document as a value of this type;
4615 * i.e., ID values must uniquely identify the elements which bear them.
4616 *
4617 * [ VC: One ID per Element Type ]
4618 * No element type may have more than one ID attribute specified.
4619 *
4620 * [ VC: ID Attribute Default ]
4621 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4622 *
4623 * [ VC: IDREF ]
4624 * Values of type IDREF must match the Name production, and values
4625 * of type IDREFS must match Names; each IDREF Name must match the value
4626 * of an ID attribute on some element in the XML document; i.e. IDREF
4627 * values must match the value of some ID attribute.
4628 *
4629 * [ VC: Entity Name ]
4630 * Values of type ENTITY must match the Name production, values
4631 * of type ENTITIES must match Names; each Entity Name must match the
4632 * name of an unparsed entity declared in the DTD.
4633 *
4634 * [ VC: Name Token ]
4635 * Values of type NMTOKEN must match the Nmtoken production; values
4636 * of type NMTOKENS must match Nmtokens.
4637 *
4638 * Returns the attribute type
4639 */
4640int
4641xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4642 SHRINK;
4643 if ((RAW == 'C') && (NXT(1) == 'D') &&
4644 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4645 (NXT(4) == 'A')) {
4646 SKIP(5);
4647 return(XML_ATTRIBUTE_CDATA);
4648 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4649 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4650 (NXT(4) == 'F') && (NXT(5) == 'S')) {
4651 SKIP(6);
4652 return(XML_ATTRIBUTE_IDREFS);
4653 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
4654 (NXT(2) == 'R') && (NXT(3) == 'E') &&
4655 (NXT(4) == 'F')) {
4656 SKIP(5);
4657 return(XML_ATTRIBUTE_IDREF);
4658 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4659 SKIP(2);
4660 return(XML_ATTRIBUTE_ID);
4661 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4662 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4663 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
4664 SKIP(6);
4665 return(XML_ATTRIBUTE_ENTITY);
4666 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
4667 (NXT(2) == 'T') && (NXT(3) == 'I') &&
4668 (NXT(4) == 'T') && (NXT(5) == 'I') &&
4669 (NXT(6) == 'E') && (NXT(7) == 'S')) {
4670 SKIP(8);
4671 return(XML_ATTRIBUTE_ENTITIES);
4672 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4673 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4674 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4675 (NXT(6) == 'N') && (NXT(7) == 'S')) {
4676 SKIP(8);
4677 return(XML_ATTRIBUTE_NMTOKENS);
4678 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
4679 (NXT(2) == 'T') && (NXT(3) == 'O') &&
4680 (NXT(4) == 'K') && (NXT(5) == 'E') &&
4681 (NXT(6) == 'N')) {
4682 SKIP(7);
4683 return(XML_ATTRIBUTE_NMTOKEN);
4684 }
4685 return(xmlParseEnumeratedType(ctxt, tree));
4686}
4687
4688/**
4689 * xmlParseAttributeListDecl:
4690 * @ctxt: an XML parser context
4691 *
4692 * : parse the Attribute list def for an element
4693 *
4694 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4695 *
4696 * [53] AttDef ::= S Name S AttType S DefaultDecl
4697 *
4698 */
4699void
4700xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004701 const xmlChar *elemName;
4702 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004703 xmlEnumerationPtr tree;
4704
4705 if ((RAW == '<') && (NXT(1) == '!') &&
4706 (NXT(2) == 'A') && (NXT(3) == 'T') &&
4707 (NXT(4) == 'T') && (NXT(5) == 'L') &&
4708 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4709 (NXT(8) == 'T')) {
4710 xmlParserInputPtr input = ctxt->input;
4711
4712 SKIP(9);
4713 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004714 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004715 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004716 }
4717 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004718 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004719 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004720 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4721 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004722 return;
4723 }
4724 SKIP_BLANKS;
4725 GROW;
4726 while (RAW != '>') {
4727 const xmlChar *check = CUR_PTR;
4728 int type;
4729 int def;
4730 xmlChar *defaultValue = NULL;
4731
4732 GROW;
4733 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004734 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004735 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004736 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4737 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004738 break;
4739 }
4740 GROW;
4741 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004742 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004743 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004744 if (defaultValue != NULL)
4745 xmlFree(defaultValue);
4746 break;
4747 }
4748 SKIP_BLANKS;
4749
4750 type = xmlParseAttributeType(ctxt, &tree);
4751 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004752 if (defaultValue != NULL)
4753 xmlFree(defaultValue);
4754 break;
4755 }
4756
4757 GROW;
4758 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004759 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4760 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004761 if (defaultValue != NULL)
4762 xmlFree(defaultValue);
4763 if (tree != NULL)
4764 xmlFreeEnumeration(tree);
4765 break;
4766 }
4767 SKIP_BLANKS;
4768
4769 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4770 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004771 if (defaultValue != NULL)
4772 xmlFree(defaultValue);
4773 if (tree != NULL)
4774 xmlFreeEnumeration(tree);
4775 break;
4776 }
4777
4778 GROW;
4779 if (RAW != '>') {
4780 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004781 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004782 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004783 if (defaultValue != NULL)
4784 xmlFree(defaultValue);
4785 if (tree != NULL)
4786 xmlFreeEnumeration(tree);
4787 break;
4788 }
4789 SKIP_BLANKS;
4790 }
4791 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004792 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4793 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004794 if (defaultValue != NULL)
4795 xmlFree(defaultValue);
4796 if (tree != NULL)
4797 xmlFreeEnumeration(tree);
4798 break;
4799 }
4800 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4801 (ctxt->sax->attributeDecl != NULL))
4802 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4803 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004804 else if (tree != NULL)
4805 xmlFreeEnumeration(tree);
4806
4807 if ((ctxt->sax2) && (defaultValue != NULL) &&
4808 (def != XML_ATTRIBUTE_IMPLIED) &&
4809 (def != XML_ATTRIBUTE_REQUIRED)) {
4810 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4811 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004812 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4813 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4814 }
Owen Taylor3473f882001-02-23 17:55:21 +00004815 if (defaultValue != NULL)
4816 xmlFree(defaultValue);
4817 GROW;
4818 }
4819 if (RAW == '>') {
4820 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004821 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4822 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004823 }
4824 NEXT;
4825 }
Owen Taylor3473f882001-02-23 17:55:21 +00004826 }
4827}
4828
4829/**
4830 * xmlParseElementMixedContentDecl:
4831 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004832 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004833 *
4834 * parse the declaration for a Mixed Element content
4835 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4836 *
4837 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4838 * '(' S? '#PCDATA' S? ')'
4839 *
4840 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4841 *
4842 * [ VC: No Duplicate Types ]
4843 * The same name must not appear more than once in a single
4844 * mixed-content declaration.
4845 *
4846 * returns: the list of the xmlElementContentPtr describing the element choices
4847 */
4848xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004849xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004850 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004851 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004852
4853 GROW;
4854 if ((RAW == '#') && (NXT(1) == 'P') &&
4855 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4856 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4857 (NXT(6) == 'A')) {
4858 SKIP(7);
4859 SKIP_BLANKS;
4860 SHRINK;
4861 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004862 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004863 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4864 if (ctxt->vctxt.error != NULL)
4865 ctxt->vctxt.error(ctxt->vctxt.userData,
4866"Element content declaration doesn't start and stop in the same entity\n");
4867 ctxt->valid = 0;
4868 }
Owen Taylor3473f882001-02-23 17:55:21 +00004869 NEXT;
4870 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4871 if (RAW == '*') {
4872 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4873 NEXT;
4874 }
4875 return(ret);
4876 }
4877 if ((RAW == '(') || (RAW == '|')) {
4878 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4879 if (ret == NULL) return(NULL);
4880 }
4881 while (RAW == '|') {
4882 NEXT;
4883 if (elem == NULL) {
4884 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4885 if (ret == NULL) return(NULL);
4886 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004887 if (cur != NULL)
4888 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004889 cur = ret;
4890 } else {
4891 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4892 if (n == NULL) return(NULL);
4893 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004894 if (n->c1 != NULL)
4895 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004896 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004897 if (n != NULL)
4898 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004899 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004900 }
4901 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004902 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004903 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004904 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004905 "xmlParseElementMixedContentDecl : Name expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004906 xmlFreeElementContent(cur);
4907 return(NULL);
4908 }
4909 SKIP_BLANKS;
4910 GROW;
4911 }
4912 if ((RAW == ')') && (NXT(1) == '*')) {
4913 if (elem != NULL) {
4914 cur->c2 = xmlNewElementContent(elem,
4915 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004916 if (cur->c2 != NULL)
4917 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004918 }
4919 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004920 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004921 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4922 if (ctxt->vctxt.error != NULL)
4923 ctxt->vctxt.error(ctxt->vctxt.userData,
4924"Element content declaration doesn't start and stop in the same entity\n");
4925 ctxt->valid = 0;
4926 }
Owen Taylor3473f882001-02-23 17:55:21 +00004927 SKIP(2);
4928 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00004929 xmlFreeElementContent(ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004930 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004931 return(NULL);
4932 }
4933
4934 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004935 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004936 }
4937 return(ret);
4938}
4939
4940/**
4941 * xmlParseElementChildrenContentDecl:
4942 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004943 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004944 *
 * parse the declaration for the children (element-only) content of an element
4946 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4947 *
4948 *
4949 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4950 *
4951 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4952 *
4953 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4954 *
4955 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4956 *
4957 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4958 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004959 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004960 * opening or closing parentheses in a choice, seq, or Mixed
4961 * construct is contained in the replacement text for a parameter
4962 * entity, both must be contained in the same replacement text. For
4963 * interoperability, if a parameter-entity reference appears in a
4964 * choice, seq, or Mixed construct, its replacement text should not
4965 * be empty, and neither the first nor last non-blank character of
4966 * the replacement text should be a connector (| or ,).
4967 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004968 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004969 * hierarchy.
4970 */
4971xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004972xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004973 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004974 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00004975 xmlChar type = 0;
4976
4977 SKIP_BLANKS;
4978 GROW;
4979 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004980 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004981
Owen Taylor3473f882001-02-23 17:55:21 +00004982 /* Recurse on first child */
4983 NEXT;
4984 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004985 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004986 SKIP_BLANKS;
4987 GROW;
4988 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004989 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004990 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004991 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004992 return(NULL);
4993 }
4994 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004995 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004996 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004997 return(NULL);
4998 }
Owen Taylor3473f882001-02-23 17:55:21 +00004999 GROW;
5000 if (RAW == '?') {
5001 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5002 NEXT;
5003 } else if (RAW == '*') {
5004 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5005 NEXT;
5006 } else if (RAW == '+') {
5007 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5008 NEXT;
5009 } else {
5010 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5011 }
Owen Taylor3473f882001-02-23 17:55:21 +00005012 GROW;
5013 }
5014 SKIP_BLANKS;
5015 SHRINK;
5016 while (RAW != ')') {
5017 /*
5018 * Each loop we parse one separator and one element.
5019 */
5020 if (RAW == ',') {
5021 if (type == 0) type = CUR;
5022
5023 /*
5024 * Detect "Name | Name , Name" error
5025 */
5026 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005027 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005028 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005029 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005030 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005031 xmlFreeElementContent(last);
5032 if (ret != NULL)
5033 xmlFreeElementContent(ret);
5034 return(NULL);
5035 }
5036 NEXT;
5037
5038 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
5039 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005040 if ((last != NULL) && (last != ret))
5041 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00005042 xmlFreeElementContent(ret);
5043 return(NULL);
5044 }
5045 if (last == NULL) {
5046 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005047 if (ret != NULL)
5048 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005049 ret = cur = op;
5050 } else {
5051 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005052 if (op != NULL)
5053 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005054 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005055 if (last != NULL)
5056 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005057 cur =op;
5058 last = NULL;
5059 }
5060 } else if (RAW == '|') {
5061 if (type == 0) type = CUR;
5062
5063 /*
5064 * Detect "Name , Name | Name" error
5065 */
5066 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005067 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005068 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005069 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005070 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005071 xmlFreeElementContent(last);
5072 if (ret != NULL)
5073 xmlFreeElementContent(ret);
5074 return(NULL);
5075 }
5076 NEXT;
5077
5078 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5079 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005080 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005081 xmlFreeElementContent(last);
5082 if (ret != NULL)
5083 xmlFreeElementContent(ret);
5084 return(NULL);
5085 }
5086 if (last == NULL) {
5087 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005088 if (ret != NULL)
5089 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005090 ret = cur = op;
5091 } else {
5092 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005093 if (op != NULL)
5094 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005095 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005096 if (last != NULL)
5097 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005098 cur =op;
5099 last = NULL;
5100 }
5101 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005102 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005103 if (ret != NULL)
5104 xmlFreeElementContent(ret);
5105 return(NULL);
5106 }
5107 GROW;
5108 SKIP_BLANKS;
5109 GROW;
5110 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005111 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005112 /* Recurse on second child */
5113 NEXT;
5114 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005115 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005116 SKIP_BLANKS;
5117 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005118 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005119 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005120 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005121 if (ret != NULL)
5122 xmlFreeElementContent(ret);
5123 return(NULL);
5124 }
5125 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005126 if (RAW == '?') {
5127 last->ocur = XML_ELEMENT_CONTENT_OPT;
5128 NEXT;
5129 } else if (RAW == '*') {
5130 last->ocur = XML_ELEMENT_CONTENT_MULT;
5131 NEXT;
5132 } else if (RAW == '+') {
5133 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5134 NEXT;
5135 } else {
5136 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5137 }
5138 }
5139 SKIP_BLANKS;
5140 GROW;
5141 }
5142 if ((cur != NULL) && (last != NULL)) {
5143 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005144 if (last != NULL)
5145 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005146 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005147 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005148 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5149 if (ctxt->vctxt.error != NULL)
5150 ctxt->vctxt.error(ctxt->vctxt.userData,
5151"Element content declaration doesn't start and stop in the same entity\n");
5152 ctxt->valid = 0;
5153 }
Owen Taylor3473f882001-02-23 17:55:21 +00005154 NEXT;
5155 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00005156 if (ret != NULL)
5157 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00005158 NEXT;
5159 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005160 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005161 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005162 cur = ret;
5163 /*
5164 * Some normalization:
5165 * (a | b* | c?)* == (a | b | c)*
5166 */
5167 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5168 if ((cur->c1 != NULL) &&
5169 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5170 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5171 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5172 if ((cur->c2 != NULL) &&
5173 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5174 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5175 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5176 cur = cur->c2;
5177 }
5178 }
Owen Taylor3473f882001-02-23 17:55:21 +00005179 NEXT;
5180 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005181 if (ret != NULL) {
5182 int found = 0;
5183
Daniel Veillarde470df72001-04-18 21:41:07 +00005184 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005185 /*
5186 * Some normalization:
5187 * (a | b*)+ == (a | b)*
5188 * (a | b?)+ == (a | b)*
5189 */
5190 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5191 if ((cur->c1 != NULL) &&
5192 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5193 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5194 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5195 found = 1;
5196 }
5197 if ((cur->c2 != NULL) &&
5198 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5199 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5200 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5201 found = 1;
5202 }
5203 cur = cur->c2;
5204 }
5205 if (found)
5206 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5207 }
Owen Taylor3473f882001-02-23 17:55:21 +00005208 NEXT;
5209 }
5210 return(ret);
5211}
5212
5213/**
5214 * xmlParseElementContentDecl:
5215 * @ctxt: an XML parser context
5216 * @name: the name of the element being defined.
5217 * @result: the Element Content pointer will be stored here if any
5218 *
5219 * parse the declaration for an Element content either Mixed or Children,
5220 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5221 *
5222 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5223 *
5224 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5225 */
5226
5227int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005228xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005229 xmlElementContentPtr *result) {
5230
5231 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005232 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005233 int res;
5234
5235 *result = NULL;
5236
5237 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005238 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005239 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005240 return(-1);
5241 }
5242 NEXT;
5243 GROW;
5244 SKIP_BLANKS;
5245 if ((RAW == '#') && (NXT(1) == 'P') &&
5246 (NXT(2) == 'C') && (NXT(3) == 'D') &&
5247 (NXT(4) == 'A') && (NXT(5) == 'T') &&
5248 (NXT(6) == 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005249 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005250 res = XML_ELEMENT_TYPE_MIXED;
5251 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005252 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005253 res = XML_ELEMENT_TYPE_ELEMENT;
5254 }
Owen Taylor3473f882001-02-23 17:55:21 +00005255 SKIP_BLANKS;
5256 *result = tree;
5257 return(res);
5258}
5259
5260/**
5261 * xmlParseElementDecl:
5262 * @ctxt: an XML parser context
5263 *
5264 * parse an Element declaration.
5265 *
5266 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5267 *
5268 * [ VC: Unique Element Type Declaration ]
5269 * No element type may be declared more than once
5270 *
5271 * Returns the type of the element, or -1 in case of error
5272 */
5273int
5274xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005275 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005276 int ret = -1;
5277 xmlElementContentPtr content = NULL;
5278
5279 GROW;
5280 if ((RAW == '<') && (NXT(1) == '!') &&
5281 (NXT(2) == 'E') && (NXT(3) == 'L') &&
5282 (NXT(4) == 'E') && (NXT(5) == 'M') &&
5283 (NXT(6) == 'E') && (NXT(7) == 'N') &&
5284 (NXT(8) == 'T')) {
5285 xmlParserInputPtr input = ctxt->input;
5286
5287 SKIP(9);
5288 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005289 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5290 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005291 }
5292 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005293 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005294 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005295 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5296 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005297 return(-1);
5298 }
5299 while ((RAW == 0) && (ctxt->inputNr > 1))
5300 xmlPopInput(ctxt);
5301 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005302 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5303 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005304 }
5305 SKIP_BLANKS;
5306 if ((RAW == 'E') && (NXT(1) == 'M') &&
5307 (NXT(2) == 'P') && (NXT(3) == 'T') &&
5308 (NXT(4) == 'Y')) {
5309 SKIP(5);
5310 /*
5311 * Element must always be empty.
5312 */
5313 ret = XML_ELEMENT_TYPE_EMPTY;
5314 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5315 (NXT(2) == 'Y')) {
5316 SKIP(3);
5317 /*
5318 * Element is a generic container.
5319 */
5320 ret = XML_ELEMENT_TYPE_ANY;
5321 } else if (RAW == '(') {
5322 ret = xmlParseElementContentDecl(ctxt, name, &content);
5323 } else {
5324 /*
5325 * [ WFC: PEs in Internal Subset ] error handling.
5326 */
5327 if ((RAW == '%') && (ctxt->external == 0) &&
5328 (ctxt->inputNr == 1)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005329 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
5330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5331 ctxt->sax->error(ctxt->userData,
5332 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005333 } else {
5334 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
5335 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5336 ctxt->sax->error(ctxt->userData,
5337 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5338 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005339 ctxt->wellFormed = 0;
5340 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005341 return(-1);
5342 }
5343
5344 SKIP_BLANKS;
5345 /*
5346 * Pop-up of finished entities.
5347 */
5348 while ((RAW == 0) && (ctxt->inputNr > 1))
5349 xmlPopInput(ctxt);
5350 SKIP_BLANKS;
5351
5352 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005353 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005354 } else {
5355 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005356 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5357 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005358 }
5359
5360 NEXT;
5361 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5362 (ctxt->sax->elementDecl != NULL))
5363 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5364 content);
5365 }
5366 if (content != NULL) {
5367 xmlFreeElementContent(content);
5368 }
Owen Taylor3473f882001-02-23 17:55:21 +00005369 }
5370 return(ret);
5371}
5372
5373/**
Owen Taylor3473f882001-02-23 17:55:21 +00005374 * xmlParseConditionalSections
5375 * @ctxt: an XML parser context
5376 *
5377 * [61] conditionalSect ::= includeSect | ignoreSect
5378 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5379 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5380 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5381 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5382 */
5383
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005384static void
Owen Taylor3473f882001-02-23 17:55:21 +00005385xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5386 SKIP(3);
5387 SKIP_BLANKS;
5388 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
5389 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
5390 (NXT(6) == 'E')) {
5391 SKIP(7);
5392 SKIP_BLANKS;
5393 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005394 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005395 } else {
5396 NEXT;
5397 }
5398 if (xmlParserDebugEntities) {
5399 if ((ctxt->input != NULL) && (ctxt->input->filename))
5400 xmlGenericError(xmlGenericErrorContext,
5401 "%s(%d): ", ctxt->input->filename,
5402 ctxt->input->line);
5403 xmlGenericError(xmlGenericErrorContext,
5404 "Entering INCLUDE Conditional Section\n");
5405 }
5406
5407 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5408 (NXT(2) != '>'))) {
5409 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005410 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005411
5412 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5413 xmlParseConditionalSections(ctxt);
5414 } else if (IS_BLANK(CUR)) {
5415 NEXT;
5416 } else if (RAW == '%') {
5417 xmlParsePEReference(ctxt);
5418 } else
5419 xmlParseMarkupDecl(ctxt);
5420
5421 /*
5422 * Pop-up of finished entities.
5423 */
5424 while ((RAW == 0) && (ctxt->inputNr > 1))
5425 xmlPopInput(ctxt);
5426
Daniel Veillardfdc91562002-07-01 21:52:03 +00005427 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005428 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005429 break;
5430 }
5431 }
5432 if (xmlParserDebugEntities) {
5433 if ((ctxt->input != NULL) && (ctxt->input->filename))
5434 xmlGenericError(xmlGenericErrorContext,
5435 "%s(%d): ", ctxt->input->filename,
5436 ctxt->input->line);
5437 xmlGenericError(xmlGenericErrorContext,
5438 "Leaving INCLUDE Conditional Section\n");
5439 }
5440
5441 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
5442 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
5443 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005444 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005445 int depth = 0;
5446
5447 SKIP(6);
5448 SKIP_BLANKS;
5449 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005450 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005451 } else {
5452 NEXT;
5453 }
5454 if (xmlParserDebugEntities) {
5455 if ((ctxt->input != NULL) && (ctxt->input->filename))
5456 xmlGenericError(xmlGenericErrorContext,
5457 "%s(%d): ", ctxt->input->filename,
5458 ctxt->input->line);
5459 xmlGenericError(xmlGenericErrorContext,
5460 "Entering IGNORE Conditional Section\n");
5461 }
5462
5463 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005464 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005465 * But disable SAX event generating DTD building in the meantime
5466 */
5467 state = ctxt->disableSAX;
5468 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005469 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005470 ctxt->instate = XML_PARSER_IGNORE;
5471
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005472 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005473 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5474 depth++;
5475 SKIP(3);
5476 continue;
5477 }
5478 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5479 if (--depth >= 0) SKIP(3);
5480 continue;
5481 }
5482 NEXT;
5483 continue;
5484 }
5485
5486 ctxt->disableSAX = state;
5487 ctxt->instate = instate;
5488
5489 if (xmlParserDebugEntities) {
5490 if ((ctxt->input != NULL) && (ctxt->input->filename))
5491 xmlGenericError(xmlGenericErrorContext,
5492 "%s(%d): ", ctxt->input->filename,
5493 ctxt->input->line);
5494 xmlGenericError(xmlGenericErrorContext,
5495 "Leaving IGNORE Conditional Section\n");
5496 }
5497
5498 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005499 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005500 }
5501
5502 if (RAW == 0)
5503 SHRINK;
5504
5505 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005506 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005507 } else {
5508 SKIP(3);
5509 }
5510}
5511
5512/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005513 * xmlParseMarkupDecl:
5514 * @ctxt: an XML parser context
5515 *
5516 * parse Markup declarations
5517 *
5518 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5519 * NotationDecl | PI | Comment
5520 *
5521 * [ VC: Proper Declaration/PE Nesting ]
5522 * Parameter-entity replacement text must be properly nested with
5523 * markup declarations. That is to say, if either the first character
5524 * or the last character of a markup declaration (markupdecl above) is
5525 * contained in the replacement text for a parameter-entity reference,
5526 * both must be contained in the same replacement text.
5527 *
5528 * [ WFC: PEs in Internal Subset ]
5529 * In the internal DTD subset, parameter-entity references can occur
5530 * only where markup declarations can occur, not within markup declarations.
5531 * (This does not apply to references that occur in external parameter
5532 * entities or to the external subset.)
5533 */
5534void
5535xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5536 GROW;
5537 xmlParseElementDecl(ctxt);
5538 xmlParseAttributeListDecl(ctxt);
5539 xmlParseEntityDecl(ctxt);
5540 xmlParseNotationDecl(ctxt);
5541 xmlParsePI(ctxt);
5542 xmlParseComment(ctxt);
5543 /*
5544 * This is only for internal subset. On external entities,
5545 * the replacement is done before parsing stage
5546 */
5547 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5548 xmlParsePEReference(ctxt);
5549
5550 /*
5551 * Conditional sections are allowed from entities included
5552 * by PE References in the internal subset.
5553 */
5554 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5555 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5556 xmlParseConditionalSections(ctxt);
5557 }
5558 }
5559
5560 ctxt->instate = XML_PARSER_DTD;
5561}
5562
5563/**
5564 * xmlParseTextDecl:
5565 * @ctxt: an XML parser context
5566 *
5567 * parse an XML declaration header for external entities
5568 *
5569 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5570 *
5571 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5572 */
5573
5574void
5575xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5576 xmlChar *version;
5577
5578 /*
5579 * We know that '<?xml' is here.
5580 */
5581 if ((RAW == '<') && (NXT(1) == '?') &&
5582 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5583 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5584 SKIP(5);
5585 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005586 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005587 return;
5588 }
5589
5590 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005591 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5592 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005593 }
5594 SKIP_BLANKS;
5595
5596 /*
5597 * We may have the VersionInfo here.
5598 */
5599 version = xmlParseVersionInfo(ctxt);
5600 if (version == NULL)
5601 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005602 else {
5603 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005604 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5605 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005606 }
5607 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005608 ctxt->input->version = version;
5609
5610 /*
5611 * We must have the encoding declaration
5612 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005613 xmlParseEncodingDecl(ctxt);
5614 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5615 /*
5616 * The XML REC instructs us to stop parsing right here
5617 */
5618 return;
5619 }
5620
5621 SKIP_BLANKS;
5622 if ((RAW == '?') && (NXT(1) == '>')) {
5623 SKIP(2);
5624 } else if (RAW == '>') {
5625 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005626 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005627 NEXT;
5628 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005629 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005630 MOVETO_ENDTAG(CUR_PTR);
5631 NEXT;
5632 }
5633}
5634
5635/**
Owen Taylor3473f882001-02-23 17:55:21 +00005636 * xmlParseExternalSubset:
5637 * @ctxt: an XML parser context
5638 * @ExternalID: the external identifier
5639 * @SystemID: the system identifier (or URL)
5640 *
5641 * parse Markup declarations from an external subset
5642 *
5643 * [30] extSubset ::= textDecl? extSubsetDecl
5644 *
5645 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5646 */
5647void
5648xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5649 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005650 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005651 GROW;
5652 if ((RAW == '<') && (NXT(1) == '?') &&
5653 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5654 (NXT(4) == 'l')) {
5655 xmlParseTextDecl(ctxt);
5656 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5657 /*
5658 * The XML REC instructs us to stop parsing right here
5659 */
5660 ctxt->instate = XML_PARSER_EOF;
5661 return;
5662 }
5663 }
5664 if (ctxt->myDoc == NULL) {
5665 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5666 }
5667 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5668 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5669
5670 ctxt->instate = XML_PARSER_DTD;
5671 ctxt->external = 1;
5672 while (((RAW == '<') && (NXT(1) == '?')) ||
5673 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005674 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005675 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005676 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005677
5678 GROW;
5679 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5680 xmlParseConditionalSections(ctxt);
5681 } else if (IS_BLANK(CUR)) {
5682 NEXT;
5683 } else if (RAW == '%') {
5684 xmlParsePEReference(ctxt);
5685 } else
5686 xmlParseMarkupDecl(ctxt);
5687
5688 /*
5689 * Pop-up of finished entities.
5690 */
5691 while ((RAW == 0) && (ctxt->inputNr > 1))
5692 xmlPopInput(ctxt);
5693
Daniel Veillardfdc91562002-07-01 21:52:03 +00005694 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005695 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005696 break;
5697 }
5698 }
5699
5700 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005701 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005702 }
5703
5704}
5705
5706/**
5707 * xmlParseReference:
5708 * @ctxt: an XML parser context
5709 *
5710 * parse and handle entity references in content, depending on the SAX
5711 * interface, this may end-up in a call to character() if this is a
5712 * CharRef, a predefined entity, if there is no reference() callback.
5713 * or if the parser was asked to switch to that mode.
5714 *
5715 * [67] Reference ::= EntityRef | CharRef
5716 */
5717void
5718xmlParseReference(xmlParserCtxtPtr ctxt) {
5719 xmlEntityPtr ent;
5720 xmlChar *val;
5721 if (RAW != '&') return;
5722
5723 if (NXT(1) == '#') {
5724 int i = 0;
5725 xmlChar out[10];
5726 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005727 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005728
5729 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5730 /*
5731 * So we are using non-UTF-8 buffers
5732 * Check that the char fit on 8bits, if not
5733 * generate a CharRef.
5734 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005735 if (value <= 0xFF) {
5736 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005737 out[1] = 0;
5738 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5739 (!ctxt->disableSAX))
5740 ctxt->sax->characters(ctxt->userData, out, 1);
5741 } else {
5742 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005743 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005744 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005745 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005746 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5747 (!ctxt->disableSAX))
5748 ctxt->sax->reference(ctxt->userData, out);
5749 }
5750 } else {
5751 /*
5752 * Just encode the value in UTF-8
5753 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005754 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005755 out[i] = 0;
5756 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5757 (!ctxt->disableSAX))
5758 ctxt->sax->characters(ctxt->userData, out, i);
5759 }
5760 } else {
5761 ent = xmlParseEntityRef(ctxt);
5762 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005763 if (!ctxt->wellFormed)
5764 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005765 if ((ent->name != NULL) &&
5766 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5767 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005768 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005769
5770
5771 /*
5772 * The first reference to the entity trigger a parsing phase
5773 * where the ent->children is filled with the result from
5774 * the parsing.
5775 */
5776 if (ent->children == NULL) {
5777 xmlChar *value;
5778 value = ent->content;
5779
5780 /*
5781 * Check that this entity is well formed
5782 */
5783 if ((value != NULL) &&
5784 (value[1] == 0) && (value[0] == '<') &&
5785 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5786 /*
5787 * DONE: get definite answer on this !!!
5788 * Lots of entity decls are used to declare a single
5789 * char
5790 * <!ENTITY lt "<">
5791 * Which seems to be valid since
5792 * 2.4: The ampersand character (&) and the left angle
5793 * bracket (<) may appear in their literal form only
5794 * when used ... They are also legal within the literal
5795 * entity value of an internal entity declaration;i
5796 * see "4.3.2 Well-Formed Parsed Entities".
5797 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5798 * Looking at the OASIS test suite and James Clark
5799 * tests, this is broken. However the XML REC uses
5800 * it. Is the XML REC not well-formed ????
5801 * This is a hack to avoid this problem
5802 *
5803 * ANSWER: since lt gt amp .. are already defined,
5804 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005805 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005806 * is lousy but acceptable.
5807 */
5808 list = xmlNewDocText(ctxt->myDoc, value);
5809 if (list != NULL) {
5810 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5811 (ent->children == NULL)) {
5812 ent->children = list;
5813 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005814 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005815 list->parent = (xmlNodePtr) ent;
5816 } else {
5817 xmlFreeNodeList(list);
5818 }
5819 } else if (list != NULL) {
5820 xmlFreeNodeList(list);
5821 }
5822 } else {
5823 /*
5824 * 4.3.2: An internal general parsed entity is well-formed
5825 * if its replacement text matches the production labeled
5826 * content.
5827 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005828
5829 void *user_data;
5830 /*
5831 * This is a bit hackish but this seems the best
5832 * way to make sure both SAX and DOM entity support
5833 * behaves okay.
5834 */
5835 if (ctxt->userData == ctxt)
5836 user_data = NULL;
5837 else
5838 user_data = ctxt->userData;
5839
Owen Taylor3473f882001-02-23 17:55:21 +00005840 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5841 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005842 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5843 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005844 ctxt->depth--;
5845 } else if (ent->etype ==
5846 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5847 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005848 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005849 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005850 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005851 ctxt->depth--;
5852 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005853 ret = XML_ERR_ENTITY_PE_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00005854 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5855 ctxt->sax->error(ctxt->userData,
5856 "Internal: invalid entity type\n");
5857 }
5858 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005859 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005860 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005861 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005862 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5863 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005864 (ent->children == NULL)) {
5865 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005866 if (ctxt->replaceEntities) {
5867 /*
5868 * Prune it directly in the generated document
5869 * except for single text nodes.
5870 */
5871 if ((list->type == XML_TEXT_NODE) &&
5872 (list->next == NULL)) {
5873 list->parent = (xmlNodePtr) ent;
5874 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005875 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005876 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005877 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005878 while (list != NULL) {
5879 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005880 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005881 if (list->next == NULL)
5882 ent->last = list;
5883 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005884 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005885 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00005886#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005887 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5888 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00005889#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005890 }
5891 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005892 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005893 while (list != NULL) {
5894 list->parent = (xmlNodePtr) ent;
5895 if (list->next == NULL)
5896 ent->last = list;
5897 list = list->next;
5898 }
Owen Taylor3473f882001-02-23 17:55:21 +00005899 }
5900 } else {
5901 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005902 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005903 }
William M. Brackb670e2e2003-09-27 01:05:55 +00005904 } else if ((ret != XML_ERR_OK) &&
5905 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005906 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005907 } else if (list != NULL) {
5908 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005909 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005910 }
5911 }
5912 }
5913 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5914 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5915 /*
5916 * Create a node.
5917 */
5918 ctxt->sax->reference(ctxt->userData, ent->name);
5919 return;
5920 } else if (ctxt->replaceEntities) {
5921 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5922 /*
5923 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005924 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005925 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005926 */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005927 if ((list == NULL) && (ent->owner == 0)) {
5928 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005929 cur = ent->children;
5930 while (cur != NULL) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005931 nw = xmlCopyNode(cur, 1);
5932 if (nw != NULL) {
5933 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005934 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005935 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005936 }
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005937 xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005938 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005939 if (cur == ent->last)
5940 break;
5941 cur = cur->next;
5942 }
Daniel Veillard81273902003-09-30 00:43:48 +00005943#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005944 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005945 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005946#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005947 } else if (list == NULL) {
5948 xmlNodePtr nw = NULL, cur, next, last,
5949 firstChild = NULL;
5950 /*
5951 * Copy the entity child list and make it the new
5952 * entity child list. The goal is to make sure any
5953 * ID or REF referenced will be the one from the
5954 * document content and not the entity copy.
5955 */
5956 cur = ent->children;
5957 ent->children = NULL;
5958 last = ent->last;
5959 ent->last = NULL;
5960 while (cur != NULL) {
5961 next = cur->next;
5962 cur->next = NULL;
5963 cur->parent = NULL;
5964 nw = xmlCopyNode(cur, 1);
5965 if (nw != NULL) {
5966 nw->_private = cur->_private;
5967 if (firstChild == NULL){
5968 firstChild = cur;
5969 }
5970 xmlAddChild((xmlNodePtr) ent, nw);
5971 xmlAddChild(ctxt->node, cur);
5972 }
5973 if (cur == last)
5974 break;
5975 cur = next;
5976 }
5977 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00005978#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005979 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5980 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005981#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005982 } else {
5983 /*
5984 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005985 * node with a possible previous text one which
5986 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005987 */
5988 if (ent->children->type == XML_TEXT_NODE)
5989 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5990 if ((ent->last != ent->children) &&
5991 (ent->last->type == XML_TEXT_NODE))
5992 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5993 xmlAddChildList(ctxt->node, ent->children);
5994 }
5995
Owen Taylor3473f882001-02-23 17:55:21 +00005996 /*
5997 * This is to avoid a nasty side effect, see
5998 * characters() in SAX.c
5999 */
6000 ctxt->nodemem = 0;
6001 ctxt->nodelen = 0;
6002 return;
6003 } else {
6004 /*
6005 * Probably running in SAX mode
6006 */
6007 xmlParserInputPtr input;
6008
6009 input = xmlNewEntityInputStream(ctxt, ent);
6010 xmlPushInput(ctxt, input);
6011 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
6012 (RAW == '<') && (NXT(1) == '?') &&
6013 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6014 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6015 xmlParseTextDecl(ctxt);
6016 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6017 /*
6018 * The XML REC instructs us to stop parsing right here
6019 */
6020 ctxt->instate = XML_PARSER_EOF;
6021 return;
6022 }
6023 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006024 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
6025 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006026 }
6027 }
6028 return;
6029 }
6030 }
6031 } else {
6032 val = ent->content;
6033 if (val == NULL) return;
6034 /*
6035 * inline the entity.
6036 */
6037 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6038 (!ctxt->disableSAX))
6039 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6040 }
6041 }
6042}
6043
6044/**
6045 * xmlParseEntityRef:
6046 * @ctxt: an XML parser context
6047 *
6048 * parse ENTITY references declarations
6049 *
6050 * [68] EntityRef ::= '&' Name ';'
6051 *
6052 * [ WFC: Entity Declared ]
6053 * In a document without any DTD, a document with only an internal DTD
6054 * subset which contains no parameter entity references, or a document
6055 * with "standalone='yes'", the Name given in the entity reference
6056 * must match that in an entity declaration, except that well-formed
6057 * documents need not declare any of the following entities: amp, lt,
6058 * gt, apos, quot. The declaration of a parameter entity must precede
6059 * any reference to it. Similarly, the declaration of a general entity
6060 * must precede any reference to it which appears in a default value in an
6061 * attribute-list declaration. Note that if entities are declared in the
6062 * external subset or in external parameter entities, a non-validating
6063 * processor is not obligated to read and process their declarations;
6064 * for such documents, the rule that an entity must be declared is a
6065 * well-formedness constraint only if standalone='yes'.
6066 *
6067 * [ WFC: Parsed Entity ]
6068 * An entity reference must not contain the name of an unparsed entity
6069 *
6070 * Returns the xmlEntityPtr if found, or NULL otherwise.
6071 */
6072xmlEntityPtr
6073xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006074 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006075 xmlEntityPtr ent = NULL;
6076
6077 GROW;
6078
6079 if (RAW == '&') {
6080 NEXT;
6081 name = xmlParseName(ctxt);
6082 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006083 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6084 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006085 } else {
6086 if (RAW == ';') {
6087 NEXT;
6088 /*
6089 * Ask first SAX for entity resolution, otherwise try the
6090 * predefined set.
6091 */
6092 if (ctxt->sax != NULL) {
6093 if (ctxt->sax->getEntity != NULL)
6094 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006095 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006096 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006097 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6098 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006099 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006100 }
Owen Taylor3473f882001-02-23 17:55:21 +00006101 }
6102 /*
6103 * [ WFC: Entity Declared ]
6104 * In a document without any DTD, a document with only an
6105 * internal DTD subset which contains no parameter entity
6106 * references, or a document with "standalone='yes'", the
6107 * Name given in the entity reference must match that in an
6108 * entity declaration, except that well-formed documents
6109 * need not declare any of the following entities: amp, lt,
6110 * gt, apos, quot.
6111 * The declaration of a parameter entity must precede any
6112 * reference to it.
6113 * Similarly, the declaration of a general entity must
6114 * precede any reference to it which appears in a default
6115 * value in an attribute-list declaration. Note that if
6116 * entities are declared in the external subset or in
6117 * external parameter entities, a non-validating processor
6118 * is not obligated to read and process their declarations;
6119 * for such documents, the rule that an entity must be
6120 * declared is a well-formedness constraint only if
6121 * standalone='yes'.
6122 */
6123 if (ent == NULL) {
6124 if ((ctxt->standalone == 1) ||
6125 ((ctxt->hasExternalSubset == 0) &&
6126 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006127 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006128 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00006129 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006130 } else {
6131 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00006132 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00006133 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00006134 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00006135 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006136 }
6137 }
6138
6139 /*
6140 * [ WFC: Parsed Entity ]
6141 * An entity reference must not contain the name of an
6142 * unparsed entity
6143 */
6144 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006145 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006146 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006147 }
6148
6149 /*
6150 * [ WFC: No External Entity References ]
6151 * Attribute values cannot contain direct or indirect
6152 * entity references to external entities.
6153 */
6154 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6155 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006156 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6157 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006158 }
6159 /*
6160 * [ WFC: No < in Attribute Values ]
6161 * The replacement text of any entity referred to directly or
6162 * indirectly in an attribute value (other than "&lt;") must
6163 * not contain a <.
6164 */
6165 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6166 (ent != NULL) &&
6167 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6168 (ent->content != NULL) &&
6169 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006170 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006171 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006172 }
6173
6174 /*
6175 * Internal check, no parameter entities here ...
6176 */
6177 else {
6178 switch (ent->etype) {
6179 case XML_INTERNAL_PARAMETER_ENTITY:
6180 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006181 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6182 "Attempt to reference the parameter entity '%s'\n",
6183 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006184 break;
6185 default:
6186 break;
6187 }
6188 }
6189
6190 /*
6191 * [ WFC: No Recursion ]
6192 * A parsed entity must not contain a recursive reference
6193 * to itself, either directly or indirectly.
6194 * Done somewhere else
6195 */
6196
6197 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006198 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006199 }
Owen Taylor3473f882001-02-23 17:55:21 +00006200 }
6201 }
6202 return(ent);
6203}
6204
6205/**
6206 * xmlParseStringEntityRef:
6207 * @ctxt: an XML parser context
6208 * @str: a pointer to an index in the string
6209 *
6210 * parse ENTITY references declarations, but this version parses it from
6211 * a string value.
6212 *
6213 * [68] EntityRef ::= '&' Name ';'
6214 *
6215 * [ WFC: Entity Declared ]
6216 * In a document without any DTD, a document with only an internal DTD
6217 * subset which contains no parameter entity references, or a document
6218 * with "standalone='yes'", the Name given in the entity reference
6219 * must match that in an entity declaration, except that well-formed
6220 * documents need not declare any of the following entities: amp, lt,
6221 * gt, apos, quot. The declaration of a parameter entity must precede
6222 * any reference to it. Similarly, the declaration of a general entity
6223 * must precede any reference to it which appears in a default value in an
6224 * attribute-list declaration. Note that if entities are declared in the
6225 * external subset or in external parameter entities, a non-validating
6226 * processor is not obligated to read and process their declarations;
6227 * for such documents, the rule that an entity must be declared is a
6228 * well-formedness constraint only if standalone='yes'.
6229 *
6230 * [ WFC: Parsed Entity ]
6231 * An entity reference must not contain the name of an unparsed entity
6232 *
6233 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6234 * is updated to the current location in the string.
6235 */
6236xmlEntityPtr
6237xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6238 xmlChar *name;
6239 const xmlChar *ptr;
6240 xmlChar cur;
6241 xmlEntityPtr ent = NULL;
6242
6243 if ((str == NULL) || (*str == NULL))
6244 return(NULL);
6245 ptr = *str;
6246 cur = *ptr;
6247 if (cur == '&') {
6248 ptr++;
6249 cur = *ptr;
6250 name = xmlParseStringName(ctxt, &ptr);
6251 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006252 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6253 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006254 } else {
6255 if (*ptr == ';') {
6256 ptr++;
6257 /*
6258 * Ask first SAX for entity resolution, otherwise try the
6259 * predefined set.
6260 */
6261 if (ctxt->sax != NULL) {
6262 if (ctxt->sax->getEntity != NULL)
6263 ent = ctxt->sax->getEntity(ctxt->userData, name);
6264 if (ent == NULL)
6265 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006266 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006267 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006268 }
Owen Taylor3473f882001-02-23 17:55:21 +00006269 }
6270 /*
6271 * [ WFC: Entity Declared ]
6272 * In a document without any DTD, a document with only an
6273 * internal DTD subset which contains no parameter entity
6274 * references, or a document with "standalone='yes'", the
6275 * Name given in the entity reference must match that in an
6276 * entity declaration, except that well-formed documents
6277 * need not declare any of the following entities: amp, lt,
6278 * gt, apos, quot.
6279 * The declaration of a parameter entity must precede any
6280 * reference to it.
6281 * Similarly, the declaration of a general entity must
6282 * precede any reference to it which appears in a default
6283 * value in an attribute-list declaration. Note that if
6284 * entities are declared in the external subset or in
6285 * external parameter entities, a non-validating processor
6286 * is not obligated to read and process their declarations;
6287 * for such documents, the rule that an entity must be
6288 * declared is a well-formedness constraint only if
6289 * standalone='yes'.
6290 */
6291 if (ent == NULL) {
6292 if ((ctxt->standalone == 1) ||
6293 ((ctxt->hasExternalSubset == 0) &&
6294 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006295 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006296 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006297 } else {
6298 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
6299 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6300 ctxt->sax->warning(ctxt->userData,
6301 "Entity '%s' not defined\n", name);
6302 }
6303 }
6304
6305 /*
6306 * [ WFC: Parsed Entity ]
6307 * An entity reference must not contain the name of an
6308 * unparsed entity
6309 */
6310 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6311 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
6312 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6313 ctxt->sax->error(ctxt->userData,
6314 "Entity reference to unparsed entity %s\n", name);
6315 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006316 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006317 }
6318
6319 /*
6320 * [ WFC: No External Entity References ]
6321 * Attribute values cannot contain direct or indirect
6322 * entity references to external entities.
6323 */
6324 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6325 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6326 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6327 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6328 ctxt->sax->error(ctxt->userData,
6329 "Attribute references external entity '%s'\n", name);
6330 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006331 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006332 }
6333 /*
6334 * [ WFC: No < in Attribute Values ]
6335 * The replacement text of any entity referred to directly or
6336 * indirectly in an attribute value (other than "&lt;") must
6337 * not contain a <.
6338 */
6339 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6340 (ent != NULL) &&
6341 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6342 (ent->content != NULL) &&
6343 (xmlStrchr(ent->content, '<'))) {
6344 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6345 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6346 ctxt->sax->error(ctxt->userData,
6347 "'<' in entity '%s' is not allowed in attributes values\n", name);
6348 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006349 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006350 }
6351
6352 /*
6353 * Internal check, no parameter entities here ...
6354 */
6355 else {
6356 switch (ent->etype) {
6357 case XML_INTERNAL_PARAMETER_ENTITY:
6358 case XML_EXTERNAL_PARAMETER_ENTITY:
6359 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6360 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6361 ctxt->sax->error(ctxt->userData,
6362 "Attempt to reference the parameter entity '%s'\n", name);
6363 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006364 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006365 break;
6366 default:
6367 break;
6368 }
6369 }
6370
6371 /*
6372 * [ WFC: No Recursion ]
6373 * A parsed entity must not contain a recursive reference
6374 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006375 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006376 */
6377
6378 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006379 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006380 }
6381 xmlFree(name);
6382 }
6383 }
6384 *str = ptr;
6385 return(ent);
6386}
6387
6388/**
6389 * xmlParsePEReference:
6390 * @ctxt: an XML parser context
6391 *
6392 * parse PEReference declarations
6393 * The entity content is handled directly by pushing it's content as
6394 * a new input stream.
6395 *
6396 * [69] PEReference ::= '%' Name ';'
6397 *
6398 * [ WFC: No Recursion ]
6399 * A parsed entity must not contain a recursive
6400 * reference to itself, either directly or indirectly.
6401 *
6402 * [ WFC: Entity Declared ]
6403 * In a document without any DTD, a document with only an internal DTD
6404 * subset which contains no parameter entity references, or a document
6405 * with "standalone='yes'", ... ... The declaration of a parameter
6406 * entity must precede any reference to it...
6407 *
6408 * [ VC: Entity Declared ]
6409 * In a document with an external subset or external parameter entities
6410 * with "standalone='no'", ... ... The declaration of a parameter entity
6411 * must precede any reference to it...
6412 *
6413 * [ WFC: In DTD ]
6414 * Parameter-entity references may only appear in the DTD.
6415 * NOTE: misleading but this is handled.
6416 */
6417void
6418xmlParsePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006419 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006420 xmlEntityPtr entity = NULL;
6421 xmlParserInputPtr input;
6422
6423 if (RAW == '%') {
6424 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006425 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006426 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006427 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6428 "xmlParsePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006429 } else {
6430 if (RAW == ';') {
6431 NEXT;
6432 if ((ctxt->sax != NULL) &&
6433 (ctxt->sax->getParameterEntity != NULL))
6434 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6435 name);
6436 if (entity == NULL) {
6437 /*
6438 * [ WFC: Entity Declared ]
6439 * In a document without any DTD, a document with only an
6440 * internal DTD subset which contains no parameter entity
6441 * references, or a document with "standalone='yes'", ...
6442 * ... The declaration of a parameter entity must precede
6443 * any reference to it...
6444 */
6445 if ((ctxt->standalone == 1) ||
6446 ((ctxt->hasExternalSubset == 0) &&
6447 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006448 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006449 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006450 } else {
6451 /*
6452 * [ VC: Entity Declared ]
6453 * In a document with an external subset or external
6454 * parameter entities with "standalone='no'", ...
6455 * ... The declaration of a parameter entity must precede
6456 * any reference to it...
6457 */
6458 if ((!ctxt->disableSAX) &&
6459 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6460 ctxt->sax->warning(ctxt->userData,
6461 "PEReference: %%%s; not found\n", name);
6462 ctxt->valid = 0;
6463 }
6464 } else {
6465 /*
6466 * Internal checking in case the entity quest barfed
6467 */
6468 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6469 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6470 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6471 ctxt->sax->warning(ctxt->userData,
6472 "Internal: %%%s; is not a parameter entity\n", name);
Daniel Veillardf5582f12002-06-11 10:08:16 +00006473 } else if (ctxt->input->free != deallocblankswrapper) {
6474 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
6475 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00006476 } else {
6477 /*
6478 * TODO !!!
6479 * handle the extra spaces added before and after
6480 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6481 */
6482 input = xmlNewEntityInputStream(ctxt, entity);
6483 xmlPushInput(ctxt, input);
6484 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6485 (RAW == '<') && (NXT(1) == '?') &&
6486 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6487 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6488 xmlParseTextDecl(ctxt);
6489 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6490 /*
6491 * The XML REC instructs us to stop parsing
6492 * right here
6493 */
6494 ctxt->instate = XML_PARSER_EOF;
Owen Taylor3473f882001-02-23 17:55:21 +00006495 return;
6496 }
6497 }
Owen Taylor3473f882001-02-23 17:55:21 +00006498 }
6499 }
6500 ctxt->hasPErefs = 1;
6501 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006502 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006503 }
Owen Taylor3473f882001-02-23 17:55:21 +00006504 }
6505 }
6506}
6507
6508/**
6509 * xmlParseStringPEReference:
6510 * @ctxt: an XML parser context
6511 * @str: a pointer to an index in the string
6512 *
6513 * parse PEReference declarations
6514 *
6515 * [69] PEReference ::= '%' Name ';'
6516 *
6517 * [ WFC: No Recursion ]
6518 * A parsed entity must not contain a recursive
6519 * reference to itself, either directly or indirectly.
6520 *
6521 * [ WFC: Entity Declared ]
6522 * In a document without any DTD, a document with only an internal DTD
6523 * subset which contains no parameter entity references, or a document
6524 * with "standalone='yes'", ... ... The declaration of a parameter
6525 * entity must precede any reference to it...
6526 *
6527 * [ VC: Entity Declared ]
6528 * In a document with an external subset or external parameter entities
6529 * with "standalone='no'", ... ... The declaration of a parameter entity
6530 * must precede any reference to it...
6531 *
6532 * [ WFC: In DTD ]
6533 * Parameter-entity references may only appear in the DTD.
6534 * NOTE: misleading but this is handled.
6535 *
6536 * Returns the string of the entity content.
6537 * str is updated to the current value of the index
6538 */
6539xmlEntityPtr
6540xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6541 const xmlChar *ptr;
6542 xmlChar cur;
6543 xmlChar *name;
6544 xmlEntityPtr entity = NULL;
6545
6546 if ((str == NULL) || (*str == NULL)) return(NULL);
6547 ptr = *str;
6548 cur = *ptr;
6549 if (cur == '%') {
6550 ptr++;
6551 cur = *ptr;
6552 name = xmlParseStringName(ctxt, &ptr);
6553 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006554 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6555 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006556 } else {
6557 cur = *ptr;
6558 if (cur == ';') {
6559 ptr++;
6560 cur = *ptr;
6561 if ((ctxt->sax != NULL) &&
6562 (ctxt->sax->getParameterEntity != NULL))
6563 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6564 name);
6565 if (entity == NULL) {
6566 /*
6567 * [ WFC: Entity Declared ]
6568 * In a document without any DTD, a document with only an
6569 * internal DTD subset which contains no parameter entity
6570 * references, or a document with "standalone='yes'", ...
6571 * ... The declaration of a parameter entity must precede
6572 * any reference to it...
6573 */
6574 if ((ctxt->standalone == 1) ||
6575 ((ctxt->hasExternalSubset == 0) &&
6576 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006577 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006578 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006579 } else {
6580 /*
6581 * [ VC: Entity Declared ]
6582 * In a document with an external subset or external
6583 * parameter entities with "standalone='no'", ...
6584 * ... The declaration of a parameter entity must
6585 * precede any reference to it...
6586 */
6587 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6588 ctxt->sax->warning(ctxt->userData,
6589 "PEReference: %%%s; not found\n", name);
6590 ctxt->valid = 0;
6591 }
6592 } else {
6593 /*
6594 * Internal checking in case the entity quest barfed
6595 */
6596 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6597 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6598 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6599 ctxt->sax->warning(ctxt->userData,
6600 "Internal: %%%s; is not a parameter entity\n", name);
6601 }
6602 }
6603 ctxt->hasPErefs = 1;
6604 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006605 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006606 }
6607 xmlFree(name);
6608 }
6609 }
6610 *str = ptr;
6611 return(entity);
6612}
6613
6614/**
6615 * xmlParseDocTypeDecl:
6616 * @ctxt: an XML parser context
6617 *
6618 * parse a DOCTYPE declaration
6619 *
6620 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6621 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6622 *
6623 * [ VC: Root Element Type ]
6624 * The Name in the document type declaration must match the element
6625 * type of the root element.
6626 */
6627
6628void
6629xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006630 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006631 xmlChar *ExternalID = NULL;
6632 xmlChar *URI = NULL;
6633
6634 /*
6635 * We know that '<!DOCTYPE' has been detected.
6636 */
6637 SKIP(9);
6638
6639 SKIP_BLANKS;
6640
6641 /*
6642 * Parse the DOCTYPE name.
6643 */
6644 name = xmlParseName(ctxt);
6645 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006646 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6647 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006648 }
6649 ctxt->intSubName = name;
6650
6651 SKIP_BLANKS;
6652
6653 /*
6654 * Check for SystemID and ExternalID
6655 */
6656 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6657
6658 if ((URI != NULL) || (ExternalID != NULL)) {
6659 ctxt->hasExternalSubset = 1;
6660 }
6661 ctxt->extSubURI = URI;
6662 ctxt->extSubSystem = ExternalID;
6663
6664 SKIP_BLANKS;
6665
6666 /*
6667 * Create and update the internal subset.
6668 */
6669 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6670 (!ctxt->disableSAX))
6671 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6672
6673 /*
6674 * Is there any internal subset declarations ?
6675 * they are handled separately in xmlParseInternalSubset()
6676 */
6677 if (RAW == '[')
6678 return;
6679
6680 /*
6681 * We should be at the end of the DOCTYPE declaration.
6682 */
6683 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006684 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006685 }
6686 NEXT;
6687}
6688
6689/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006690 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006691 * @ctxt: an XML parser context
6692 *
6693 * parse the internal subset declaration
6694 *
6695 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6696 */
6697
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006698static void
Owen Taylor3473f882001-02-23 17:55:21 +00006699xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6700 /*
6701 * Is there any DTD definition ?
6702 */
6703 if (RAW == '[') {
6704 ctxt->instate = XML_PARSER_DTD;
6705 NEXT;
6706 /*
6707 * Parse the succession of Markup declarations and
6708 * PEReferences.
6709 * Subsequence (markupdecl | PEReference | S)*
6710 */
6711 while (RAW != ']') {
6712 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006713 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006714
6715 SKIP_BLANKS;
6716 xmlParseMarkupDecl(ctxt);
6717 xmlParsePEReference(ctxt);
6718
6719 /*
6720 * Pop-up of finished entities.
6721 */
6722 while ((RAW == 0) && (ctxt->inputNr > 1))
6723 xmlPopInput(ctxt);
6724
6725 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006726 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006727 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006728 break;
6729 }
6730 }
6731 if (RAW == ']') {
6732 NEXT;
6733 SKIP_BLANKS;
6734 }
6735 }
6736
6737 /*
6738 * We should be at the end of the DOCTYPE declaration.
6739 */
6740 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006741 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006742 }
6743 NEXT;
6744}
6745
Daniel Veillard81273902003-09-30 00:43:48 +00006746#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006747/**
6748 * xmlParseAttribute:
6749 * @ctxt: an XML parser context
6750 * @value: a xmlChar ** used to store the value of the attribute
6751 *
6752 * parse an attribute
6753 *
6754 * [41] Attribute ::= Name Eq AttValue
6755 *
6756 * [ WFC: No External Entity References ]
6757 * Attribute values cannot contain direct or indirect entity references
6758 * to external entities.
6759 *
6760 * [ WFC: No < in Attribute Values ]
6761 * The replacement text of any entity referred to directly or indirectly in
6762 * an attribute value (other than "&lt;") must not contain a <.
6763 *
6764 * [ VC: Attribute Value Type ]
6765 * The attribute must have been declared; the value must be of the type
6766 * declared for it.
6767 *
6768 * [25] Eq ::= S? '=' S?
6769 *
6770 * With namespace:
6771 *
6772 * [NS 11] Attribute ::= QName Eq AttValue
6773 *
6774 * Also the case QName == xmlns:??? is handled independently as a namespace
6775 * definition.
6776 *
6777 * Returns the attribute name, and the value in *value.
6778 */
6779
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006780const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006781xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006782 const xmlChar *name;
6783 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006784
6785 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006786 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006787 name = xmlParseName(ctxt);
6788 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006789 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6790 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006791 return(NULL);
6792 }
6793
6794 /*
6795 * read the value
6796 */
6797 SKIP_BLANKS;
6798 if (RAW == '=') {
6799 NEXT;
6800 SKIP_BLANKS;
6801 val = xmlParseAttValue(ctxt);
6802 ctxt->instate = XML_PARSER_CONTENT;
6803 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006804 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006805 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006806 return(NULL);
6807 }
6808
6809 /*
6810 * Check that xml:lang conforms to the specification
6811 * No more registered as an error, just generate a warning now
6812 * since this was deprecated in XML second edition
6813 */
6814 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6815 if (!xmlCheckLanguageID(val)) {
6816 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6817 ctxt->sax->warning(ctxt->userData,
6818 "Malformed value for xml:lang : %s\n", val);
6819 }
6820 }
6821
6822 /*
6823 * Check that xml:space conforms to the specification
6824 */
6825 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6826 if (xmlStrEqual(val, BAD_CAST "default"))
6827 *(ctxt->space) = 0;
6828 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6829 *(ctxt->space) = 1;
6830 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006831 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006832"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006833 val);
Owen Taylor3473f882001-02-23 17:55:21 +00006834 }
6835 }
6836
6837 *value = val;
6838 return(name);
6839}
6840
6841/**
6842 * xmlParseStartTag:
6843 * @ctxt: an XML parser context
6844 *
6845 * parse a start of tag either for rule element or
6846 * EmptyElement. In both case we don't parse the tag closing chars.
6847 *
6848 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6849 *
6850 * [ WFC: Unique Att Spec ]
6851 * No attribute name may appear more than once in the same start-tag or
6852 * empty-element tag.
6853 *
6854 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6855 *
6856 * [ WFC: Unique Att Spec ]
6857 * No attribute name may appear more than once in the same start-tag or
6858 * empty-element tag.
6859 *
6860 * With namespace:
6861 *
6862 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6863 *
6864 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6865 *
6866 * Returns the element name parsed
6867 */
6868
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006869const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006870xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006871 const xmlChar *name;
6872 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006873 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006874 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006875 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006876 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006877 int i;
6878
6879 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006880 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006881
6882 name = xmlParseName(ctxt);
6883 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006884 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006885 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006886 return(NULL);
6887 }
6888
6889 /*
6890 * Now parse the attributes, it ends up with the ending
6891 *
6892 * (S Attribute)* S?
6893 */
6894 SKIP_BLANKS;
6895 GROW;
6896
Daniel Veillard21a0f912001-02-25 19:54:14 +00006897 while ((RAW != '>') &&
6898 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00006899 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006900 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006901 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006902
6903 attname = xmlParseAttribute(ctxt, &attvalue);
6904 if ((attname != NULL) && (attvalue != NULL)) {
6905 /*
6906 * [ WFC: Unique Att Spec ]
6907 * No attribute name may appear more than once in the same
6908 * start-tag or empty-element tag.
6909 */
6910 for (i = 0; i < nbatts;i += 2) {
6911 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006912 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006913 xmlFree(attvalue);
6914 goto failed;
6915 }
6916 }
Owen Taylor3473f882001-02-23 17:55:21 +00006917 /*
6918 * Add the pair to atts
6919 */
6920 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006921 maxatts = 22; /* allow for 10 attrs by default */
6922 atts = (const xmlChar **)
6923 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006924 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006925 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006926 if (attvalue != NULL)
6927 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006928 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006929 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006930 ctxt->atts = atts;
6931 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006932 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006933 const xmlChar **n;
6934
Owen Taylor3473f882001-02-23 17:55:21 +00006935 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006936 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006937 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006938 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006939 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006940 if (attvalue != NULL)
6941 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006942 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006943 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006944 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006945 ctxt->atts = atts;
6946 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006947 }
6948 atts[nbatts++] = attname;
6949 atts[nbatts++] = attvalue;
6950 atts[nbatts] = NULL;
6951 atts[nbatts + 1] = NULL;
6952 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006953 if (attvalue != NULL)
6954 xmlFree(attvalue);
6955 }
6956
6957failed:
6958
Daniel Veillard3772de32002-12-17 10:31:45 +00006959 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006960 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6961 break;
6962 if (!IS_BLANK(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006963 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6964 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006965 }
6966 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006967 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6968 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006969 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
6970 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006971 break;
6972 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006973 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00006974 GROW;
6975 }
6976
6977 /*
6978 * SAX: Start of Element !
6979 */
6980 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006981 (!ctxt->disableSAX)) {
6982 if (nbatts > 0)
6983 ctxt->sax->startElement(ctxt->userData, name, atts);
6984 else
6985 ctxt->sax->startElement(ctxt->userData, name, NULL);
6986 }
Owen Taylor3473f882001-02-23 17:55:21 +00006987
6988 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006989 /* Free only the content strings */
6990 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006991 if (atts[i] != NULL)
6992 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006993 }
6994 return(name);
6995}
6996
6997/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00006998 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00006999 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007000 * @line: line of the start tag
7001 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007002 *
7003 * parse an end of tag
7004 *
7005 * [42] ETag ::= '</' Name S? '>'
7006 *
7007 * With namespace
7008 *
7009 * [NS 9] ETag ::= '</' QName S? '>'
7010 */
7011
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007012static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007013xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007014 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007015
7016 GROW;
7017 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007018 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7019 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007020 return;
7021 }
7022 SKIP(2);
7023
Daniel Veillard46de64e2002-05-29 08:21:33 +00007024 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007025
7026 /*
7027 * We should definitely be at the ending "S? '>'" part
7028 */
7029 GROW;
7030 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007031 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007032 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007033 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007034 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007035
7036 /*
7037 * [ WFC: Element Type Match ]
7038 * The Name in an element's end-tag must match the element type in the
7039 * start-tag.
7040 *
7041 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007042 if (name != (xmlChar*)1) {
Owen Taylor3473f882001-02-23 17:55:21 +00007043 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
7044 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00007045 if (name != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00007046 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007047 "Opening and ending tag mismatch: %s line %d and %s\n",
7048 ctxt->name, line, name);
Daniel Veillard46de64e2002-05-29 08:21:33 +00007049 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007050 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007051 "Ending tag error for: %s line %d\n", ctxt->name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00007052 }
7053
7054 }
7055 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00007056 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007057 }
7058
7059 /*
7060 * SAX: End of Tag
7061 */
7062 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7063 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007064 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007065
Daniel Veillarde57ec792003-09-10 10:50:59 +00007066 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007067 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007068 return;
7069}
7070
7071/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007072 * xmlParseEndTag:
7073 * @ctxt: an XML parser context
7074 *
7075 * parse an end of tag
7076 *
7077 * [42] ETag ::= '</' Name S? '>'
7078 *
7079 * With namespace
7080 *
7081 * [NS 9] ETag ::= '</' QName S? '>'
7082 */
7083
7084void
7085xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007086 xmlParseEndTag1(ctxt, 0);
7087}
Daniel Veillard81273902003-09-30 00:43:48 +00007088#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007089
7090/************************************************************************
7091 * *
7092 * SAX 2 specific operations *
7093 * *
7094 ************************************************************************/
7095
7096static const xmlChar *
7097xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7098 int len = 0, l;
7099 int c;
7100 int count = 0;
7101
7102 /*
7103 * Handler for more complex cases
7104 */
7105 GROW;
7106 c = CUR_CHAR(l);
7107 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007108 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007109 return(NULL);
7110 }
7111
7112 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
Daniel Veillard73b013f2003-09-30 12:36:01 +00007113 ((xmlIsLetter(c)) || (xmlIsDigit(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007114 (c == '.') || (c == '-') || (c == '_') ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00007115 (xmlIsCombining(c)) ||
7116 (xmlIsExtender(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007117 if (count++ > 100) {
7118 count = 0;
7119 GROW;
7120 }
7121 len += l;
7122 NEXTL(l);
7123 c = CUR_CHAR(l);
7124 }
7125 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7126}
7127
7128/*
7129 * xmlGetNamespace:
7130 * @ctxt: an XML parser context
7131 * @prefix: the prefix to lookup
7132 *
7133 * Lookup the namespace name for the @prefix (which ca be NULL)
7134 * The prefix must come from the @ctxt->dict dictionnary
7135 *
7136 * Returns the namespace name or NULL if not bound
7137 */
7138static const xmlChar *
7139xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7140 int i;
7141
Daniel Veillarde57ec792003-09-10 10:50:59 +00007142 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007143 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007144 if (ctxt->nsTab[i] == prefix) {
7145 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7146 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007147 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007148 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007149 return(NULL);
7150}
7151
7152/**
7153 * xmlParseNCName:
7154 * @ctxt: an XML parser context
7155 *
7156 * parse an XML name.
7157 *
7158 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7159 * CombiningChar | Extender
7160 *
7161 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7162 *
7163 * Returns the Name parsed or NULL
7164 */
7165
7166static const xmlChar *
7167xmlParseNCName(xmlParserCtxtPtr ctxt) {
7168 const xmlChar *in;
7169 const xmlChar *ret;
7170 int count = 0;
7171
7172 /*
7173 * Accelerator for simple ASCII names
7174 */
7175 in = ctxt->input->cur;
7176 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7177 ((*in >= 0x41) && (*in <= 0x5A)) ||
7178 (*in == '_')) {
7179 in++;
7180 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7181 ((*in >= 0x41) && (*in <= 0x5A)) ||
7182 ((*in >= 0x30) && (*in <= 0x39)) ||
7183 (*in == '_') || (*in == '-') ||
7184 (*in == '.'))
7185 in++;
7186 if ((*in > 0) && (*in < 0x80)) {
7187 count = in - ctxt->input->cur;
7188 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7189 ctxt->input->cur = in;
7190 ctxt->nbChars += count;
7191 ctxt->input->col += count;
7192 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007193 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007194 }
7195 return(ret);
7196 }
7197 }
7198 return(xmlParseNCNameComplex(ctxt));
7199}
7200
7201/**
7202 * xmlParseQName:
7203 * @ctxt: an XML parser context
7204 * @prefix: pointer to store the prefix part
7205 *
7206 * parse an XML Namespace QName
7207 *
7208 * [6] QName ::= (Prefix ':')? LocalPart
7209 * [7] Prefix ::= NCName
7210 * [8] LocalPart ::= NCName
7211 *
7212 * Returns the Name parsed or NULL
7213 */
7214
7215static const xmlChar *
7216xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7217 const xmlChar *l, *p;
7218
7219 GROW;
7220
7221 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007222 if (l == NULL) {
7223 if (CUR == ':') {
7224 l = xmlParseName(ctxt);
7225 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007226 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7227 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007228 *prefix = NULL;
7229 return(l);
7230 }
7231 }
7232 return(NULL);
7233 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007234 if (CUR == ':') {
7235 NEXT;
7236 p = l;
7237 l = xmlParseNCName(ctxt);
7238 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007239 xmlChar *tmp;
7240
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007241 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7242 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007243 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7244 p = xmlDictLookup(ctxt->dict, tmp, -1);
7245 if (tmp != NULL) xmlFree(tmp);
7246 *prefix = NULL;
7247 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007248 }
7249 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007250 xmlChar *tmp;
7251
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007252 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7253 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007254 NEXT;
7255 tmp = (xmlChar *) xmlParseName(ctxt);
7256 if (tmp != NULL) {
7257 tmp = xmlBuildQName(tmp, l, NULL, 0);
7258 l = xmlDictLookup(ctxt->dict, tmp, -1);
7259 if (tmp != NULL) xmlFree(tmp);
7260 *prefix = p;
7261 return(l);
7262 }
7263 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7264 l = xmlDictLookup(ctxt->dict, tmp, -1);
7265 if (tmp != NULL) xmlFree(tmp);
7266 *prefix = p;
7267 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007268 }
7269 *prefix = p;
7270 } else
7271 *prefix = NULL;
7272 return(l);
7273}
7274
7275/**
7276 * xmlParseQNameAndCompare:
7277 * @ctxt: an XML parser context
7278 * @name: the localname
7279 * @prefix: the prefix, if any.
7280 *
7281 * parse an XML name and compares for match
7282 * (specialized for endtag parsing)
7283 *
7284 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7285 * and the name for mismatch
7286 */
7287
7288static const xmlChar *
7289xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7290 xmlChar const *prefix) {
7291 const xmlChar *cmp = name;
7292 const xmlChar *in;
7293 const xmlChar *ret;
7294 const xmlChar *prefix2;
7295
7296 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7297
7298 GROW;
7299 in = ctxt->input->cur;
7300
7301 cmp = prefix;
7302 while (*in != 0 && *in == *cmp) {
7303 ++in;
7304 ++cmp;
7305 }
7306 if ((*cmp == 0) && (*in == ':')) {
7307 in++;
7308 cmp = name;
7309 while (*in != 0 && *in == *cmp) {
7310 ++in;
7311 ++cmp;
7312 }
7313 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
7314 /* success */
7315 ctxt->input->cur = in;
7316 return((const xmlChar*) 1);
7317 }
7318 }
7319 /*
7320 * all strings coms from the dictionary, equality can be done directly
7321 */
7322 ret = xmlParseQName (ctxt, &prefix2);
7323 if ((ret == name) && (prefix == prefix2))
7324 return((const xmlChar*) 1);
7325 return ret;
7326}
7327
7328/**
7329 * xmlParseAttValueInternal:
7330 * @ctxt: an XML parser context
7331 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007332 * @alloc: whether the attribute was reallocated as a new string
7333 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007334 *
7335 * parse a value for an attribute.
7336 * NOTE: if no normalization is needed, the routine will return pointers
7337 * directly from the data buffer.
7338 *
7339 * 3.3.3 Attribute-Value Normalization:
7340 * Before the value of an attribute is passed to the application or
7341 * checked for validity, the XML processor must normalize it as follows:
7342 * - a character reference is processed by appending the referenced
7343 * character to the attribute value
7344 * - an entity reference is processed by recursively processing the
7345 * replacement text of the entity
7346 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7347 * appending #x20 to the normalized value, except that only a single
7348 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7349 * parsed entity or the literal entity value of an internal parsed entity
7350 * - other characters are processed by appending them to the normalized value
7351 * If the declared value is not CDATA, then the XML processor must further
7352 * process the normalized attribute value by discarding any leading and
7353 * trailing space (#x20) characters, and by replacing sequences of space
7354 * (#x20) characters by a single space (#x20) character.
7355 * All attributes for which no declaration has been read should be treated
7356 * by a non-validating parser as if declared CDATA.
7357 *
7358 * Returns the AttValue parsed or NULL. The value has to be freed by the
7359 * caller if it was copied, this can be detected by val[*len] == 0.
7360 */
7361
7362static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007363xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7364 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007365{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007366 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007367 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007368 xmlChar *ret = NULL;
7369
7370 GROW;
7371 in = (xmlChar *) CUR_PTR;
7372 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007373 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007374 return (NULL);
7375 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007376 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007377
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007378 /*
7379 * try to handle in this routine the most common case where no
7380 * allocation of a new string is required and where content is
7381 * pure ASCII.
7382 */
7383 limit = *in++;
7384 end = ctxt->input->end;
7385 start = in;
7386 if (in >= end) {
7387 const xmlChar *oldbase = ctxt->input->base;
7388 GROW;
7389 if (oldbase != ctxt->input->base) {
7390 long delta = ctxt->input->base - oldbase;
7391 start = start + delta;
7392 in = in + delta;
7393 }
7394 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007395 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007396 if (normalize) {
7397 /*
7398 * Skip any leading spaces
7399 */
7400 while ((in < end) && (*in != limit) &&
7401 ((*in == 0x20) || (*in == 0x9) ||
7402 (*in == 0xA) || (*in == 0xD))) {
7403 in++;
7404 start = in;
7405 if (in >= end) {
7406 const xmlChar *oldbase = ctxt->input->base;
7407 GROW;
7408 if (oldbase != ctxt->input->base) {
7409 long delta = ctxt->input->base - oldbase;
7410 start = start + delta;
7411 in = in + delta;
7412 }
7413 end = ctxt->input->end;
7414 }
7415 }
7416 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7417 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7418 if ((*in++ == 0x20) && (*in == 0x20)) break;
7419 if (in >= end) {
7420 const xmlChar *oldbase = ctxt->input->base;
7421 GROW;
7422 if (oldbase != ctxt->input->base) {
7423 long delta = ctxt->input->base - oldbase;
7424 start = start + delta;
7425 in = in + delta;
7426 }
7427 end = ctxt->input->end;
7428 }
7429 }
7430 last = in;
7431 /*
7432 * skip the trailing blanks
7433 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007434 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007435 while ((in < end) && (*in != limit) &&
7436 ((*in == 0x20) || (*in == 0x9) ||
7437 (*in == 0xA) || (*in == 0xD))) {
7438 in++;
7439 if (in >= end) {
7440 const xmlChar *oldbase = ctxt->input->base;
7441 GROW;
7442 if (oldbase != ctxt->input->base) {
7443 long delta = ctxt->input->base - oldbase;
7444 start = start + delta;
7445 in = in + delta;
7446 last = last + delta;
7447 }
7448 end = ctxt->input->end;
7449 }
7450 }
7451 if (*in != limit) goto need_complex;
7452 } else {
7453 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7454 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7455 in++;
7456 if (in >= end) {
7457 const xmlChar *oldbase = ctxt->input->base;
7458 GROW;
7459 if (oldbase != ctxt->input->base) {
7460 long delta = ctxt->input->base - oldbase;
7461 start = start + delta;
7462 in = in + delta;
7463 }
7464 end = ctxt->input->end;
7465 }
7466 }
7467 last = in;
7468 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007469 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007470 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007471 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007472 *len = last - start;
7473 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007474 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007475 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007476 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007477 }
7478 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007479 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007480 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007481need_complex:
7482 if (alloc) *alloc = 1;
7483 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007484}
7485
7486/**
7487 * xmlParseAttribute2:
7488 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007489 * @pref: the element prefix
7490 * @elem: the element name
7491 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007492 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007493 * @len: an int * to save the length of the attribute
7494 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007495 *
7496 * parse an attribute in the new SAX2 framework.
7497 *
7498 * Returns the attribute name, and the value in *value, .
7499 */
7500
7501static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007502xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7503 const xmlChar *pref, const xmlChar *elem,
7504 const xmlChar **prefix, xmlChar **value,
7505 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007506 const xmlChar *name;
7507 xmlChar *val;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007508 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007509
7510 *value = NULL;
7511 GROW;
7512 name = xmlParseQName(ctxt, prefix);
7513 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007514 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7515 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007516 return(NULL);
7517 }
7518
7519 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007520 * get the type if needed
7521 */
7522 if (ctxt->attsSpecial != NULL) {
7523 int type;
7524
7525 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7526 pref, elem, *prefix, name);
7527 if (type != 0) normalize = 1;
7528 }
7529
7530 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007531 * read the value
7532 */
7533 SKIP_BLANKS;
7534 if (RAW == '=') {
7535 NEXT;
7536 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007537 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007538 ctxt->instate = XML_PARSER_CONTENT;
7539 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007540 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007541 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007542 return(NULL);
7543 }
7544
7545 /*
7546 * Check that xml:lang conforms to the specification
7547 * No more registered as an error, just generate a warning now
7548 * since this was deprecated in XML second edition
7549 */
7550 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7551 if (!xmlCheckLanguageID(val)) {
7552 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7553 ctxt->sax->warning(ctxt->userData,
7554 "Malformed value for xml:lang : %s\n", val);
7555 }
7556 }
7557
7558 /*
7559 * Check that xml:space conforms to the specification
7560 */
7561 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7562 if (xmlStrEqual(val, BAD_CAST "default"))
7563 *(ctxt->space) = 0;
7564 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7565 *(ctxt->space) = 1;
7566 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007567 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007568"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7569 val);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007570 }
7571 }
7572
7573 *value = val;
7574 return(name);
7575}
7576
7577/**
7578 * xmlParseStartTag2:
7579 * @ctxt: an XML parser context
7580 *
7581 * parse a start of tag either for rule element or
7582 * EmptyElement. In both case we don't parse the tag closing chars.
7583 * This routine is called when running SAX2 parsing
7584 *
7585 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7586 *
7587 * [ WFC: Unique Att Spec ]
7588 * No attribute name may appear more than once in the same start-tag or
7589 * empty-element tag.
7590 *
7591 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7592 *
7593 * [ WFC: Unique Att Spec ]
7594 * No attribute name may appear more than once in the same start-tag or
7595 * empty-element tag.
7596 *
7597 * With namespace:
7598 *
7599 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7600 *
7601 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7602 *
7603 * Returns the element name parsed
7604 */
7605
7606static const xmlChar *
7607xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
7608 const xmlChar **URI) {
7609 const xmlChar *localname;
7610 const xmlChar *prefix;
7611 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007612 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007613 const xmlChar *nsname;
7614 xmlChar *attvalue;
7615 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007616 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007617 int nratts, nbatts, nbdef;
7618 int i, j, nbNs, attval;
7619 const xmlChar *base;
7620 unsigned long cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007621
7622 if (RAW != '<') return(NULL);
7623 NEXT1;
7624
7625 /*
7626 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7627 * point since the attribute values may be stored as pointers to
7628 * the buffer and calling SHRINK would destroy them !
7629 * The Shrinking is only possible once the full set of attribute
7630 * callbacks have been done.
7631 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007632reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007633 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007634 base = ctxt->input->base;
7635 cur = ctxt->input->cur - ctxt->input->base;
7636 nbatts = 0;
7637 nratts = 0;
7638 nbdef = 0;
7639 nbNs = 0;
7640 attval = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007641
7642 localname = xmlParseQName(ctxt, &prefix);
7643 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007644 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7645 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007646 return(NULL);
7647 }
7648
7649 /*
7650 * Now parse the attributes, it ends up with the ending
7651 *
7652 * (S Attribute)* S?
7653 */
7654 SKIP_BLANKS;
7655 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007656 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007657
7658 while ((RAW != '>') &&
7659 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007660 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007661 const xmlChar *q = CUR_PTR;
7662 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007663 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007664
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007665 attname = xmlParseAttribute2(ctxt, prefix, localname,
7666 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007667 if ((attname != NULL) && (attvalue != NULL)) {
7668 if (len < 0) len = xmlStrlen(attvalue);
7669 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007670 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7671 xmlURIPtr uri;
7672
7673 if (*URL != 0) {
7674 uri = xmlParseURI((const char *) URL);
7675 if (uri == NULL) {
7676 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7677 ctxt->sax->warning(ctxt->userData,
7678 "xmlns: %s not a valid URI\n", URL);
7679 } else {
7680 if (uri->scheme == NULL) {
7681 if ((ctxt->sax != NULL) &&
7682 (ctxt->sax->warning != NULL))
7683 ctxt->sax->warning(ctxt->userData,
7684 "xmlns: URI %s is not absolute\n", URL);
7685 }
7686 xmlFreeURI(uri);
7687 }
7688 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007689 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007690 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007691 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007692 for (j = 1;j <= nbNs;j++)
7693 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7694 break;
7695 if (j <= nbNs)
7696 xmlErrAttributeDup(ctxt, NULL, attname);
7697 else
7698 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007699 if (alloc != 0) xmlFree(attvalue);
7700 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007701 continue;
7702 }
7703 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007704 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7705 xmlURIPtr uri;
7706
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007707 if (attname == ctxt->str_xml) {
7708 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007709 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7710 "xml namespace prefix mapped to wrong URI\n",
7711 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007712 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007713 /*
7714 * Do not keep a namespace definition node
7715 */
7716 if (alloc != 0) xmlFree(attvalue);
7717 SKIP_BLANKS;
7718 continue;
7719 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007720 uri = xmlParseURI((const char *) URL);
7721 if (uri == NULL) {
7722 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7723 ctxt->sax->warning(ctxt->userData,
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007724 "xmlns:%s: '%s' is not a valid URI\n",
7725 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007726 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007727 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007728 if ((ctxt->sax != NULL) &&
7729 (ctxt->sax->warning != NULL))
7730 ctxt->sax->warning(ctxt->userData,
7731 "xmlns:%s: URI %s is not absolute\n",
7732 attname, URL);
7733 }
7734 xmlFreeURI(uri);
7735 }
7736
Daniel Veillard0fb18932003-09-07 09:14:37 +00007737 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007738 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007739 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007740 for (j = 1;j <= nbNs;j++)
7741 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7742 break;
7743 if (j <= nbNs)
7744 xmlErrAttributeDup(ctxt, aprefix, attname);
7745 else
7746 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007747 if (alloc != 0) xmlFree(attvalue);
7748 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007749 continue;
7750 }
7751
7752 /*
7753 * Add the pair to atts
7754 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007755 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7756 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007757 if (attvalue[len] == 0)
7758 xmlFree(attvalue);
7759 goto failed;
7760 }
7761 maxatts = ctxt->maxatts;
7762 atts = ctxt->atts;
7763 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007764 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007765 atts[nbatts++] = attname;
7766 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007767 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007768 atts[nbatts++] = attvalue;
7769 attvalue += len;
7770 atts[nbatts++] = attvalue;
7771 /*
7772 * tag if some deallocation is needed
7773 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007774 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007775 } else {
7776 if ((attvalue != NULL) && (attvalue[len] == 0))
7777 xmlFree(attvalue);
7778 }
7779
7780failed:
7781
7782 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007783 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007784 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7785 break;
7786 if (!IS_BLANK(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007787 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7788 "attributes construct error\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007789 }
7790 SKIP_BLANKS;
7791 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7792 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007793 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007794 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007795 break;
7796 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007797 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007798 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007799 }
7800
Daniel Veillard0fb18932003-09-07 09:14:37 +00007801 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007802 * The attributes checkings
Daniel Veillard0fb18932003-09-07 09:14:37 +00007803 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007804 for (i = 0; i < nbatts;i += 5) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007805 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7806 if ((atts[i + 1] != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007807 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007808 "Namespace prefix %s for %s on %s is not defined\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007809 atts[i + 1], atts[i], localname);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007810 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007811 atts[i + 2] = nsname;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007812 /*
7813 * [ WFC: Unique Att Spec ]
7814 * No attribute name may appear more than once in the same
7815 * start-tag or empty-element tag.
7816 * As extended by the Namespace in XML REC.
7817 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007818 for (j = 0; j < i;j += 5) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007819 if (atts[i] == atts[j]) {
7820 if (atts[i+1] == atts[j+1]) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007821 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007822 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007823 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007824 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007825 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007826 "Namespaced Attribute %s in '%s' redefined\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007827 atts[i], nsname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007828 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007829 }
7830 }
7831 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007832 }
7833
7834 /*
7835 * The attributes defaulting
7836 */
7837 if (ctxt->attsDefault != NULL) {
7838 xmlDefAttrsPtr defaults;
7839
7840 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7841 if (defaults != NULL) {
7842 for (i = 0;i < defaults->nbAttrs;i++) {
7843 attname = defaults->values[4 * i];
7844 aprefix = defaults->values[4 * i + 1];
7845
7846 /*
7847 * special work for namespaces defaulted defs
7848 */
7849 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7850 /*
7851 * check that it's not a defined namespace
7852 */
7853 for (j = 1;j <= nbNs;j++)
7854 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7855 break;
7856 if (j <= nbNs) continue;
7857
7858 nsname = xmlGetNamespace(ctxt, NULL);
7859 if (nsname != defaults->values[4 * i + 2]) {
7860 if (nsPush(ctxt, NULL,
7861 defaults->values[4 * i + 2]) > 0)
7862 nbNs++;
7863 }
7864 } else if (aprefix == ctxt->str_xmlns) {
7865 /*
7866 * check that it's not a defined namespace
7867 */
7868 for (j = 1;j <= nbNs;j++)
7869 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7870 break;
7871 if (j <= nbNs) continue;
7872
7873 nsname = xmlGetNamespace(ctxt, attname);
7874 if (nsname != defaults->values[2]) {
7875 if (nsPush(ctxt, attname,
7876 defaults->values[4 * i + 2]) > 0)
7877 nbNs++;
7878 }
7879 } else {
7880 /*
7881 * check that it's not a defined attribute
7882 */
7883 for (j = 0;j < nbatts;j+=5) {
7884 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7885 break;
7886 }
7887 if (j < nbatts) continue;
7888
7889 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7890 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00007891 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007892 }
7893 maxatts = ctxt->maxatts;
7894 atts = ctxt->atts;
7895 }
7896 atts[nbatts++] = attname;
7897 atts[nbatts++] = aprefix;
7898 if (aprefix == NULL)
7899 atts[nbatts++] = NULL;
7900 else
7901 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7902 atts[nbatts++] = defaults->values[4 * i + 2];
7903 atts[nbatts++] = defaults->values[4 * i + 3];
7904 nbdef++;
7905 }
7906 }
7907 }
7908 }
7909
7910 nsname = xmlGetNamespace(ctxt, prefix);
7911 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007912 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7913 "Namespace prefix %s on %s is not defined\n",
7914 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007915 }
7916 *pref = prefix;
7917 *URI = nsname;
7918
7919 /*
7920 * SAX: Start of Element !
7921 */
7922 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7923 (!ctxt->disableSAX)) {
7924 if (nbNs > 0)
7925 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7926 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7927 nbatts / 5, nbdef, atts);
7928 else
7929 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7930 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7931 }
7932
7933 /*
7934 * Free up attribute allocated strings if needed
7935 */
7936 if (attval != 0) {
7937 for (i = 3,j = 0; j < nratts;i += 5,j++)
7938 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7939 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007940 }
7941
7942 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007943
7944base_changed:
7945 /*
7946 * the attribute strings are valid iif the base didn't changed
7947 */
7948 if (attval != 0) {
7949 for (i = 3,j = 0; j < nratts;i += 5,j++)
7950 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7951 xmlFree((xmlChar *) atts[i]);
7952 }
7953 ctxt->input->cur = ctxt->input->base + cur;
7954 if (ctxt->wellFormed == 1) {
7955 goto reparse;
7956 }
7957 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007958}
7959
7960/**
7961 * xmlParseEndTag2:
7962 * @ctxt: an XML parser context
7963 * @line: line of the start tag
7964 * @nsNr: number of namespaces on the start tag
7965 *
7966 * parse an end of tag
7967 *
7968 * [42] ETag ::= '</' Name S? '>'
7969 *
7970 * With namespace
7971 *
7972 * [NS 9] ETag ::= '</' QName S? '>'
7973 */
7974
7975static void
7976xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
7977 const xmlChar *URI, int line, int nsNr) {
7978 const xmlChar *name;
7979
7980 GROW;
7981 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007982 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007983 return;
7984 }
7985 SKIP(2);
7986
7987 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
7988
7989 /*
7990 * We should definitely be at the ending "S? '>'" part
7991 */
7992 GROW;
7993 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007994 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007995 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007996 } else
7997 NEXT1;
7998
7999 /*
8000 * [ WFC: Element Type Match ]
8001 * The Name in an element's end-tag must match the element type in the
8002 * start-tag.
8003 *
8004 */
8005 if (name != (xmlChar*)1) {
8006 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
8007 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
8008 if (name != NULL) {
8009 ctxt->sax->error(ctxt->userData,
8010 "Opening and ending tag mismatch: %s line %d and %s\n",
8011 ctxt->name, line, name);
8012 } else {
8013 ctxt->sax->error(ctxt->userData,
8014 "Ending tag error for: %s line %d\n", ctxt->name, line);
8015 }
8016
8017 }
8018 ctxt->wellFormed = 0;
8019 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
8020 }
8021
8022 /*
8023 * SAX: End of Tag
8024 */
8025 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8026 (!ctxt->disableSAX))
8027 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8028
Daniel Veillard0fb18932003-09-07 09:14:37 +00008029 spacePop(ctxt);
8030 if (nsNr != 0)
8031 nsPop(ctxt, nsNr);
8032 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008033}
8034
8035/**
Owen Taylor3473f882001-02-23 17:55:21 +00008036 * xmlParseCDSect:
8037 * @ctxt: an XML parser context
8038 *
8039 * Parse escaped pure raw content.
8040 *
8041 * [18] CDSect ::= CDStart CData CDEnd
8042 *
8043 * [19] CDStart ::= '<![CDATA['
8044 *
8045 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8046 *
8047 * [21] CDEnd ::= ']]>'
8048 */
8049void
8050xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8051 xmlChar *buf = NULL;
8052 int len = 0;
8053 int size = XML_PARSER_BUFFER_SIZE;
8054 int r, rl;
8055 int s, sl;
8056 int cur, l;
8057 int count = 0;
8058
8059 if ((NXT(0) == '<') && (NXT(1) == '!') &&
8060 (NXT(2) == '[') && (NXT(3) == 'C') &&
8061 (NXT(4) == 'D') && (NXT(5) == 'A') &&
8062 (NXT(6) == 'T') && (NXT(7) == 'A') &&
8063 (NXT(8) == '[')) {
8064 SKIP(9);
8065 } else
8066 return;
8067
8068 ctxt->instate = XML_PARSER_CDATA_SECTION;
8069 r = CUR_CHAR(rl);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008070 if (!xmlIsChar(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008071 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008072 ctxt->instate = XML_PARSER_CONTENT;
8073 return;
8074 }
8075 NEXTL(rl);
8076 s = CUR_CHAR(sl);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008077 if (!xmlIsChar(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008078 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008079 ctxt->instate = XML_PARSER_CONTENT;
8080 return;
8081 }
8082 NEXTL(sl);
8083 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008084 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008085 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008086 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008087 return;
8088 }
Daniel Veillard73b013f2003-09-30 12:36:01 +00008089 while (xmlIsChar(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008090 ((r != ']') || (s != ']') || (cur != '>'))) {
8091 if (len + 5 >= size) {
8092 size *= 2;
8093 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8094 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008095 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008096 return;
8097 }
8098 }
8099 COPY_BUF(rl,buf,len,r);
8100 r = s;
8101 rl = sl;
8102 s = cur;
8103 sl = l;
8104 count++;
8105 if (count > 50) {
8106 GROW;
8107 count = 0;
8108 }
8109 NEXTL(l);
8110 cur = CUR_CHAR(l);
8111 }
8112 buf[len] = 0;
8113 ctxt->instate = XML_PARSER_CONTENT;
8114 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008115 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008116 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008117 xmlFree(buf);
8118 return;
8119 }
8120 NEXTL(l);
8121
8122 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008123 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008124 */
8125 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8126 if (ctxt->sax->cdataBlock != NULL)
8127 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008128 else if (ctxt->sax->characters != NULL)
8129 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008130 }
8131 xmlFree(buf);
8132}
8133
8134/**
8135 * xmlParseContent:
8136 * @ctxt: an XML parser context
8137 *
8138 * Parse a content:
8139 *
8140 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8141 */
8142
8143void
8144xmlParseContent(xmlParserCtxtPtr ctxt) {
8145 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008146 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008147 ((RAW != '<') || (NXT(1) != '/'))) {
8148 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008149 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008150 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008151
8152 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008153 * First case : a Processing Instruction.
8154 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008155 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008156 xmlParsePI(ctxt);
8157 }
8158
8159 /*
8160 * Second case : a CDSection
8161 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008162 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008163 (NXT(2) == '[') && (NXT(3) == 'C') &&
8164 (NXT(4) == 'D') && (NXT(5) == 'A') &&
8165 (NXT(6) == 'T') && (NXT(7) == 'A') &&
8166 (NXT(8) == '[')) {
8167 xmlParseCDSect(ctxt);
8168 }
8169
8170 /*
8171 * Third case : a comment
8172 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008173 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008174 (NXT(2) == '-') && (NXT(3) == '-')) {
8175 xmlParseComment(ctxt);
8176 ctxt->instate = XML_PARSER_CONTENT;
8177 }
8178
8179 /*
8180 * Fourth case : a sub-element.
8181 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008182 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008183 xmlParseElement(ctxt);
8184 }
8185
8186 /*
8187 * Fifth case : a reference. If if has not been resolved,
8188 * parsing returns it's Name, create the node
8189 */
8190
Daniel Veillard21a0f912001-02-25 19:54:14 +00008191 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008192 xmlParseReference(ctxt);
8193 }
8194
8195 /*
8196 * Last case, text. Note that References are handled directly.
8197 */
8198 else {
8199 xmlParseCharData(ctxt, 0);
8200 }
8201
8202 GROW;
8203 /*
8204 * Pop-up of finished entities.
8205 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008206 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008207 xmlPopInput(ctxt);
8208 SHRINK;
8209
Daniel Veillardfdc91562002-07-01 21:52:03 +00008210 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008211 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8212 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008213 ctxt->instate = XML_PARSER_EOF;
8214 break;
8215 }
8216 }
8217}
8218
8219/**
8220 * xmlParseElement:
8221 * @ctxt: an XML parser context
8222 *
8223 * parse an XML element, this is highly recursive
8224 *
8225 * [39] element ::= EmptyElemTag | STag content ETag
8226 *
8227 * [ WFC: Element Type Match ]
8228 * The Name in an element's end-tag must match the element type in the
8229 * start-tag.
8230 *
Owen Taylor3473f882001-02-23 17:55:21 +00008231 */
8232
8233void
8234xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008235 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008236 const xmlChar *prefix;
8237 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008238 xmlParserNodeInfo node_info;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008239 int line;
Owen Taylor3473f882001-02-23 17:55:21 +00008240 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008241 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008242
8243 /* Capture start position */
8244 if (ctxt->record_info) {
8245 node_info.begin_pos = ctxt->input->consumed +
8246 (CUR_PTR - ctxt->input->base);
8247 node_info.begin_line = ctxt->input->line;
8248 }
8249
8250 if (ctxt->spaceNr == 0)
8251 spacePush(ctxt, -1);
8252 else
8253 spacePush(ctxt, *ctxt->space);
8254
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008255 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008256#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008257 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008258#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008259 name = xmlParseStartTag2(ctxt, &prefix, &URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008260#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008261 else
8262 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008263#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008264 if (name == NULL) {
8265 spacePop(ctxt);
8266 return;
8267 }
8268 namePush(ctxt, name);
8269 ret = ctxt->node;
8270
Daniel Veillard4432df22003-09-28 18:58:27 +00008271#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008272 /*
8273 * [ VC: Root Element Type ]
8274 * The Name in the document type declaration must match the element
8275 * type of the root element.
8276 */
8277 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8278 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8279 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008280#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008281
8282 /*
8283 * Check for an Empty Element.
8284 */
8285 if ((RAW == '/') && (NXT(1) == '>')) {
8286 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008287 if (ctxt->sax2) {
8288 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8289 (!ctxt->disableSAX))
8290 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008291#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008292 } else {
8293 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8294 (!ctxt->disableSAX))
8295 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008296#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008297 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008298 namePop(ctxt);
8299 spacePop(ctxt);
8300 if (nsNr != ctxt->nsNr)
8301 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008302 if ( ret != NULL && ctxt->record_info ) {
8303 node_info.end_pos = ctxt->input->consumed +
8304 (CUR_PTR - ctxt->input->base);
8305 node_info.end_line = ctxt->input->line;
8306 node_info.node = ret;
8307 xmlParserAddNodeInfo(ctxt, &node_info);
8308 }
8309 return;
8310 }
8311 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008312 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008313 } else {
8314 ctxt->errNo = XML_ERR_GT_REQUIRED;
8315 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8316 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008317 "Couldn't find end of Start Tag %s line %d\n",
8318 name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00008319 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008320 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008321
8322 /*
8323 * end of parsing of this node.
8324 */
8325 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008326 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008327 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008328 if (nsNr != ctxt->nsNr)
8329 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008330
8331 /*
8332 * Capture end position and add node
8333 */
8334 if ( ret != NULL && ctxt->record_info ) {
8335 node_info.end_pos = ctxt->input->consumed +
8336 (CUR_PTR - ctxt->input->base);
8337 node_info.end_line = ctxt->input->line;
8338 node_info.node = ret;
8339 xmlParserAddNodeInfo(ctxt, &node_info);
8340 }
8341 return;
8342 }
8343
8344 /*
8345 * Parse the content of the element:
8346 */
8347 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008348 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00008349 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00008350 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8351 ctxt->sax->error(ctxt->userData,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008352 "Premature end of data in tag %s line %d\n", name, line);
Owen Taylor3473f882001-02-23 17:55:21 +00008353 ctxt->wellFormed = 0;
Daniel Veillarddad3f682002-11-17 16:47:27 +00008354 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008355
8356 /*
8357 * end of parsing of this node.
8358 */
8359 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008360 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008361 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008362 if (nsNr != ctxt->nsNr)
8363 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008364 return;
8365 }
8366
8367 /*
8368 * parse the end of tag: '</' should be here.
8369 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008370 if (ctxt->sax2) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008371 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008372 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008373 }
8374#ifdef LIBXML_SAX1_ENABLED
8375 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008376 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008377#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008378
8379 /*
8380 * Capture end position and add node
8381 */
8382 if ( ret != NULL && ctxt->record_info ) {
8383 node_info.end_pos = ctxt->input->consumed +
8384 (CUR_PTR - ctxt->input->base);
8385 node_info.end_line = ctxt->input->line;
8386 node_info.node = ret;
8387 xmlParserAddNodeInfo(ctxt, &node_info);
8388 }
8389}
8390
8391/**
8392 * xmlParseVersionNum:
8393 * @ctxt: an XML parser context
8394 *
8395 * parse the XML version value.
8396 *
8397 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8398 *
8399 * Returns the string giving the XML version number, or NULL
8400 */
8401xmlChar *
8402xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8403 xmlChar *buf = NULL;
8404 int len = 0;
8405 int size = 10;
8406 xmlChar cur;
8407
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008408 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008409 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008410 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008411 return(NULL);
8412 }
8413 cur = CUR;
8414 while (((cur >= 'a') && (cur <= 'z')) ||
8415 ((cur >= 'A') && (cur <= 'Z')) ||
8416 ((cur >= '0') && (cur <= '9')) ||
8417 (cur == '_') || (cur == '.') ||
8418 (cur == ':') || (cur == '-')) {
8419 if (len + 1 >= size) {
8420 size *= 2;
8421 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8422 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008423 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008424 return(NULL);
8425 }
8426 }
8427 buf[len++] = cur;
8428 NEXT;
8429 cur=CUR;
8430 }
8431 buf[len] = 0;
8432 return(buf);
8433}
8434
8435/**
8436 * xmlParseVersionInfo:
8437 * @ctxt: an XML parser context
8438 *
8439 * parse the XML version.
8440 *
8441 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8442 *
8443 * [25] Eq ::= S? '=' S?
8444 *
8445 * Returns the version string, e.g. "1.0"
8446 */
8447
8448xmlChar *
8449xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8450 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008451
8452 if ((RAW == 'v') && (NXT(1) == 'e') &&
8453 (NXT(2) == 'r') && (NXT(3) == 's') &&
8454 (NXT(4) == 'i') && (NXT(5) == 'o') &&
8455 (NXT(6) == 'n')) {
8456 SKIP(7);
8457 SKIP_BLANKS;
8458 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008459 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008460 return(NULL);
8461 }
8462 NEXT;
8463 SKIP_BLANKS;
8464 if (RAW == '"') {
8465 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008466 version = xmlParseVersionNum(ctxt);
8467 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008468 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008469 } else
8470 NEXT;
8471 } else if (RAW == '\''){
8472 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008473 version = xmlParseVersionNum(ctxt);
8474 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008475 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008476 } else
8477 NEXT;
8478 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008479 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008480 }
8481 }
8482 return(version);
8483}
8484
8485/**
8486 * xmlParseEncName:
8487 * @ctxt: an XML parser context
8488 *
8489 * parse the XML encoding name
8490 *
8491 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8492 *
8493 * Returns the encoding name value or NULL
8494 */
8495xmlChar *
8496xmlParseEncName(xmlParserCtxtPtr ctxt) {
8497 xmlChar *buf = NULL;
8498 int len = 0;
8499 int size = 10;
8500 xmlChar cur;
8501
8502 cur = CUR;
8503 if (((cur >= 'a') && (cur <= 'z')) ||
8504 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008505 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008506 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008507 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008508 return(NULL);
8509 }
8510
8511 buf[len++] = cur;
8512 NEXT;
8513 cur = CUR;
8514 while (((cur >= 'a') && (cur <= 'z')) ||
8515 ((cur >= 'A') && (cur <= 'Z')) ||
8516 ((cur >= '0') && (cur <= '9')) ||
8517 (cur == '.') || (cur == '_') ||
8518 (cur == '-')) {
8519 if (len + 1 >= size) {
8520 size *= 2;
8521 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8522 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008523 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008524 return(NULL);
8525 }
8526 }
8527 buf[len++] = cur;
8528 NEXT;
8529 cur = CUR;
8530 if (cur == 0) {
8531 SHRINK;
8532 GROW;
8533 cur = CUR;
8534 }
8535 }
8536 buf[len] = 0;
8537 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008538 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008539 }
8540 return(buf);
8541}
8542
8543/**
8544 * xmlParseEncodingDecl:
8545 * @ctxt: an XML parser context
8546 *
8547 * parse the XML encoding declaration
8548 *
8549 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8550 *
8551 * this setups the conversion filters.
8552 *
8553 * Returns the encoding value or NULL
8554 */
8555
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008556const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008557xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8558 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008559
8560 SKIP_BLANKS;
8561 if ((RAW == 'e') && (NXT(1) == 'n') &&
8562 (NXT(2) == 'c') && (NXT(3) == 'o') &&
8563 (NXT(4) == 'd') && (NXT(5) == 'i') &&
8564 (NXT(6) == 'n') && (NXT(7) == 'g')) {
8565 SKIP(8);
8566 SKIP_BLANKS;
8567 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008568 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008569 return(NULL);
8570 }
8571 NEXT;
8572 SKIP_BLANKS;
8573 if (RAW == '"') {
8574 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008575 encoding = xmlParseEncName(ctxt);
8576 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008577 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008578 } else
8579 NEXT;
8580 } else if (RAW == '\''){
8581 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008582 encoding = xmlParseEncName(ctxt);
8583 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008584 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008585 } else
8586 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008587 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008588 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008589 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008590 /*
8591 * UTF-16 encoding stwich has already taken place at this stage,
8592 * more over the little-endian/big-endian selection is already done
8593 */
8594 if ((encoding != NULL) &&
8595 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8596 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008597 if (ctxt->encoding != NULL)
8598 xmlFree((xmlChar *) ctxt->encoding);
8599 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008600 }
8601 /*
8602 * UTF-8 encoding is handled natively
8603 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008604 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008605 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8606 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008607 if (ctxt->encoding != NULL)
8608 xmlFree((xmlChar *) ctxt->encoding);
8609 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008610 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008611 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008612 xmlCharEncodingHandlerPtr handler;
8613
8614 if (ctxt->input->encoding != NULL)
8615 xmlFree((xmlChar *) ctxt->input->encoding);
8616 ctxt->input->encoding = encoding;
8617
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008618 handler = xmlFindCharEncodingHandler((const char *) encoding);
8619 if (handler != NULL) {
8620 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008621 } else {
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008622 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
8623 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8624 ctxt->sax->error(ctxt->userData,
8625 "Unsupported encoding %s\n", encoding);
8626 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008627 }
8628 }
8629 }
8630 return(encoding);
8631}
8632
8633/**
8634 * xmlParseSDDecl:
8635 * @ctxt: an XML parser context
8636 *
8637 * parse the XML standalone declaration
8638 *
8639 * [32] SDDecl ::= S 'standalone' Eq
8640 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8641 *
8642 * [ VC: Standalone Document Declaration ]
8643 * TODO The standalone document declaration must have the value "no"
8644 * if any external markup declarations contain declarations of:
8645 * - attributes with default values, if elements to which these
8646 * attributes apply appear in the document without specifications
8647 * of values for these attributes, or
8648 * - entities (other than amp, lt, gt, apos, quot), if references
8649 * to those entities appear in the document, or
8650 * - attributes with values subject to normalization, where the
8651 * attribute appears in the document with a value which will change
8652 * as a result of normalization, or
8653 * - element types with element content, if white space occurs directly
8654 * within any instance of those types.
8655 *
8656 * Returns 1 if standalone, 0 otherwise
8657 */
8658
8659int
8660xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8661 int standalone = -1;
8662
8663 SKIP_BLANKS;
8664 if ((RAW == 's') && (NXT(1) == 't') &&
8665 (NXT(2) == 'a') && (NXT(3) == 'n') &&
8666 (NXT(4) == 'd') && (NXT(5) == 'a') &&
8667 (NXT(6) == 'l') && (NXT(7) == 'o') &&
8668 (NXT(8) == 'n') && (NXT(9) == 'e')) {
8669 SKIP(10);
8670 SKIP_BLANKS;
8671 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008672 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008673 return(standalone);
8674 }
8675 NEXT;
8676 SKIP_BLANKS;
8677 if (RAW == '\''){
8678 NEXT;
8679 if ((RAW == 'n') && (NXT(1) == 'o')) {
8680 standalone = 0;
8681 SKIP(2);
8682 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8683 (NXT(2) == 's')) {
8684 standalone = 1;
8685 SKIP(3);
8686 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008687 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008688 }
8689 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008690 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008691 } else
8692 NEXT;
8693 } else if (RAW == '"'){
8694 NEXT;
8695 if ((RAW == 'n') && (NXT(1) == 'o')) {
8696 standalone = 0;
8697 SKIP(2);
8698 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8699 (NXT(2) == 's')) {
8700 standalone = 1;
8701 SKIP(3);
8702 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008703 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008704 }
8705 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008706 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008707 } else
8708 NEXT;
8709 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008710 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008711 }
8712 }
8713 return(standalone);
8714}
8715
8716/**
8717 * xmlParseXMLDecl:
8718 * @ctxt: an XML parser context
8719 *
8720 * parse an XML declaration header
8721 *
8722 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8723 */
8724
8725void
8726xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8727 xmlChar *version;
8728
8729 /*
8730 * We know that '<?xml' is here.
8731 */
8732 SKIP(5);
8733
8734 if (!IS_BLANK(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008735 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8736 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008737 }
8738 SKIP_BLANKS;
8739
8740 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008741 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008742 */
8743 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008744 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008745 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008746 } else {
8747 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8748 /*
8749 * TODO: Blueberry should be detected here
8750 */
8751 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
8752 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
8753 version);
8754 }
8755 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008756 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008757 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008758 }
Owen Taylor3473f882001-02-23 17:55:21 +00008759
8760 /*
8761 * We may have the encoding declaration
8762 */
8763 if (!IS_BLANK(RAW)) {
8764 if ((RAW == '?') && (NXT(1) == '>')) {
8765 SKIP(2);
8766 return;
8767 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008768 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008769 }
8770 xmlParseEncodingDecl(ctxt);
8771 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8772 /*
8773 * The XML REC instructs us to stop parsing right here
8774 */
8775 return;
8776 }
8777
8778 /*
8779 * We may have the standalone status.
8780 */
8781 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
8782 if ((RAW == '?') && (NXT(1) == '>')) {
8783 SKIP(2);
8784 return;
8785 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008786 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008787 }
8788 SKIP_BLANKS;
8789 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8790
8791 SKIP_BLANKS;
8792 if ((RAW == '?') && (NXT(1) == '>')) {
8793 SKIP(2);
8794 } else if (RAW == '>') {
8795 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008796 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008797 NEXT;
8798 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008799 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008800 MOVETO_ENDTAG(CUR_PTR);
8801 NEXT;
8802 }
8803}
8804
8805/**
8806 * xmlParseMisc:
8807 * @ctxt: an XML parser context
8808 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008809 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008810 *
8811 * [27] Misc ::= Comment | PI | S
8812 */
8813
8814void
8815xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008816 while (((RAW == '<') && (NXT(1) == '?')) ||
8817 ((RAW == '<') && (NXT(1) == '!') &&
8818 (NXT(2) == '-') && (NXT(3) == '-')) ||
8819 IS_BLANK(CUR)) {
8820 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008821 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00008822 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008823 NEXT;
8824 } else
8825 xmlParseComment(ctxt);
8826 }
8827}
8828
8829/**
8830 * xmlParseDocument:
8831 * @ctxt: an XML parser context
8832 *
8833 * parse an XML document (and build a tree if using the standard SAX
8834 * interface).
8835 *
8836 * [1] document ::= prolog element Misc*
8837 *
8838 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8839 *
8840 * Returns 0, -1 in case of error. the parser context is augmented
8841 * as a result of the parsing.
8842 */
8843
8844int
8845xmlParseDocument(xmlParserCtxtPtr ctxt) {
8846 xmlChar start[4];
8847 xmlCharEncoding enc;
8848
8849 xmlInitParser();
8850
8851 GROW;
8852
8853 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008854 * SAX: detecting the level.
8855 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008856 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008857
8858 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008859 * SAX: beginning of the document processing.
8860 */
8861 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8862 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8863
Daniel Veillard50f34372001-08-03 12:06:36 +00008864 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008865 /*
8866 * Get the 4 first bytes and decode the charset
8867 * if enc != XML_CHAR_ENCODING_NONE
8868 * plug some encoding conversion routines.
8869 */
8870 start[0] = RAW;
8871 start[1] = NXT(1);
8872 start[2] = NXT(2);
8873 start[3] = NXT(3);
8874 enc = xmlDetectCharEncoding(start, 4);
8875 if (enc != XML_CHAR_ENCODING_NONE) {
8876 xmlSwitchEncoding(ctxt, enc);
8877 }
Owen Taylor3473f882001-02-23 17:55:21 +00008878 }
8879
8880
8881 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008882 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008883 }
8884
8885 /*
8886 * Check for the XMLDecl in the Prolog.
8887 */
8888 GROW;
8889 if ((RAW == '<') && (NXT(1) == '?') &&
8890 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8891 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8892
8893 /*
8894 * Note that we will switch encoding on the fly.
8895 */
8896 xmlParseXMLDecl(ctxt);
8897 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8898 /*
8899 * The XML REC instructs us to stop parsing right here
8900 */
8901 return(-1);
8902 }
8903 ctxt->standalone = ctxt->input->standalone;
8904 SKIP_BLANKS;
8905 } else {
8906 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8907 }
8908 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8909 ctxt->sax->startDocument(ctxt->userData);
8910
8911 /*
8912 * The Misc part of the Prolog
8913 */
8914 GROW;
8915 xmlParseMisc(ctxt);
8916
8917 /*
8918 * Then possibly doc type declaration(s) and more Misc
8919 * (doctypedecl Misc*)?
8920 */
8921 GROW;
8922 if ((RAW == '<') && (NXT(1) == '!') &&
8923 (NXT(2) == 'D') && (NXT(3) == 'O') &&
8924 (NXT(4) == 'C') && (NXT(5) == 'T') &&
8925 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
8926 (NXT(8) == 'E')) {
8927
8928 ctxt->inSubset = 1;
8929 xmlParseDocTypeDecl(ctxt);
8930 if (RAW == '[') {
8931 ctxt->instate = XML_PARSER_DTD;
8932 xmlParseInternalSubset(ctxt);
8933 }
8934
8935 /*
8936 * Create and update the external subset.
8937 */
8938 ctxt->inSubset = 2;
8939 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8940 (!ctxt->disableSAX))
8941 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8942 ctxt->extSubSystem, ctxt->extSubURI);
8943 ctxt->inSubset = 0;
8944
8945
8946 ctxt->instate = XML_PARSER_PROLOG;
8947 xmlParseMisc(ctxt);
8948 }
8949
8950 /*
8951 * Time to start parsing the tree itself
8952 */
8953 GROW;
8954 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008955 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8956 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008957 } else {
8958 ctxt->instate = XML_PARSER_CONTENT;
8959 xmlParseElement(ctxt);
8960 ctxt->instate = XML_PARSER_EPILOG;
8961
8962
8963 /*
8964 * The Misc part at the end
8965 */
8966 xmlParseMisc(ctxt);
8967
Daniel Veillard561b7f82002-03-20 21:55:57 +00008968 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008969 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008970 }
8971 ctxt->instate = XML_PARSER_EOF;
8972 }
8973
8974 /*
8975 * SAX: end of the document processing.
8976 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008977 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008978 ctxt->sax->endDocument(ctxt->userData);
8979
Daniel Veillard5997aca2002-03-18 18:36:20 +00008980 /*
8981 * Remove locally kept entity definitions if the tree was not built
8982 */
8983 if ((ctxt->myDoc != NULL) &&
8984 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8985 xmlFreeDoc(ctxt->myDoc);
8986 ctxt->myDoc = NULL;
8987 }
8988
Daniel Veillardc7612992002-02-17 22:47:37 +00008989 if (! ctxt->wellFormed) {
8990 ctxt->valid = 0;
8991 return(-1);
8992 }
Owen Taylor3473f882001-02-23 17:55:21 +00008993 return(0);
8994}
8995
8996/**
8997 * xmlParseExtParsedEnt:
8998 * @ctxt: an XML parser context
8999 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009000 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00009001 * An external general parsed entity is well-formed if it matches the
9002 * production labeled extParsedEnt.
9003 *
9004 * [78] extParsedEnt ::= TextDecl? content
9005 *
9006 * Returns 0, -1 in case of error. the parser context is augmented
9007 * as a result of the parsing.
9008 */
9009
9010int
9011xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9012 xmlChar start[4];
9013 xmlCharEncoding enc;
9014
9015 xmlDefaultSAXHandlerInit();
9016
Daniel Veillard309f81d2003-09-23 09:02:53 +00009017 xmlDetectSAX2(ctxt);
9018
Owen Taylor3473f882001-02-23 17:55:21 +00009019 GROW;
9020
9021 /*
9022 * SAX: beginning of the document processing.
9023 */
9024 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9025 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9026
9027 /*
9028 * Get the 4 first bytes and decode the charset
9029 * if enc != XML_CHAR_ENCODING_NONE
9030 * plug some encoding conversion routines.
9031 */
9032 start[0] = RAW;
9033 start[1] = NXT(1);
9034 start[2] = NXT(2);
9035 start[3] = NXT(3);
9036 enc = xmlDetectCharEncoding(start, 4);
9037 if (enc != XML_CHAR_ENCODING_NONE) {
9038 xmlSwitchEncoding(ctxt, enc);
9039 }
9040
9041
9042 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009043 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009044 }
9045
9046 /*
9047 * Check for the XMLDecl in the Prolog.
9048 */
9049 GROW;
9050 if ((RAW == '<') && (NXT(1) == '?') &&
9051 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9052 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9053
9054 /*
9055 * Note that we will switch encoding on the fly.
9056 */
9057 xmlParseXMLDecl(ctxt);
9058 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9059 /*
9060 * The XML REC instructs us to stop parsing right here
9061 */
9062 return(-1);
9063 }
9064 SKIP_BLANKS;
9065 } else {
9066 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9067 }
9068 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9069 ctxt->sax->startDocument(ctxt->userData);
9070
9071 /*
9072 * Doing validity checking on chunk doesn't make sense
9073 */
9074 ctxt->instate = XML_PARSER_CONTENT;
9075 ctxt->validate = 0;
9076 ctxt->loadsubset = 0;
9077 ctxt->depth = 0;
9078
9079 xmlParseContent(ctxt);
9080
9081 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009082 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009083 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009084 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009085 }
9086
9087 /*
9088 * SAX: end of the document processing.
9089 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009090 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009091 ctxt->sax->endDocument(ctxt->userData);
9092
9093 if (! ctxt->wellFormed) return(-1);
9094 return(0);
9095}
9096
Daniel Veillard73b013f2003-09-30 12:36:01 +00009097#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009098/************************************************************************
9099 * *
9100 * Progressive parsing interfaces *
9101 * *
9102 ************************************************************************/
9103
9104/**
9105 * xmlParseLookupSequence:
9106 * @ctxt: an XML parser context
9107 * @first: the first char to lookup
9108 * @next: the next char to lookup or zero
9109 * @third: the next char to lookup or zero
9110 *
9111 * Try to find if a sequence (first, next, third) or just (first next) or
9112 * (first) is available in the input stream.
9113 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9114 * to avoid rescanning sequences of bytes, it DOES change the state of the
9115 * parser, do not use liberally.
9116 *
9117 * Returns the index to the current parsing point if the full sequence
9118 * is available, -1 otherwise.
9119 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009120static int
Owen Taylor3473f882001-02-23 17:55:21 +00009121xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9122 xmlChar next, xmlChar third) {
9123 int base, len;
9124 xmlParserInputPtr in;
9125 const xmlChar *buf;
9126
9127 in = ctxt->input;
9128 if (in == NULL) return(-1);
9129 base = in->cur - in->base;
9130 if (base < 0) return(-1);
9131 if (ctxt->checkIndex > base)
9132 base = ctxt->checkIndex;
9133 if (in->buf == NULL) {
9134 buf = in->base;
9135 len = in->length;
9136 } else {
9137 buf = in->buf->buffer->content;
9138 len = in->buf->buffer->use;
9139 }
9140 /* take into account the sequence length */
9141 if (third) len -= 2;
9142 else if (next) len --;
9143 for (;base < len;base++) {
9144 if (buf[base] == first) {
9145 if (third != 0) {
9146 if ((buf[base + 1] != next) ||
9147 (buf[base + 2] != third)) continue;
9148 } else if (next != 0) {
9149 if (buf[base + 1] != next) continue;
9150 }
9151 ctxt->checkIndex = 0;
9152#ifdef DEBUG_PUSH
9153 if (next == 0)
9154 xmlGenericError(xmlGenericErrorContext,
9155 "PP: lookup '%c' found at %d\n",
9156 first, base);
9157 else if (third == 0)
9158 xmlGenericError(xmlGenericErrorContext,
9159 "PP: lookup '%c%c' found at %d\n",
9160 first, next, base);
9161 else
9162 xmlGenericError(xmlGenericErrorContext,
9163 "PP: lookup '%c%c%c' found at %d\n",
9164 first, next, third, base);
9165#endif
9166 return(base - (in->cur - in->base));
9167 }
9168 }
9169 ctxt->checkIndex = base;
9170#ifdef DEBUG_PUSH
9171 if (next == 0)
9172 xmlGenericError(xmlGenericErrorContext,
9173 "PP: lookup '%c' failed\n", first);
9174 else if (third == 0)
9175 xmlGenericError(xmlGenericErrorContext,
9176 "PP: lookup '%c%c' failed\n", first, next);
9177 else
9178 xmlGenericError(xmlGenericErrorContext,
9179 "PP: lookup '%c%c%c' failed\n", first, next, third);
9180#endif
9181 return(-1);
9182}
9183
9184/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009185 * xmlParseGetLasts:
9186 * @ctxt: an XML parser context
9187 * @lastlt: pointer to store the last '<' from the input
9188 * @lastgt: pointer to store the last '>' from the input
9189 *
9190 * Lookup the last < and > in the current chunk
9191 */
9192static void
9193xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9194 const xmlChar **lastgt) {
9195 const xmlChar *tmp;
9196
9197 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9198 xmlGenericError(xmlGenericErrorContext,
9199 "Internal error: xmlParseGetLasts\n");
9200 return;
9201 }
9202 if ((ctxt->progressive == 1) && (ctxt->inputNr == 1)) {
9203 tmp = ctxt->input->end;
9204 tmp--;
9205 while ((tmp >= ctxt->input->base) && (*tmp != '<') &&
9206 (*tmp != '>')) tmp--;
9207 if (tmp < ctxt->input->base) {
9208 *lastlt = NULL;
9209 *lastgt = NULL;
9210 } else if (*tmp == '<') {
9211 *lastlt = tmp;
9212 tmp--;
9213 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9214 if (tmp < ctxt->input->base)
9215 *lastgt = NULL;
9216 else
9217 *lastgt = tmp;
9218 } else {
9219 *lastgt = tmp;
9220 tmp--;
9221 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
9222 if (tmp < ctxt->input->base)
9223 *lastlt = NULL;
9224 else
9225 *lastlt = tmp;
9226 }
9227
9228 } else {
9229 *lastlt = NULL;
9230 *lastgt = NULL;
9231 }
9232}
9233/**
Owen Taylor3473f882001-02-23 17:55:21 +00009234 * xmlParseTryOrFinish:
9235 * @ctxt: an XML parser context
9236 * @terminate: last chunk indicator
9237 *
9238 * Try to progress on parsing
9239 *
9240 * Returns zero if no parsing was possible
9241 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009242static int
Owen Taylor3473f882001-02-23 17:55:21 +00009243xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9244 int ret = 0;
9245 int avail;
9246 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009247 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009248
9249#ifdef DEBUG_PUSH
9250 switch (ctxt->instate) {
9251 case XML_PARSER_EOF:
9252 xmlGenericError(xmlGenericErrorContext,
9253 "PP: try EOF\n"); break;
9254 case XML_PARSER_START:
9255 xmlGenericError(xmlGenericErrorContext,
9256 "PP: try START\n"); break;
9257 case XML_PARSER_MISC:
9258 xmlGenericError(xmlGenericErrorContext,
9259 "PP: try MISC\n");break;
9260 case XML_PARSER_COMMENT:
9261 xmlGenericError(xmlGenericErrorContext,
9262 "PP: try COMMENT\n");break;
9263 case XML_PARSER_PROLOG:
9264 xmlGenericError(xmlGenericErrorContext,
9265 "PP: try PROLOG\n");break;
9266 case XML_PARSER_START_TAG:
9267 xmlGenericError(xmlGenericErrorContext,
9268 "PP: try START_TAG\n");break;
9269 case XML_PARSER_CONTENT:
9270 xmlGenericError(xmlGenericErrorContext,
9271 "PP: try CONTENT\n");break;
9272 case XML_PARSER_CDATA_SECTION:
9273 xmlGenericError(xmlGenericErrorContext,
9274 "PP: try CDATA_SECTION\n");break;
9275 case XML_PARSER_END_TAG:
9276 xmlGenericError(xmlGenericErrorContext,
9277 "PP: try END_TAG\n");break;
9278 case XML_PARSER_ENTITY_DECL:
9279 xmlGenericError(xmlGenericErrorContext,
9280 "PP: try ENTITY_DECL\n");break;
9281 case XML_PARSER_ENTITY_VALUE:
9282 xmlGenericError(xmlGenericErrorContext,
9283 "PP: try ENTITY_VALUE\n");break;
9284 case XML_PARSER_ATTRIBUTE_VALUE:
9285 xmlGenericError(xmlGenericErrorContext,
9286 "PP: try ATTRIBUTE_VALUE\n");break;
9287 case XML_PARSER_DTD:
9288 xmlGenericError(xmlGenericErrorContext,
9289 "PP: try DTD\n");break;
9290 case XML_PARSER_EPILOG:
9291 xmlGenericError(xmlGenericErrorContext,
9292 "PP: try EPILOG\n");break;
9293 case XML_PARSER_PI:
9294 xmlGenericError(xmlGenericErrorContext,
9295 "PP: try PI\n");break;
9296 case XML_PARSER_IGNORE:
9297 xmlGenericError(xmlGenericErrorContext,
9298 "PP: try IGNORE\n");break;
9299 }
9300#endif
9301
Daniel Veillarda880b122003-04-21 21:36:41 +00009302 if (ctxt->input->cur - ctxt->input->base > 4096) {
9303 xmlSHRINK(ctxt);
9304 ctxt->checkIndex = 0;
9305 }
9306 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009307
Daniel Veillarda880b122003-04-21 21:36:41 +00009308 while (1) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009309 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9310 return(0);
9311
9312
Owen Taylor3473f882001-02-23 17:55:21 +00009313 /*
9314 * Pop-up of finished entities.
9315 */
9316 while ((RAW == 0) && (ctxt->inputNr > 1))
9317 xmlPopInput(ctxt);
9318
9319 if (ctxt->input ==NULL) break;
9320 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009321 avail = ctxt->input->length -
9322 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009323 else {
9324 /*
9325 * If we are operating on converted input, try to flush
9326 * remainng chars to avoid them stalling in the non-converted
9327 * buffer.
9328 */
9329 if ((ctxt->input->buf->raw != NULL) &&
9330 (ctxt->input->buf->raw->use > 0)) {
9331 int base = ctxt->input->base -
9332 ctxt->input->buf->buffer->content;
9333 int current = ctxt->input->cur - ctxt->input->base;
9334
9335 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9336 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9337 ctxt->input->cur = ctxt->input->base + current;
9338 ctxt->input->end =
9339 &ctxt->input->buf->buffer->content[
9340 ctxt->input->buf->buffer->use];
9341 }
9342 avail = ctxt->input->buf->buffer->use -
9343 (ctxt->input->cur - ctxt->input->base);
9344 }
Owen Taylor3473f882001-02-23 17:55:21 +00009345 if (avail < 1)
9346 goto done;
9347 switch (ctxt->instate) {
9348 case XML_PARSER_EOF:
9349 /*
9350 * Document parsing is done !
9351 */
9352 goto done;
9353 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009354 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9355 xmlChar start[4];
9356 xmlCharEncoding enc;
9357
9358 /*
9359 * Very first chars read from the document flow.
9360 */
9361 if (avail < 4)
9362 goto done;
9363
9364 /*
9365 * Get the 4 first bytes and decode the charset
9366 * if enc != XML_CHAR_ENCODING_NONE
9367 * plug some encoding conversion routines.
9368 */
9369 start[0] = RAW;
9370 start[1] = NXT(1);
9371 start[2] = NXT(2);
9372 start[3] = NXT(3);
9373 enc = xmlDetectCharEncoding(start, 4);
9374 if (enc != XML_CHAR_ENCODING_NONE) {
9375 xmlSwitchEncoding(ctxt, enc);
9376 }
9377 break;
9378 }
Owen Taylor3473f882001-02-23 17:55:21 +00009379
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009380 if (avail < 2)
9381 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009382 cur = ctxt->input->cur[0];
9383 next = ctxt->input->cur[1];
9384 if (cur == 0) {
9385 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9386 ctxt->sax->setDocumentLocator(ctxt->userData,
9387 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009388 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009389 ctxt->instate = XML_PARSER_EOF;
9390#ifdef DEBUG_PUSH
9391 xmlGenericError(xmlGenericErrorContext,
9392 "PP: entering EOF\n");
9393#endif
9394 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9395 ctxt->sax->endDocument(ctxt->userData);
9396 goto done;
9397 }
9398 if ((cur == '<') && (next == '?')) {
9399 /* PI or XML decl */
9400 if (avail < 5) return(ret);
9401 if ((!terminate) &&
9402 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9403 return(ret);
9404 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9405 ctxt->sax->setDocumentLocator(ctxt->userData,
9406 &xmlDefaultSAXLocator);
9407 if ((ctxt->input->cur[2] == 'x') &&
9408 (ctxt->input->cur[3] == 'm') &&
9409 (ctxt->input->cur[4] == 'l') &&
9410 (IS_BLANK(ctxt->input->cur[5]))) {
9411 ret += 5;
9412#ifdef DEBUG_PUSH
9413 xmlGenericError(xmlGenericErrorContext,
9414 "PP: Parsing XML Decl\n");
9415#endif
9416 xmlParseXMLDecl(ctxt);
9417 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9418 /*
9419 * The XML REC instructs us to stop parsing right
9420 * here
9421 */
9422 ctxt->instate = XML_PARSER_EOF;
9423 return(0);
9424 }
9425 ctxt->standalone = ctxt->input->standalone;
9426 if ((ctxt->encoding == NULL) &&
9427 (ctxt->input->encoding != NULL))
9428 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9429 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9430 (!ctxt->disableSAX))
9431 ctxt->sax->startDocument(ctxt->userData);
9432 ctxt->instate = XML_PARSER_MISC;
9433#ifdef DEBUG_PUSH
9434 xmlGenericError(xmlGenericErrorContext,
9435 "PP: entering MISC\n");
9436#endif
9437 } else {
9438 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9439 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9440 (!ctxt->disableSAX))
9441 ctxt->sax->startDocument(ctxt->userData);
9442 ctxt->instate = XML_PARSER_MISC;
9443#ifdef DEBUG_PUSH
9444 xmlGenericError(xmlGenericErrorContext,
9445 "PP: entering MISC\n");
9446#endif
9447 }
9448 } else {
9449 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9450 ctxt->sax->setDocumentLocator(ctxt->userData,
9451 &xmlDefaultSAXLocator);
9452 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9453 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9454 (!ctxt->disableSAX))
9455 ctxt->sax->startDocument(ctxt->userData);
9456 ctxt->instate = XML_PARSER_MISC;
9457#ifdef DEBUG_PUSH
9458 xmlGenericError(xmlGenericErrorContext,
9459 "PP: entering MISC\n");
9460#endif
9461 }
9462 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009463 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009464 const xmlChar *name;
9465 const xmlChar *prefix;
9466 const xmlChar *URI;
9467 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009468
9469 if ((avail < 2) && (ctxt->inputNr == 1))
9470 goto done;
9471 cur = ctxt->input->cur[0];
9472 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009473 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009474 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009475 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9476 ctxt->sax->endDocument(ctxt->userData);
9477 goto done;
9478 }
9479 if (!terminate) {
9480 if (ctxt->progressive) {
9481 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9482 goto done;
9483 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9484 goto done;
9485 }
9486 }
9487 if (ctxt->spaceNr == 0)
9488 spacePush(ctxt, -1);
9489 else
9490 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009491#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009492 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009493#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009494 name = xmlParseStartTag2(ctxt, &prefix, &URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009495#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009496 else
9497 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009498#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009499 if (name == NULL) {
9500 spacePop(ctxt);
9501 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009502 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9503 ctxt->sax->endDocument(ctxt->userData);
9504 goto done;
9505 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009506#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009507 /*
9508 * [ VC: Root Element Type ]
9509 * The Name in the document type declaration must match
9510 * the element type of the root element.
9511 */
9512 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9513 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9514 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009515#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009516
9517 /*
9518 * Check for an Empty Element.
9519 */
9520 if ((RAW == '/') && (NXT(1) == '>')) {
9521 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009522
9523 if (ctxt->sax2) {
9524 if ((ctxt->sax != NULL) &&
9525 (ctxt->sax->endElementNs != NULL) &&
9526 (!ctxt->disableSAX))
9527 ctxt->sax->endElementNs(ctxt->userData, name,
9528 prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009529#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009530 } else {
9531 if ((ctxt->sax != NULL) &&
9532 (ctxt->sax->endElement != NULL) &&
9533 (!ctxt->disableSAX))
9534 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009535#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009536 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009537 spacePop(ctxt);
9538 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009539 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009540 } else {
9541 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009542 }
9543 break;
9544 }
9545 if (RAW == '>') {
9546 NEXT;
9547 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009548 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009549 "Couldn't find end of Start Tag %s\n",
9550 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009551 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009552 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009553 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009554 if (ctxt->sax2)
9555 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009556#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009557 else
9558 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009559#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009560
Daniel Veillarda880b122003-04-21 21:36:41 +00009561 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009562 break;
9563 }
9564 case XML_PARSER_CONTENT: {
9565 const xmlChar *test;
9566 unsigned int cons;
9567 if ((avail < 2) && (ctxt->inputNr == 1))
9568 goto done;
9569 cur = ctxt->input->cur[0];
9570 next = ctxt->input->cur[1];
9571
9572 test = CUR_PTR;
9573 cons = ctxt->input->consumed;
9574 if ((cur == '<') && (next == '/')) {
9575 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009576 break;
9577 } else if ((cur == '<') && (next == '?')) {
9578 if ((!terminate) &&
9579 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9580 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009581 xmlParsePI(ctxt);
9582 } else if ((cur == '<') && (next != '!')) {
9583 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009584 break;
9585 } else if ((cur == '<') && (next == '!') &&
9586 (ctxt->input->cur[2] == '-') &&
9587 (ctxt->input->cur[3] == '-')) {
9588 if ((!terminate) &&
9589 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9590 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009591 xmlParseComment(ctxt);
9592 ctxt->instate = XML_PARSER_CONTENT;
9593 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9594 (ctxt->input->cur[2] == '[') &&
9595 (ctxt->input->cur[3] == 'C') &&
9596 (ctxt->input->cur[4] == 'D') &&
9597 (ctxt->input->cur[5] == 'A') &&
9598 (ctxt->input->cur[6] == 'T') &&
9599 (ctxt->input->cur[7] == 'A') &&
9600 (ctxt->input->cur[8] == '[')) {
9601 SKIP(9);
9602 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009603 break;
9604 } else if ((cur == '<') && (next == '!') &&
9605 (avail < 9)) {
9606 goto done;
9607 } else if (cur == '&') {
9608 if ((!terminate) &&
9609 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9610 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009611 xmlParseReference(ctxt);
9612 } else {
9613 /* TODO Avoid the extra copy, handle directly !!! */
9614 /*
9615 * Goal of the following test is:
9616 * - minimize calls to the SAX 'character' callback
9617 * when they are mergeable
9618 * - handle an problem for isBlank when we only parse
9619 * a sequence of blank chars and the next one is
9620 * not available to check against '<' presence.
9621 * - tries to homogenize the differences in SAX
9622 * callbacks between the push and pull versions
9623 * of the parser.
9624 */
9625 if ((ctxt->inputNr == 1) &&
9626 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9627 if (!terminate) {
9628 if (ctxt->progressive) {
9629 if ((lastlt == NULL) ||
9630 (ctxt->input->cur > lastlt))
9631 goto done;
9632 } else if (xmlParseLookupSequence(ctxt,
9633 '<', 0, 0) < 0) {
9634 goto done;
9635 }
9636 }
9637 }
9638 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009639 xmlParseCharData(ctxt, 0);
9640 }
9641 /*
9642 * Pop-up of finished entities.
9643 */
9644 while ((RAW == 0) && (ctxt->inputNr > 1))
9645 xmlPopInput(ctxt);
9646 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009647 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9648 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009649 ctxt->instate = XML_PARSER_EOF;
9650 break;
9651 }
9652 break;
9653 }
9654 case XML_PARSER_END_TAG:
9655 if (avail < 2)
9656 goto done;
9657 if (!terminate) {
9658 if (ctxt->progressive) {
9659 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9660 goto done;
9661 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9662 goto done;
9663 }
9664 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009665 if (ctxt->sax2) {
9666 xmlParseEndTag2(ctxt,
9667 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9668 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
9669 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1]);
9670 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009671 }
9672#ifdef LIBXML_SAX1_ENABLED
9673 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009674 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009675#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009676 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009677 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009678 } else {
9679 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009680 }
9681 break;
9682 case XML_PARSER_CDATA_SECTION: {
9683 /*
9684 * The Push mode need to have the SAX callback for
9685 * cdataBlock merge back contiguous callbacks.
9686 */
9687 int base;
9688
9689 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9690 if (base < 0) {
9691 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
9692 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9693 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009694 ctxt->sax->cdataBlock(ctxt->userData,
9695 ctxt->input->cur,
9696 XML_PARSER_BIG_BUFFER_SIZE);
9697 else if (ctxt->sax->characters != NULL)
9698 ctxt->sax->characters(ctxt->userData,
9699 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00009700 XML_PARSER_BIG_BUFFER_SIZE);
9701 }
9702 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
9703 ctxt->checkIndex = 0;
9704 }
9705 goto done;
9706 } else {
9707 if ((ctxt->sax != NULL) && (base > 0) &&
9708 (!ctxt->disableSAX)) {
9709 if (ctxt->sax->cdataBlock != NULL)
9710 ctxt->sax->cdataBlock(ctxt->userData,
9711 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009712 else if (ctxt->sax->characters != NULL)
9713 ctxt->sax->characters(ctxt->userData,
9714 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009715 }
9716 SKIP(base + 3);
9717 ctxt->checkIndex = 0;
9718 ctxt->instate = XML_PARSER_CONTENT;
9719#ifdef DEBUG_PUSH
9720 xmlGenericError(xmlGenericErrorContext,
9721 "PP: entering CONTENT\n");
9722#endif
9723 }
9724 break;
9725 }
Owen Taylor3473f882001-02-23 17:55:21 +00009726 case XML_PARSER_MISC:
9727 SKIP_BLANKS;
9728 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009729 avail = ctxt->input->length -
9730 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009731 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009732 avail = ctxt->input->buf->buffer->use -
9733 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009734 if (avail < 2)
9735 goto done;
9736 cur = ctxt->input->cur[0];
9737 next = ctxt->input->cur[1];
9738 if ((cur == '<') && (next == '?')) {
9739 if ((!terminate) &&
9740 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9741 goto done;
9742#ifdef DEBUG_PUSH
9743 xmlGenericError(xmlGenericErrorContext,
9744 "PP: Parsing PI\n");
9745#endif
9746 xmlParsePI(ctxt);
9747 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009748 (ctxt->input->cur[2] == '-') &&
9749 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009750 if ((!terminate) &&
9751 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9752 goto done;
9753#ifdef DEBUG_PUSH
9754 xmlGenericError(xmlGenericErrorContext,
9755 "PP: Parsing Comment\n");
9756#endif
9757 xmlParseComment(ctxt);
9758 ctxt->instate = XML_PARSER_MISC;
9759 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009760 (ctxt->input->cur[2] == 'D') &&
9761 (ctxt->input->cur[3] == 'O') &&
9762 (ctxt->input->cur[4] == 'C') &&
9763 (ctxt->input->cur[5] == 'T') &&
9764 (ctxt->input->cur[6] == 'Y') &&
9765 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009766 (ctxt->input->cur[8] == 'E')) {
9767 if ((!terminate) &&
9768 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9769 goto done;
9770#ifdef DEBUG_PUSH
9771 xmlGenericError(xmlGenericErrorContext,
9772 "PP: Parsing internal subset\n");
9773#endif
9774 ctxt->inSubset = 1;
9775 xmlParseDocTypeDecl(ctxt);
9776 if (RAW == '[') {
9777 ctxt->instate = XML_PARSER_DTD;
9778#ifdef DEBUG_PUSH
9779 xmlGenericError(xmlGenericErrorContext,
9780 "PP: entering DTD\n");
9781#endif
9782 } else {
9783 /*
9784 * Create and update the external subset.
9785 */
9786 ctxt->inSubset = 2;
9787 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9788 (ctxt->sax->externalSubset != NULL))
9789 ctxt->sax->externalSubset(ctxt->userData,
9790 ctxt->intSubName, ctxt->extSubSystem,
9791 ctxt->extSubURI);
9792 ctxt->inSubset = 0;
9793 ctxt->instate = XML_PARSER_PROLOG;
9794#ifdef DEBUG_PUSH
9795 xmlGenericError(xmlGenericErrorContext,
9796 "PP: entering PROLOG\n");
9797#endif
9798 }
9799 } else if ((cur == '<') && (next == '!') &&
9800 (avail < 9)) {
9801 goto done;
9802 } else {
9803 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009804 ctxt->progressive = 1;
9805 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009806#ifdef DEBUG_PUSH
9807 xmlGenericError(xmlGenericErrorContext,
9808 "PP: entering START_TAG\n");
9809#endif
9810 }
9811 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009812 case XML_PARSER_PROLOG:
9813 SKIP_BLANKS;
9814 if (ctxt->input->buf == NULL)
9815 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9816 else
9817 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9818 if (avail < 2)
9819 goto done;
9820 cur = ctxt->input->cur[0];
9821 next = ctxt->input->cur[1];
9822 if ((cur == '<') && (next == '?')) {
9823 if ((!terminate) &&
9824 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9825 goto done;
9826#ifdef DEBUG_PUSH
9827 xmlGenericError(xmlGenericErrorContext,
9828 "PP: Parsing PI\n");
9829#endif
9830 xmlParsePI(ctxt);
9831 } else if ((cur == '<') && (next == '!') &&
9832 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9833 if ((!terminate) &&
9834 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9835 goto done;
9836#ifdef DEBUG_PUSH
9837 xmlGenericError(xmlGenericErrorContext,
9838 "PP: Parsing Comment\n");
9839#endif
9840 xmlParseComment(ctxt);
9841 ctxt->instate = XML_PARSER_PROLOG;
9842 } else if ((cur == '<') && (next == '!') &&
9843 (avail < 4)) {
9844 goto done;
9845 } else {
9846 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009847 ctxt->progressive = 1;
9848 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009849#ifdef DEBUG_PUSH
9850 xmlGenericError(xmlGenericErrorContext,
9851 "PP: entering START_TAG\n");
9852#endif
9853 }
9854 break;
9855 case XML_PARSER_EPILOG:
9856 SKIP_BLANKS;
9857 if (ctxt->input->buf == NULL)
9858 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9859 else
9860 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9861 if (avail < 2)
9862 goto done;
9863 cur = ctxt->input->cur[0];
9864 next = ctxt->input->cur[1];
9865 if ((cur == '<') && (next == '?')) {
9866 if ((!terminate) &&
9867 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9868 goto done;
9869#ifdef DEBUG_PUSH
9870 xmlGenericError(xmlGenericErrorContext,
9871 "PP: Parsing PI\n");
9872#endif
9873 xmlParsePI(ctxt);
9874 ctxt->instate = XML_PARSER_EPILOG;
9875 } else if ((cur == '<') && (next == '!') &&
9876 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9877 if ((!terminate) &&
9878 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9879 goto done;
9880#ifdef DEBUG_PUSH
9881 xmlGenericError(xmlGenericErrorContext,
9882 "PP: Parsing Comment\n");
9883#endif
9884 xmlParseComment(ctxt);
9885 ctxt->instate = XML_PARSER_EPILOG;
9886 } else if ((cur == '<') && (next == '!') &&
9887 (avail < 4)) {
9888 goto done;
9889 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009890 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009891 ctxt->instate = XML_PARSER_EOF;
9892#ifdef DEBUG_PUSH
9893 xmlGenericError(xmlGenericErrorContext,
9894 "PP: entering EOF\n");
9895#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009896 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009897 ctxt->sax->endDocument(ctxt->userData);
9898 goto done;
9899 }
9900 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009901 case XML_PARSER_DTD: {
9902 /*
9903 * Sorry but progressive parsing of the internal subset
9904 * is not expected to be supported. We first check that
9905 * the full content of the internal subset is available and
9906 * the parsing is launched only at that point.
9907 * Internal subset ends up with "']' S? '>'" in an unescaped
9908 * section and not in a ']]>' sequence which are conditional
9909 * sections (whoever argued to keep that crap in XML deserve
9910 * a place in hell !).
9911 */
9912 int base, i;
9913 xmlChar *buf;
9914 xmlChar quote = 0;
9915
9916 base = ctxt->input->cur - ctxt->input->base;
9917 if (base < 0) return(0);
9918 if (ctxt->checkIndex > base)
9919 base = ctxt->checkIndex;
9920 buf = ctxt->input->buf->buffer->content;
9921 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9922 base++) {
9923 if (quote != 0) {
9924 if (buf[base] == quote)
9925 quote = 0;
9926 continue;
9927 }
9928 if (buf[base] == '"') {
9929 quote = '"';
9930 continue;
9931 }
9932 if (buf[base] == '\'') {
9933 quote = '\'';
9934 continue;
9935 }
9936 if (buf[base] == ']') {
9937 if ((unsigned int) base +1 >=
9938 ctxt->input->buf->buffer->use)
9939 break;
9940 if (buf[base + 1] == ']') {
9941 /* conditional crap, skip both ']' ! */
9942 base++;
9943 continue;
9944 }
9945 for (i = 0;
9946 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9947 i++) {
9948 if (buf[base + i] == '>')
9949 goto found_end_int_subset;
9950 }
9951 break;
9952 }
9953 }
9954 /*
9955 * We didn't found the end of the Internal subset
9956 */
9957 if (quote == 0)
9958 ctxt->checkIndex = base;
9959#ifdef DEBUG_PUSH
9960 if (next == 0)
9961 xmlGenericError(xmlGenericErrorContext,
9962 "PP: lookup of int subset end filed\n");
9963#endif
9964 goto done;
9965
9966found_end_int_subset:
9967 xmlParseInternalSubset(ctxt);
9968 ctxt->inSubset = 2;
9969 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9970 (ctxt->sax->externalSubset != NULL))
9971 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9972 ctxt->extSubSystem, ctxt->extSubURI);
9973 ctxt->inSubset = 0;
9974 ctxt->instate = XML_PARSER_PROLOG;
9975 ctxt->checkIndex = 0;
9976#ifdef DEBUG_PUSH
9977 xmlGenericError(xmlGenericErrorContext,
9978 "PP: entering PROLOG\n");
9979#endif
9980 break;
9981 }
9982 case XML_PARSER_COMMENT:
9983 xmlGenericError(xmlGenericErrorContext,
9984 "PP: internal error, state == COMMENT\n");
9985 ctxt->instate = XML_PARSER_CONTENT;
9986#ifdef DEBUG_PUSH
9987 xmlGenericError(xmlGenericErrorContext,
9988 "PP: entering CONTENT\n");
9989#endif
9990 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009991 case XML_PARSER_IGNORE:
9992 xmlGenericError(xmlGenericErrorContext,
9993 "PP: internal error, state == IGNORE");
9994 ctxt->instate = XML_PARSER_DTD;
9995#ifdef DEBUG_PUSH
9996 xmlGenericError(xmlGenericErrorContext,
9997 "PP: entering DTD\n");
9998#endif
9999 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010000 case XML_PARSER_PI:
10001 xmlGenericError(xmlGenericErrorContext,
10002 "PP: internal error, state == PI\n");
10003 ctxt->instate = XML_PARSER_CONTENT;
10004#ifdef DEBUG_PUSH
10005 xmlGenericError(xmlGenericErrorContext,
10006 "PP: entering CONTENT\n");
10007#endif
10008 break;
10009 case XML_PARSER_ENTITY_DECL:
10010 xmlGenericError(xmlGenericErrorContext,
10011 "PP: internal error, state == ENTITY_DECL\n");
10012 ctxt->instate = XML_PARSER_DTD;
10013#ifdef DEBUG_PUSH
10014 xmlGenericError(xmlGenericErrorContext,
10015 "PP: entering DTD\n");
10016#endif
10017 break;
10018 case XML_PARSER_ENTITY_VALUE:
10019 xmlGenericError(xmlGenericErrorContext,
10020 "PP: internal error, state == ENTITY_VALUE\n");
10021 ctxt->instate = XML_PARSER_CONTENT;
10022#ifdef DEBUG_PUSH
10023 xmlGenericError(xmlGenericErrorContext,
10024 "PP: entering DTD\n");
10025#endif
10026 break;
10027 case XML_PARSER_ATTRIBUTE_VALUE:
10028 xmlGenericError(xmlGenericErrorContext,
10029 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10030 ctxt->instate = XML_PARSER_START_TAG;
10031#ifdef DEBUG_PUSH
10032 xmlGenericError(xmlGenericErrorContext,
10033 "PP: entering START_TAG\n");
10034#endif
10035 break;
10036 case XML_PARSER_SYSTEM_LITERAL:
10037 xmlGenericError(xmlGenericErrorContext,
10038 "PP: internal error, state == SYSTEM_LITERAL\n");
10039 ctxt->instate = XML_PARSER_START_TAG;
10040#ifdef DEBUG_PUSH
10041 xmlGenericError(xmlGenericErrorContext,
10042 "PP: entering START_TAG\n");
10043#endif
10044 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010045 case XML_PARSER_PUBLIC_LITERAL:
10046 xmlGenericError(xmlGenericErrorContext,
10047 "PP: internal error, state == PUBLIC_LITERAL\n");
10048 ctxt->instate = XML_PARSER_START_TAG;
10049#ifdef DEBUG_PUSH
10050 xmlGenericError(xmlGenericErrorContext,
10051 "PP: entering START_TAG\n");
10052#endif
10053 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010054 }
10055 }
10056done:
10057#ifdef DEBUG_PUSH
10058 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10059#endif
10060 return(ret);
10061}
10062
10063/**
Owen Taylor3473f882001-02-23 17:55:21 +000010064 * xmlParseChunk:
10065 * @ctxt: an XML parser context
10066 * @chunk: an char array
10067 * @size: the size in byte of the chunk
10068 * @terminate: last chunk indicator
10069 *
10070 * Parse a Chunk of memory
10071 *
10072 * Returns zero if no error, the xmlParserErrors otherwise.
10073 */
10074int
10075xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10076 int terminate) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010077 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10078 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010079 if (ctxt->instate == XML_PARSER_START)
10080 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010081 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10082 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10083 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10084 int cur = ctxt->input->cur - ctxt->input->base;
10085
10086 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10087 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10088 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010089 ctxt->input->end =
10090 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010091#ifdef DEBUG_PUSH
10092 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10093#endif
10094
Owen Taylor3473f882001-02-23 17:55:21 +000010095 } else if (ctxt->instate != XML_PARSER_EOF) {
10096 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10097 xmlParserInputBufferPtr in = ctxt->input->buf;
10098 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10099 (in->raw != NULL)) {
10100 int nbchars;
10101
10102 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10103 if (nbchars < 0) {
10104 xmlGenericError(xmlGenericErrorContext,
10105 "xmlParseChunk: encoder error\n");
10106 return(XML_ERR_INVALID_ENCODING);
10107 }
10108 }
10109 }
10110 }
10111 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010112 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10113 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010114 if (terminate) {
10115 /*
10116 * Check for termination
10117 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010118 int avail = 0;
10119 if (ctxt->input->buf == NULL)
10120 avail = ctxt->input->length -
10121 (ctxt->input->cur - ctxt->input->base);
10122 else
10123 avail = ctxt->input->buf->buffer->use -
10124 (ctxt->input->cur - ctxt->input->base);
10125
Owen Taylor3473f882001-02-23 17:55:21 +000010126 if ((ctxt->instate != XML_PARSER_EOF) &&
10127 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010128 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010129 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010130 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010131 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010132 }
Owen Taylor3473f882001-02-23 17:55:21 +000010133 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010134 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010135 ctxt->sax->endDocument(ctxt->userData);
10136 }
10137 ctxt->instate = XML_PARSER_EOF;
10138 }
10139 return((xmlParserErrors) ctxt->errNo);
10140}
10141
10142/************************************************************************
10143 * *
10144 * I/O front end functions to the parser *
10145 * *
10146 ************************************************************************/
10147
10148/**
10149 * xmlStopParser:
10150 * @ctxt: an XML parser context
10151 *
10152 * Blocks further parser processing
10153 */
10154void
10155xmlStopParser(xmlParserCtxtPtr ctxt) {
10156 ctxt->instate = XML_PARSER_EOF;
10157 if (ctxt->input != NULL)
10158 ctxt->input->cur = BAD_CAST"";
10159}
10160
10161/**
10162 * xmlCreatePushParserCtxt:
10163 * @sax: a SAX handler
10164 * @user_data: The user data returned on SAX callbacks
10165 * @chunk: a pointer to an array of chars
10166 * @size: number of chars in the array
10167 * @filename: an optional file name or URI
10168 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010169 * Create a parser context for using the XML parser in push mode.
10170 * If @buffer and @size are non-NULL, the data is used to detect
10171 * the encoding. The remaining characters will be parsed so they
10172 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010173 * To allow content encoding detection, @size should be >= 4
10174 * The value of @filename is used for fetching external entities
10175 * and error/warning reports.
10176 *
10177 * Returns the new parser context or NULL
10178 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010179
Owen Taylor3473f882001-02-23 17:55:21 +000010180xmlParserCtxtPtr
10181xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10182 const char *chunk, int size, const char *filename) {
10183 xmlParserCtxtPtr ctxt;
10184 xmlParserInputPtr inputStream;
10185 xmlParserInputBufferPtr buf;
10186 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10187
10188 /*
10189 * plug some encoding conversion routines
10190 */
10191 if ((chunk != NULL) && (size >= 4))
10192 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10193
10194 buf = xmlAllocParserInputBuffer(enc);
10195 if (buf == NULL) return(NULL);
10196
10197 ctxt = xmlNewParserCtxt();
10198 if (ctxt == NULL) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010199 xmlGenericError(xmlGenericErrorContext,
10200 "xml parser: out of memory\n");
10201 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010202 return(NULL);
10203 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010204 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10205 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010206 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010207 xmlFreeParserInputBuffer(buf);
10208 xmlFreeParserCtxt(ctxt);
10209 return(NULL);
10210 }
Owen Taylor3473f882001-02-23 17:55:21 +000010211 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010212#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010213 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010214#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010215 xmlFree(ctxt->sax);
10216 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10217 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010218 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010219 xmlFreeParserInputBuffer(buf);
10220 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010221 return(NULL);
10222 }
10223 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10224 if (user_data != NULL)
10225 ctxt->userData = user_data;
10226 }
10227 if (filename == NULL) {
10228 ctxt->directory = NULL;
10229 } else {
10230 ctxt->directory = xmlParserGetDirectory(filename);
10231 }
10232
10233 inputStream = xmlNewInputStream(ctxt);
10234 if (inputStream == NULL) {
10235 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010236 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010237 return(NULL);
10238 }
10239
10240 if (filename == NULL)
10241 inputStream->filename = NULL;
10242 else
Daniel Veillardf4862f02002-09-10 11:13:43 +000010243 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010244 xmlCanonicPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +000010245 inputStream->buf = buf;
10246 inputStream->base = inputStream->buf->buffer->content;
10247 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010248 inputStream->end =
10249 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010250
10251 inputPush(ctxt, inputStream);
10252
10253 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10254 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010255 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10256 int cur = ctxt->input->cur - ctxt->input->base;
10257
Owen Taylor3473f882001-02-23 17:55:21 +000010258 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010259
10260 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10261 ctxt->input->cur = ctxt->input->base + cur;
10262 ctxt->input->end =
10263 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010264#ifdef DEBUG_PUSH
10265 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10266#endif
10267 }
10268
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010269 if (enc != XML_CHAR_ENCODING_NONE) {
10270 xmlSwitchEncoding(ctxt, enc);
10271 }
10272
Owen Taylor3473f882001-02-23 17:55:21 +000010273 return(ctxt);
10274}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010275#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010276
10277/**
10278 * xmlCreateIOParserCtxt:
10279 * @sax: a SAX handler
10280 * @user_data: The user data returned on SAX callbacks
10281 * @ioread: an I/O read function
10282 * @ioclose: an I/O close function
10283 * @ioctx: an I/O handler
10284 * @enc: the charset encoding if known
10285 *
10286 * Create a parser context for using the XML parser with an existing
10287 * I/O stream
10288 *
10289 * Returns the new parser context or NULL
10290 */
10291xmlParserCtxtPtr
10292xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10293 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10294 void *ioctx, xmlCharEncoding enc) {
10295 xmlParserCtxtPtr ctxt;
10296 xmlParserInputPtr inputStream;
10297 xmlParserInputBufferPtr buf;
10298
10299 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10300 if (buf == NULL) return(NULL);
10301
10302 ctxt = xmlNewParserCtxt();
10303 if (ctxt == NULL) {
10304 xmlFree(buf);
10305 return(NULL);
10306 }
10307 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010308#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010309 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010310#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010311 xmlFree(ctxt->sax);
10312 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10313 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010314 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010315 xmlFree(ctxt);
10316 return(NULL);
10317 }
10318 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10319 if (user_data != NULL)
10320 ctxt->userData = user_data;
10321 }
10322
10323 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10324 if (inputStream == NULL) {
10325 xmlFreeParserCtxt(ctxt);
10326 return(NULL);
10327 }
10328 inputPush(ctxt, inputStream);
10329
10330 return(ctxt);
10331}
10332
Daniel Veillard4432df22003-09-28 18:58:27 +000010333#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010334/************************************************************************
10335 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010336 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010337 * *
10338 ************************************************************************/
10339
10340/**
10341 * xmlIOParseDTD:
10342 * @sax: the SAX handler block or NULL
10343 * @input: an Input Buffer
10344 * @enc: the charset encoding if known
10345 *
10346 * Load and parse a DTD
10347 *
10348 * Returns the resulting xmlDtdPtr or NULL in case of error.
10349 * @input will be freed at parsing end.
10350 */
10351
10352xmlDtdPtr
10353xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10354 xmlCharEncoding enc) {
10355 xmlDtdPtr ret = NULL;
10356 xmlParserCtxtPtr ctxt;
10357 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010358 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010359
10360 if (input == NULL)
10361 return(NULL);
10362
10363 ctxt = xmlNewParserCtxt();
10364 if (ctxt == NULL) {
10365 return(NULL);
10366 }
10367
10368 /*
10369 * Set-up the SAX context
10370 */
10371 if (sax != NULL) {
10372 if (ctxt->sax != NULL)
10373 xmlFree(ctxt->sax);
10374 ctxt->sax = sax;
10375 ctxt->userData = NULL;
10376 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010377 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010378
10379 /*
10380 * generate a parser input from the I/O handler
10381 */
10382
10383 pinput = xmlNewIOInputStream(ctxt, input, enc);
10384 if (pinput == NULL) {
10385 if (sax != NULL) ctxt->sax = NULL;
10386 xmlFreeParserCtxt(ctxt);
10387 return(NULL);
10388 }
10389
10390 /*
10391 * plug some encoding conversion routines here.
10392 */
10393 xmlPushInput(ctxt, pinput);
10394
10395 pinput->filename = NULL;
10396 pinput->line = 1;
10397 pinput->col = 1;
10398 pinput->base = ctxt->input->cur;
10399 pinput->cur = ctxt->input->cur;
10400 pinput->free = NULL;
10401
10402 /*
10403 * let's parse that entity knowing it's an external subset.
10404 */
10405 ctxt->inSubset = 2;
10406 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10407 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10408 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010409
10410 if (enc == XML_CHAR_ENCODING_NONE) {
10411 /*
10412 * Get the 4 first bytes and decode the charset
10413 * if enc != XML_CHAR_ENCODING_NONE
10414 * plug some encoding conversion routines.
10415 */
10416 start[0] = RAW;
10417 start[1] = NXT(1);
10418 start[2] = NXT(2);
10419 start[3] = NXT(3);
10420 enc = xmlDetectCharEncoding(start, 4);
10421 if (enc != XML_CHAR_ENCODING_NONE) {
10422 xmlSwitchEncoding(ctxt, enc);
10423 }
10424 }
10425
Owen Taylor3473f882001-02-23 17:55:21 +000010426 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10427
10428 if (ctxt->myDoc != NULL) {
10429 if (ctxt->wellFormed) {
10430 ret = ctxt->myDoc->extSubset;
10431 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010432 if (ret != NULL) {
10433 xmlNodePtr tmp;
10434
10435 ret->doc = NULL;
10436 tmp = ret->children;
10437 while (tmp != NULL) {
10438 tmp->doc = NULL;
10439 tmp = tmp->next;
10440 }
10441 }
Owen Taylor3473f882001-02-23 17:55:21 +000010442 } else {
10443 ret = NULL;
10444 }
10445 xmlFreeDoc(ctxt->myDoc);
10446 ctxt->myDoc = NULL;
10447 }
10448 if (sax != NULL) ctxt->sax = NULL;
10449 xmlFreeParserCtxt(ctxt);
10450
10451 return(ret);
10452}
10453
10454/**
10455 * xmlSAXParseDTD:
10456 * @sax: the SAX handler block
10457 * @ExternalID: a NAME* containing the External ID of the DTD
10458 * @SystemID: a NAME* containing the URL to the DTD
10459 *
10460 * Load and parse an external subset.
10461 *
10462 * Returns the resulting xmlDtdPtr or NULL in case of error.
10463 */
10464
10465xmlDtdPtr
10466xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10467 const xmlChar *SystemID) {
10468 xmlDtdPtr ret = NULL;
10469 xmlParserCtxtPtr ctxt;
10470 xmlParserInputPtr input = NULL;
10471 xmlCharEncoding enc;
10472
10473 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10474
10475 ctxt = xmlNewParserCtxt();
10476 if (ctxt == NULL) {
10477 return(NULL);
10478 }
10479
10480 /*
10481 * Set-up the SAX context
10482 */
10483 if (sax != NULL) {
10484 if (ctxt->sax != NULL)
10485 xmlFree(ctxt->sax);
10486 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010487 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010488 }
10489
10490 /*
10491 * Ask the Entity resolver to load the damn thing
10492 */
10493
10494 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillardc6abc3d2003-04-26 13:27:30 +000010495 input = ctxt->sax->resolveEntity(ctxt, ExternalID, SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010496 if (input == NULL) {
10497 if (sax != NULL) ctxt->sax = NULL;
10498 xmlFreeParserCtxt(ctxt);
10499 return(NULL);
10500 }
10501
10502 /*
10503 * plug some encoding conversion routines here.
10504 */
10505 xmlPushInput(ctxt, input);
10506 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10507 xmlSwitchEncoding(ctxt, enc);
10508
10509 if (input->filename == NULL)
Daniel Veillard85095e22003-04-23 13:56:44 +000010510 input->filename = (char *) xmlCanonicPath(SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010511 input->line = 1;
10512 input->col = 1;
10513 input->base = ctxt->input->cur;
10514 input->cur = ctxt->input->cur;
10515 input->free = NULL;
10516
10517 /*
10518 * let's parse that entity knowing it's an external subset.
10519 */
10520 ctxt->inSubset = 2;
10521 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10522 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10523 ExternalID, SystemID);
10524 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10525
10526 if (ctxt->myDoc != NULL) {
10527 if (ctxt->wellFormed) {
10528 ret = ctxt->myDoc->extSubset;
10529 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010530 if (ret != NULL) {
10531 xmlNodePtr tmp;
10532
10533 ret->doc = NULL;
10534 tmp = ret->children;
10535 while (tmp != NULL) {
10536 tmp->doc = NULL;
10537 tmp = tmp->next;
10538 }
10539 }
Owen Taylor3473f882001-02-23 17:55:21 +000010540 } else {
10541 ret = NULL;
10542 }
10543 xmlFreeDoc(ctxt->myDoc);
10544 ctxt->myDoc = NULL;
10545 }
10546 if (sax != NULL) ctxt->sax = NULL;
10547 xmlFreeParserCtxt(ctxt);
10548
10549 return(ret);
10550}
10551
Daniel Veillard4432df22003-09-28 18:58:27 +000010552
Owen Taylor3473f882001-02-23 17:55:21 +000010553/**
10554 * xmlParseDTD:
10555 * @ExternalID: a NAME* containing the External ID of the DTD
10556 * @SystemID: a NAME* containing the URL to the DTD
10557 *
10558 * Load and parse an external subset.
10559 *
10560 * Returns the resulting xmlDtdPtr or NULL in case of error.
10561 */
10562
10563xmlDtdPtr
10564xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10565 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10566}
Daniel Veillard4432df22003-09-28 18:58:27 +000010567#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010568
10569/************************************************************************
10570 * *
10571 * Front ends when parsing an Entity *
10572 * *
10573 ************************************************************************/
10574
10575/**
Owen Taylor3473f882001-02-23 17:55:21 +000010576 * xmlParseCtxtExternalEntity:
10577 * @ctx: the existing parsing context
10578 * @URL: the URL for the entity to load
10579 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010580 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010581 *
10582 * Parse an external general entity within an existing parsing context
10583 * An external general parsed entity is well-formed if it matches the
10584 * production labeled extParsedEnt.
10585 *
10586 * [78] extParsedEnt ::= TextDecl? content
10587 *
10588 * Returns 0 if the entity is well formed, -1 in case of args problem and
10589 * the parser error code otherwise
10590 */
10591
10592int
10593xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010594 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010595 xmlParserCtxtPtr ctxt;
10596 xmlDocPtr newDoc;
10597 xmlSAXHandlerPtr oldsax = NULL;
10598 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010599 xmlChar start[4];
10600 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010601
10602 if (ctx->depth > 40) {
10603 return(XML_ERR_ENTITY_LOOP);
10604 }
10605
Daniel Veillardcda96922001-08-21 10:56:31 +000010606 if (lst != NULL)
10607 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010608 if ((URL == NULL) && (ID == NULL))
10609 return(-1);
10610 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10611 return(-1);
10612
10613
10614 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10615 if (ctxt == NULL) return(-1);
10616 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010617 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010618 oldsax = ctxt->sax;
10619 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010620 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010621 newDoc = xmlNewDoc(BAD_CAST "1.0");
10622 if (newDoc == NULL) {
10623 xmlFreeParserCtxt(ctxt);
10624 return(-1);
10625 }
10626 if (ctx->myDoc != NULL) {
10627 newDoc->intSubset = ctx->myDoc->intSubset;
10628 newDoc->extSubset = ctx->myDoc->extSubset;
10629 }
10630 if (ctx->myDoc->URL != NULL) {
10631 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10632 }
10633 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10634 if (newDoc->children == NULL) {
10635 ctxt->sax = oldsax;
10636 xmlFreeParserCtxt(ctxt);
10637 newDoc->intSubset = NULL;
10638 newDoc->extSubset = NULL;
10639 xmlFreeDoc(newDoc);
10640 return(-1);
10641 }
10642 nodePush(ctxt, newDoc->children);
10643 if (ctx->myDoc == NULL) {
10644 ctxt->myDoc = newDoc;
10645 } else {
10646 ctxt->myDoc = ctx->myDoc;
10647 newDoc->children->doc = ctx->myDoc;
10648 }
10649
Daniel Veillard87a764e2001-06-20 17:41:10 +000010650 /*
10651 * Get the 4 first bytes and decode the charset
10652 * if enc != XML_CHAR_ENCODING_NONE
10653 * plug some encoding conversion routines.
10654 */
10655 GROW
10656 start[0] = RAW;
10657 start[1] = NXT(1);
10658 start[2] = NXT(2);
10659 start[3] = NXT(3);
10660 enc = xmlDetectCharEncoding(start, 4);
10661 if (enc != XML_CHAR_ENCODING_NONE) {
10662 xmlSwitchEncoding(ctxt, enc);
10663 }
10664
Owen Taylor3473f882001-02-23 17:55:21 +000010665 /*
10666 * Parse a possible text declaration first
10667 */
Owen Taylor3473f882001-02-23 17:55:21 +000010668 if ((RAW == '<') && (NXT(1) == '?') &&
10669 (NXT(2) == 'x') && (NXT(3) == 'm') &&
10670 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
10671 xmlParseTextDecl(ctxt);
10672 }
10673
10674 /*
10675 * Doing validity checking on chunk doesn't make sense
10676 */
10677 ctxt->instate = XML_PARSER_CONTENT;
10678 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010679 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010680 ctxt->loadsubset = ctx->loadsubset;
10681 ctxt->depth = ctx->depth + 1;
10682 ctxt->replaceEntities = ctx->replaceEntities;
10683 if (ctxt->validate) {
10684 ctxt->vctxt.error = ctx->vctxt.error;
10685 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010686 } else {
10687 ctxt->vctxt.error = NULL;
10688 ctxt->vctxt.warning = NULL;
10689 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010690 ctxt->vctxt.nodeTab = NULL;
10691 ctxt->vctxt.nodeNr = 0;
10692 ctxt->vctxt.nodeMax = 0;
10693 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010694
10695 xmlParseContent(ctxt);
10696
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010697 ctx->validate = ctxt->validate;
10698 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010699 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010700 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010701 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010702 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010703 }
10704 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010705 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010706 }
10707
10708 if (!ctxt->wellFormed) {
10709 if (ctxt->errNo == 0)
10710 ret = 1;
10711 else
10712 ret = ctxt->errNo;
10713 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010714 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010715 xmlNodePtr cur;
10716
10717 /*
10718 * Return the newly created nodeset after unlinking it from
10719 * they pseudo parent.
10720 */
10721 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010722 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010723 while (cur != NULL) {
10724 cur->parent = NULL;
10725 cur = cur->next;
10726 }
10727 newDoc->children->children = NULL;
10728 }
10729 ret = 0;
10730 }
10731 ctxt->sax = oldsax;
10732 xmlFreeParserCtxt(ctxt);
10733 newDoc->intSubset = NULL;
10734 newDoc->extSubset = NULL;
10735 xmlFreeDoc(newDoc);
10736
10737 return(ret);
10738}
10739
10740/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010741 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010742 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010743 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010744 * @sax: the SAX handler bloc (possibly NULL)
10745 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10746 * @depth: Used for loop detection, use 0
10747 * @URL: the URL for the entity to load
10748 * @ID: the System ID for the entity to load
10749 * @list: the return value for the set of parsed nodes
10750 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010751 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010752 *
10753 * Returns 0 if the entity is well formed, -1 in case of args problem and
10754 * the parser error code otherwise
10755 */
10756
Daniel Veillard7d515752003-09-26 19:12:37 +000010757static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010758xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10759 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010760 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010761 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010762 xmlParserCtxtPtr ctxt;
10763 xmlDocPtr newDoc;
10764 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000010765 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010766 xmlChar start[4];
10767 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010768
10769 if (depth > 40) {
10770 return(XML_ERR_ENTITY_LOOP);
10771 }
10772
10773
10774
10775 if (list != NULL)
10776 *list = NULL;
10777 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000010778 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010779 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000010780 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010781
10782
10783 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000010784 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000010785 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010786 if (oldctxt != NULL) {
10787 ctxt->_private = oldctxt->_private;
10788 ctxt->loadsubset = oldctxt->loadsubset;
10789 ctxt->validate = oldctxt->validate;
10790 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010791 ctxt->record_info = oldctxt->record_info;
10792 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10793 ctxt->node_seq.length = oldctxt->node_seq.length;
10794 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010795 } else {
10796 /*
10797 * Doing validity checking on chunk without context
10798 * doesn't make sense
10799 */
10800 ctxt->_private = NULL;
10801 ctxt->validate = 0;
10802 ctxt->external = 2;
10803 ctxt->loadsubset = 0;
10804 }
Owen Taylor3473f882001-02-23 17:55:21 +000010805 if (sax != NULL) {
10806 oldsax = ctxt->sax;
10807 ctxt->sax = sax;
10808 if (user_data != NULL)
10809 ctxt->userData = user_data;
10810 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010811 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010812 newDoc = xmlNewDoc(BAD_CAST "1.0");
10813 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010814 ctxt->node_seq.maximum = 0;
10815 ctxt->node_seq.length = 0;
10816 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010817 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000010818 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010819 }
10820 if (doc != NULL) {
10821 newDoc->intSubset = doc->intSubset;
10822 newDoc->extSubset = doc->extSubset;
10823 }
10824 if (doc->URL != NULL) {
10825 newDoc->URL = xmlStrdup(doc->URL);
10826 }
10827 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10828 if (newDoc->children == NULL) {
10829 if (sax != NULL)
10830 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010831 ctxt->node_seq.maximum = 0;
10832 ctxt->node_seq.length = 0;
10833 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010834 xmlFreeParserCtxt(ctxt);
10835 newDoc->intSubset = NULL;
10836 newDoc->extSubset = NULL;
10837 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000010838 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010839 }
10840 nodePush(ctxt, newDoc->children);
10841 if (doc == NULL) {
10842 ctxt->myDoc = newDoc;
10843 } else {
10844 ctxt->myDoc = doc;
10845 newDoc->children->doc = doc;
10846 }
10847
Daniel Veillard87a764e2001-06-20 17:41:10 +000010848 /*
10849 * Get the 4 first bytes and decode the charset
10850 * if enc != XML_CHAR_ENCODING_NONE
10851 * plug some encoding conversion routines.
10852 */
10853 GROW;
10854 start[0] = RAW;
10855 start[1] = NXT(1);
10856 start[2] = NXT(2);
10857 start[3] = NXT(3);
10858 enc = xmlDetectCharEncoding(start, 4);
10859 if (enc != XML_CHAR_ENCODING_NONE) {
10860 xmlSwitchEncoding(ctxt, enc);
10861 }
10862
Owen Taylor3473f882001-02-23 17:55:21 +000010863 /*
10864 * Parse a possible text declaration first
10865 */
Owen Taylor3473f882001-02-23 17:55:21 +000010866 if ((RAW == '<') && (NXT(1) == '?') &&
10867 (NXT(2) == 'x') && (NXT(3) == 'm') &&
10868 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
10869 xmlParseTextDecl(ctxt);
10870 }
10871
Owen Taylor3473f882001-02-23 17:55:21 +000010872 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000010873 ctxt->depth = depth;
10874
10875 xmlParseContent(ctxt);
10876
Daniel Veillard561b7f82002-03-20 21:55:57 +000010877 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010878 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000010879 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010880 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010881 }
10882 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010883 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010884 }
10885
10886 if (!ctxt->wellFormed) {
10887 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010888 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000010889 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010890 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000010891 } else {
10892 if (list != NULL) {
10893 xmlNodePtr cur;
10894
10895 /*
10896 * Return the newly created nodeset after unlinking it from
10897 * they pseudo parent.
10898 */
10899 cur = newDoc->children->children;
10900 *list = cur;
10901 while (cur != NULL) {
10902 cur->parent = NULL;
10903 cur = cur->next;
10904 }
10905 newDoc->children->children = NULL;
10906 }
Daniel Veillard7d515752003-09-26 19:12:37 +000010907 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000010908 }
10909 if (sax != NULL)
10910 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010911 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10912 oldctxt->node_seq.length = ctxt->node_seq.length;
10913 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010914 ctxt->node_seq.maximum = 0;
10915 ctxt->node_seq.length = 0;
10916 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010917 xmlFreeParserCtxt(ctxt);
10918 newDoc->intSubset = NULL;
10919 newDoc->extSubset = NULL;
10920 xmlFreeDoc(newDoc);
10921
10922 return(ret);
10923}
10924
Daniel Veillard81273902003-09-30 00:43:48 +000010925#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010926/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010927 * xmlParseExternalEntity:
10928 * @doc: the document the chunk pertains to
10929 * @sax: the SAX handler bloc (possibly NULL)
10930 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10931 * @depth: Used for loop detection, use 0
10932 * @URL: the URL for the entity to load
10933 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010934 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010935 *
10936 * Parse an external general entity
10937 * An external general parsed entity is well-formed if it matches the
10938 * production labeled extParsedEnt.
10939 *
10940 * [78] extParsedEnt ::= TextDecl? content
10941 *
10942 * Returns 0 if the entity is well formed, -1 in case of args problem and
10943 * the parser error code otherwise
10944 */
10945
10946int
10947xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010948 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010949 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010950 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010951}
10952
10953/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010954 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010955 * @doc: the document the chunk pertains to
10956 * @sax: the SAX handler bloc (possibly NULL)
10957 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10958 * @depth: Used for loop detection, use 0
10959 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010960 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010961 *
10962 * Parse a well-balanced chunk of an XML document
10963 * called by the parser
10964 * The allowed sequence for the Well Balanced Chunk is the one defined by
10965 * the content production in the XML grammar:
10966 *
10967 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10968 *
10969 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10970 * the parser error code otherwise
10971 */
10972
10973int
10974xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010975 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010976 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10977 depth, string, lst, 0 );
10978}
Daniel Veillard81273902003-09-30 00:43:48 +000010979#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000010980
10981/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000010982 * xmlParseBalancedChunkMemoryInternal:
10983 * @oldctxt: the existing parsing context
10984 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10985 * @user_data: the user data field for the parser context
10986 * @lst: the return value for the set of parsed nodes
10987 *
10988 *
10989 * Parse a well-balanced chunk of an XML document
10990 * called by the parser
10991 * The allowed sequence for the Well Balanced Chunk is the one defined by
10992 * the content production in the XML grammar:
10993 *
10994 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10995 *
Daniel Veillard7d515752003-09-26 19:12:37 +000010996 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
10997 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000010998 *
10999 * In case recover is set to 1, the nodelist will not be empty even if
11000 * the parsed chunk is not well balanced.
11001 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011002static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011003xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11004 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11005 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011006 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011007 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011008 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011009 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011010 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011011
11012 if (oldctxt->depth > 40) {
11013 return(XML_ERR_ENTITY_LOOP);
11014 }
11015
11016
11017 if (lst != NULL)
11018 *lst = NULL;
11019 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011020 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011021
11022 size = xmlStrlen(string);
11023
11024 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011025 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011026 if (user_data != NULL)
11027 ctxt->userData = user_data;
11028 else
11029 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011030 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11031 ctxt->dict = oldctxt->dict;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011032
11033 oldsax = ctxt->sax;
11034 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011035 xmlDetectSAX2(ctxt);
11036
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011037 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011038 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011039 newDoc = xmlNewDoc(BAD_CAST "1.0");
11040 if (newDoc == NULL) {
11041 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011042 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011043 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011044 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011045 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000011046 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011047 } else {
11048 ctxt->myDoc = oldctxt->myDoc;
11049 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011050 }
Daniel Veillard9bc53102002-11-25 13:20:04 +000011051 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +000011052 BAD_CAST "pseudoroot", NULL);
11053 if (ctxt->myDoc->children == NULL) {
11054 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011055 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011056 xmlFreeParserCtxt(ctxt);
11057 if (newDoc != NULL)
11058 xmlFreeDoc(newDoc);
William M. Brack7b9154b2003-09-27 19:23:50 +000011059 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011060 }
11061 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011062 ctxt->instate = XML_PARSER_CONTENT;
11063 ctxt->depth = oldctxt->depth + 1;
11064
Daniel Veillard328f48c2002-11-15 15:24:34 +000011065 ctxt->validate = 0;
11066 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011067 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11068 /*
11069 * ID/IDREF registration will be done in xmlValidateElement below
11070 */
11071 ctxt->loadsubset |= XML_SKIP_IDS;
11072 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011073 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011074
Daniel Veillard68e9e742002-11-16 15:35:11 +000011075 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011076 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011077 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011078 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011079 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011080 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011081 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011082 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011083 }
11084
11085 if (!ctxt->wellFormed) {
11086 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011087 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011088 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011089 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011090 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011091 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011092 }
11093
William M. Brack7b9154b2003-09-27 19:23:50 +000011094 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011095 xmlNodePtr cur;
11096
11097 /*
11098 * Return the newly created nodeset after unlinking it from
11099 * they pseudo parent.
11100 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011101 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011102 *lst = cur;
11103 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011104#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8d589042003-02-04 15:07:21 +000011105 if (oldctxt->validate && oldctxt->wellFormed &&
11106 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
11107 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11108 oldctxt->myDoc, cur);
11109 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011110#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011111 cur->parent = NULL;
11112 cur = cur->next;
11113 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011114 ctxt->myDoc->children->children = NULL;
11115 }
11116 if (ctxt->myDoc != NULL) {
11117 xmlFreeNode(ctxt->myDoc->children);
11118 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011119 }
11120
11121 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011122 ctxt->dict = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011123 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011124 if (newDoc != NULL)
11125 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011126
11127 return(ret);
11128}
11129
Daniel Veillard81273902003-09-30 00:43:48 +000011130#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011131/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011132 * xmlParseBalancedChunkMemoryRecover:
11133 * @doc: the document the chunk pertains to
11134 * @sax: the SAX handler bloc (possibly NULL)
11135 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11136 * @depth: Used for loop detection, use 0
11137 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11138 * @lst: the return value for the set of parsed nodes
11139 * @recover: return nodes even if the data is broken (use 0)
11140 *
11141 *
11142 * Parse a well-balanced chunk of an XML document
11143 * called by the parser
11144 * The allowed sequence for the Well Balanced Chunk is the one defined by
11145 * the content production in the XML grammar:
11146 *
11147 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11148 *
11149 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11150 * the parser error code otherwise
11151 *
11152 * In case recover is set to 1, the nodelist will not be empty even if
11153 * the parsed chunk is not well balanced.
11154 */
11155int
11156xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11157 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11158 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011159 xmlParserCtxtPtr ctxt;
11160 xmlDocPtr newDoc;
11161 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000011162 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000011163 int size;
11164 int ret = 0;
11165
11166 if (depth > 40) {
11167 return(XML_ERR_ENTITY_LOOP);
11168 }
11169
11170
Daniel Veillardcda96922001-08-21 10:56:31 +000011171 if (lst != NULL)
11172 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011173 if (string == NULL)
11174 return(-1);
11175
11176 size = xmlStrlen(string);
11177
11178 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11179 if (ctxt == NULL) return(-1);
11180 ctxt->userData = ctxt;
11181 if (sax != NULL) {
11182 oldsax = ctxt->sax;
11183 ctxt->sax = sax;
11184 if (user_data != NULL)
11185 ctxt->userData = user_data;
11186 }
11187 newDoc = xmlNewDoc(BAD_CAST "1.0");
11188 if (newDoc == NULL) {
11189 xmlFreeParserCtxt(ctxt);
11190 return(-1);
11191 }
11192 if (doc != NULL) {
11193 newDoc->intSubset = doc->intSubset;
11194 newDoc->extSubset = doc->extSubset;
11195 }
11196 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11197 if (newDoc->children == NULL) {
11198 if (sax != NULL)
11199 ctxt->sax = oldsax;
11200 xmlFreeParserCtxt(ctxt);
11201 newDoc->intSubset = NULL;
11202 newDoc->extSubset = NULL;
11203 xmlFreeDoc(newDoc);
11204 return(-1);
11205 }
11206 nodePush(ctxt, newDoc->children);
11207 if (doc == NULL) {
11208 ctxt->myDoc = newDoc;
11209 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011210 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011211 newDoc->children->doc = doc;
11212 }
11213 ctxt->instate = XML_PARSER_CONTENT;
11214 ctxt->depth = depth;
11215
11216 /*
11217 * Doing validity checking on chunk doesn't make sense
11218 */
11219 ctxt->validate = 0;
11220 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011221 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011222
Daniel Veillardb39bc392002-10-26 19:29:51 +000011223 if ( doc != NULL ){
11224 content = doc->children;
11225 doc->children = NULL;
11226 xmlParseContent(ctxt);
11227 doc->children = content;
11228 }
11229 else {
11230 xmlParseContent(ctxt);
11231 }
Owen Taylor3473f882001-02-23 17:55:21 +000011232 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011233 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011234 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011235 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011236 }
11237 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011238 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011239 }
11240
11241 if (!ctxt->wellFormed) {
11242 if (ctxt->errNo == 0)
11243 ret = 1;
11244 else
11245 ret = ctxt->errNo;
11246 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011247 ret = 0;
11248 }
11249
11250 if (lst != NULL && (ret == 0 || recover == 1)) {
11251 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011252
11253 /*
11254 * Return the newly created nodeset after unlinking it from
11255 * they pseudo parent.
11256 */
11257 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011258 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011259 while (cur != NULL) {
11260 cur->parent = NULL;
11261 cur = cur->next;
11262 }
11263 newDoc->children->children = NULL;
11264 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011265
Owen Taylor3473f882001-02-23 17:55:21 +000011266 if (sax != NULL)
11267 ctxt->sax = oldsax;
11268 xmlFreeParserCtxt(ctxt);
11269 newDoc->intSubset = NULL;
11270 newDoc->extSubset = NULL;
11271 xmlFreeDoc(newDoc);
11272
11273 return(ret);
11274}
11275
11276/**
11277 * xmlSAXParseEntity:
11278 * @sax: the SAX handler block
11279 * @filename: the filename
11280 *
11281 * parse an XML external entity out of context and build a tree.
11282 * It use the given SAX function block to handle the parsing callback.
11283 * If sax is NULL, fallback to the default DOM tree building routines.
11284 *
11285 * [78] extParsedEnt ::= TextDecl? content
11286 *
11287 * This correspond to a "Well Balanced" chunk
11288 *
11289 * Returns the resulting document tree
11290 */
11291
11292xmlDocPtr
11293xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11294 xmlDocPtr ret;
11295 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011296
11297 ctxt = xmlCreateFileParserCtxt(filename);
11298 if (ctxt == NULL) {
11299 return(NULL);
11300 }
11301 if (sax != NULL) {
11302 if (ctxt->sax != NULL)
11303 xmlFree(ctxt->sax);
11304 ctxt->sax = sax;
11305 ctxt->userData = NULL;
11306 }
11307
Owen Taylor3473f882001-02-23 17:55:21 +000011308 xmlParseExtParsedEnt(ctxt);
11309
11310 if (ctxt->wellFormed)
11311 ret = ctxt->myDoc;
11312 else {
11313 ret = NULL;
11314 xmlFreeDoc(ctxt->myDoc);
11315 ctxt->myDoc = NULL;
11316 }
11317 if (sax != NULL)
11318 ctxt->sax = NULL;
11319 xmlFreeParserCtxt(ctxt);
11320
11321 return(ret);
11322}
11323
11324/**
11325 * xmlParseEntity:
11326 * @filename: the filename
11327 *
11328 * parse an XML external entity out of context and build a tree.
11329 *
11330 * [78] extParsedEnt ::= TextDecl? content
11331 *
11332 * This correspond to a "Well Balanced" chunk
11333 *
11334 * Returns the resulting document tree
11335 */
11336
11337xmlDocPtr
11338xmlParseEntity(const char *filename) {
11339 return(xmlSAXParseEntity(NULL, filename));
11340}
Daniel Veillard81273902003-09-30 00:43:48 +000011341#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011342
11343/**
11344 * xmlCreateEntityParserCtxt:
11345 * @URL: the entity URL
11346 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011347 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000011348 *
11349 * Create a parser context for an external entity
11350 * Automatic support for ZLIB/Compress compressed document is provided
11351 * by default if found at compile-time.
11352 *
11353 * Returns the new parser context or NULL
11354 */
11355xmlParserCtxtPtr
11356xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
11357 const xmlChar *base) {
11358 xmlParserCtxtPtr ctxt;
11359 xmlParserInputPtr inputStream;
11360 char *directory = NULL;
11361 xmlChar *uri;
11362
11363 ctxt = xmlNewParserCtxt();
11364 if (ctxt == NULL) {
11365 return(NULL);
11366 }
11367
11368 uri = xmlBuildURI(URL, base);
11369
11370 if (uri == NULL) {
11371 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11372 if (inputStream == NULL) {
11373 xmlFreeParserCtxt(ctxt);
11374 return(NULL);
11375 }
11376
11377 inputPush(ctxt, inputStream);
11378
11379 if ((ctxt->directory == NULL) && (directory == NULL))
11380 directory = xmlParserGetDirectory((char *)URL);
11381 if ((ctxt->directory == NULL) && (directory != NULL))
11382 ctxt->directory = directory;
11383 } else {
11384 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
11385 if (inputStream == NULL) {
11386 xmlFree(uri);
11387 xmlFreeParserCtxt(ctxt);
11388 return(NULL);
11389 }
11390
11391 inputPush(ctxt, inputStream);
11392
11393 if ((ctxt->directory == NULL) && (directory == NULL))
11394 directory = xmlParserGetDirectory((char *)uri);
11395 if ((ctxt->directory == NULL) && (directory != NULL))
11396 ctxt->directory = directory;
11397 xmlFree(uri);
11398 }
Owen Taylor3473f882001-02-23 17:55:21 +000011399 return(ctxt);
11400}
11401
11402/************************************************************************
11403 * *
11404 * Front ends when parsing from a file *
11405 * *
11406 ************************************************************************/
11407
11408/**
11409 * xmlCreateFileParserCtxt:
11410 * @filename: the filename
11411 *
11412 * Create a parser context for a file content.
11413 * Automatic support for ZLIB/Compress compressed document is provided
11414 * by default if found at compile-time.
11415 *
11416 * Returns the new parser context or NULL
11417 */
11418xmlParserCtxtPtr
11419xmlCreateFileParserCtxt(const char *filename)
11420{
11421 xmlParserCtxtPtr ctxt;
11422 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000011423 char *directory = NULL;
11424
Owen Taylor3473f882001-02-23 17:55:21 +000011425 ctxt = xmlNewParserCtxt();
11426 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011427 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000011428 return(NULL);
11429 }
11430
Igor Zlatkovicce076162003-02-23 13:39:39 +000011431
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000011432 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011433 if (inputStream == NULL) {
11434 xmlFreeParserCtxt(ctxt);
11435 return(NULL);
11436 }
11437
Owen Taylor3473f882001-02-23 17:55:21 +000011438 inputPush(ctxt, inputStream);
11439 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011440 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011441 if ((ctxt->directory == NULL) && (directory != NULL))
11442 ctxt->directory = directory;
11443
11444 return(ctxt);
11445}
11446
Daniel Veillard81273902003-09-30 00:43:48 +000011447#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011448/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011449 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000011450 * @sax: the SAX handler block
11451 * @filename: the filename
11452 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11453 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000011454 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000011455 *
11456 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11457 * compressed document is provided by default if found at compile-time.
11458 * It use the given SAX function block to handle the parsing callback.
11459 * If sax is NULL, fallback to the default DOM tree building routines.
11460 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011461 * User data (void *) is stored within the parser context in the
11462 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011463 *
Owen Taylor3473f882001-02-23 17:55:21 +000011464 * Returns the resulting document tree
11465 */
11466
11467xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011468xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11469 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011470 xmlDocPtr ret;
11471 xmlParserCtxtPtr ctxt;
11472 char *directory = NULL;
11473
Daniel Veillard635ef722001-10-29 11:48:19 +000011474 xmlInitParser();
11475
Owen Taylor3473f882001-02-23 17:55:21 +000011476 ctxt = xmlCreateFileParserCtxt(filename);
11477 if (ctxt == NULL) {
11478 return(NULL);
11479 }
11480 if (sax != NULL) {
11481 if (ctxt->sax != NULL)
11482 xmlFree(ctxt->sax);
11483 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011484 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011485 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011486 if (data!=NULL) {
11487 ctxt->_private=data;
11488 }
Owen Taylor3473f882001-02-23 17:55:21 +000011489
11490 if ((ctxt->directory == NULL) && (directory == NULL))
11491 directory = xmlParserGetDirectory(filename);
11492 if ((ctxt->directory == NULL) && (directory != NULL))
11493 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11494
Daniel Veillarddad3f682002-11-17 16:47:27 +000011495 ctxt->recovery = recovery;
11496
Owen Taylor3473f882001-02-23 17:55:21 +000011497 xmlParseDocument(ctxt);
11498
William M. Brackc07329e2003-09-08 01:57:30 +000011499 if ((ctxt->wellFormed) || recovery) {
11500 ret = ctxt->myDoc;
11501 if (ctxt->input->buf->compressed > 0)
11502 ret->compression = 9;
11503 else
11504 ret->compression = ctxt->input->buf->compressed;
11505 }
Owen Taylor3473f882001-02-23 17:55:21 +000011506 else {
11507 ret = NULL;
11508 xmlFreeDoc(ctxt->myDoc);
11509 ctxt->myDoc = NULL;
11510 }
11511 if (sax != NULL)
11512 ctxt->sax = NULL;
11513 xmlFreeParserCtxt(ctxt);
11514
11515 return(ret);
11516}
11517
11518/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011519 * xmlSAXParseFile:
11520 * @sax: the SAX handler block
11521 * @filename: the filename
11522 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11523 * documents
11524 *
11525 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11526 * compressed document is provided by default if found at compile-time.
11527 * It use the given SAX function block to handle the parsing callback.
11528 * If sax is NULL, fallback to the default DOM tree building routines.
11529 *
11530 * Returns the resulting document tree
11531 */
11532
11533xmlDocPtr
11534xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11535 int recovery) {
11536 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11537}
11538
11539/**
Owen Taylor3473f882001-02-23 17:55:21 +000011540 * xmlRecoverDoc:
11541 * @cur: a pointer to an array of xmlChar
11542 *
11543 * parse an XML in-memory document and build a tree.
11544 * In the case the document is not Well Formed, a tree is built anyway
11545 *
11546 * Returns the resulting document tree
11547 */
11548
11549xmlDocPtr
11550xmlRecoverDoc(xmlChar *cur) {
11551 return(xmlSAXParseDoc(NULL, cur, 1));
11552}
11553
11554/**
11555 * xmlParseFile:
11556 * @filename: the filename
11557 *
11558 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11559 * compressed document is provided by default if found at compile-time.
11560 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000011561 * Returns the resulting document tree if the file was wellformed,
11562 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000011563 */
11564
11565xmlDocPtr
11566xmlParseFile(const char *filename) {
11567 return(xmlSAXParseFile(NULL, filename, 0));
11568}
11569
11570/**
11571 * xmlRecoverFile:
11572 * @filename: the filename
11573 *
11574 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11575 * compressed document is provided by default if found at compile-time.
11576 * In the case the document is not Well Formed, a tree is built anyway
11577 *
11578 * Returns the resulting document tree
11579 */
11580
11581xmlDocPtr
11582xmlRecoverFile(const char *filename) {
11583 return(xmlSAXParseFile(NULL, filename, 1));
11584}
11585
11586
11587/**
11588 * xmlSetupParserForBuffer:
11589 * @ctxt: an XML parser context
11590 * @buffer: a xmlChar * buffer
11591 * @filename: a file name
11592 *
11593 * Setup the parser context to parse a new buffer; Clears any prior
11594 * contents from the parser context. The buffer parameter must not be
11595 * NULL, but the filename parameter can be
11596 */
11597void
11598xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
11599 const char* filename)
11600{
11601 xmlParserInputPtr input;
11602
11603 input = xmlNewInputStream(ctxt);
11604 if (input == NULL) {
Daniel Veillard3487c8d2002-09-05 11:33:25 +000011605 xmlGenericError(xmlGenericErrorContext,
11606 "malloc");
Owen Taylor3473f882001-02-23 17:55:21 +000011607 xmlFree(ctxt);
11608 return;
11609 }
11610
11611 xmlClearParserCtxt(ctxt);
11612 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000011613 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011614 input->base = buffer;
11615 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011616 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000011617 inputPush(ctxt, input);
11618}
11619
11620/**
11621 * xmlSAXUserParseFile:
11622 * @sax: a SAX handler
11623 * @user_data: The user data returned on SAX callbacks
11624 * @filename: a file name
11625 *
11626 * parse an XML file and call the given SAX handler routines.
11627 * Automatic support for ZLIB/Compress compressed document is provided
11628 *
11629 * Returns 0 in case of success or a error number otherwise
11630 */
11631int
11632xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11633 const char *filename) {
11634 int ret = 0;
11635 xmlParserCtxtPtr ctxt;
11636
11637 ctxt = xmlCreateFileParserCtxt(filename);
11638 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000011639#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011640 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011641#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011642 xmlFree(ctxt->sax);
11643 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011644 xmlDetectSAX2(ctxt);
11645
Owen Taylor3473f882001-02-23 17:55:21 +000011646 if (user_data != NULL)
11647 ctxt->userData = user_data;
11648
11649 xmlParseDocument(ctxt);
11650
11651 if (ctxt->wellFormed)
11652 ret = 0;
11653 else {
11654 if (ctxt->errNo != 0)
11655 ret = ctxt->errNo;
11656 else
11657 ret = -1;
11658 }
11659 if (sax != NULL)
11660 ctxt->sax = NULL;
11661 xmlFreeParserCtxt(ctxt);
11662
11663 return ret;
11664}
Daniel Veillard81273902003-09-30 00:43:48 +000011665#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011666
11667/************************************************************************
11668 * *
11669 * Front ends when parsing from memory *
11670 * *
11671 ************************************************************************/
11672
11673/**
11674 * xmlCreateMemoryParserCtxt:
11675 * @buffer: a pointer to a char array
11676 * @size: the size of the array
11677 *
11678 * Create a parser context for an XML in-memory document.
11679 *
11680 * Returns the new parser context or NULL
11681 */
11682xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011683xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011684 xmlParserCtxtPtr ctxt;
11685 xmlParserInputPtr input;
11686 xmlParserInputBufferPtr buf;
11687
11688 if (buffer == NULL)
11689 return(NULL);
11690 if (size <= 0)
11691 return(NULL);
11692
11693 ctxt = xmlNewParserCtxt();
11694 if (ctxt == NULL)
11695 return(NULL);
11696
Daniel Veillard53350552003-09-18 13:35:51 +000011697 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000011698 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011699 if (buf == NULL) {
11700 xmlFreeParserCtxt(ctxt);
11701 return(NULL);
11702 }
Owen Taylor3473f882001-02-23 17:55:21 +000011703
11704 input = xmlNewInputStream(ctxt);
11705 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011706 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011707 xmlFreeParserCtxt(ctxt);
11708 return(NULL);
11709 }
11710
11711 input->filename = NULL;
11712 input->buf = buf;
11713 input->base = input->buf->buffer->content;
11714 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011715 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011716
11717 inputPush(ctxt, input);
11718 return(ctxt);
11719}
11720
Daniel Veillard81273902003-09-30 00:43:48 +000011721#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011722/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011723 * xmlSAXParseMemoryWithData:
11724 * @sax: the SAX handler block
11725 * @buffer: an pointer to a char array
11726 * @size: the size of the array
11727 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11728 * documents
11729 * @data: the userdata
11730 *
11731 * parse an XML in-memory block and use the given SAX function block
11732 * to handle the parsing callback. If sax is NULL, fallback to the default
11733 * DOM tree building routines.
11734 *
11735 * User data (void *) is stored within the parser context in the
11736 * context's _private member, so it is available nearly everywhere in libxml
11737 *
11738 * Returns the resulting document tree
11739 */
11740
11741xmlDocPtr
11742xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
11743 int size, int recovery, void *data) {
11744 xmlDocPtr ret;
11745 xmlParserCtxtPtr ctxt;
11746
11747 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11748 if (ctxt == NULL) return(NULL);
11749 if (sax != NULL) {
11750 if (ctxt->sax != NULL)
11751 xmlFree(ctxt->sax);
11752 ctxt->sax = sax;
11753 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011754 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011755 if (data!=NULL) {
11756 ctxt->_private=data;
11757 }
11758
Daniel Veillardadba5f12003-04-04 16:09:01 +000011759 ctxt->recovery = recovery;
11760
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011761 xmlParseDocument(ctxt);
11762
11763 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11764 else {
11765 ret = NULL;
11766 xmlFreeDoc(ctxt->myDoc);
11767 ctxt->myDoc = NULL;
11768 }
11769 if (sax != NULL)
11770 ctxt->sax = NULL;
11771 xmlFreeParserCtxt(ctxt);
11772
11773 return(ret);
11774}
11775
11776/**
Owen Taylor3473f882001-02-23 17:55:21 +000011777 * xmlSAXParseMemory:
11778 * @sax: the SAX handler block
11779 * @buffer: an pointer to a char array
11780 * @size: the size of the array
11781 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
11782 * documents
11783 *
11784 * parse an XML in-memory block and use the given SAX function block
11785 * to handle the parsing callback. If sax is NULL, fallback to the default
11786 * DOM tree building routines.
11787 *
11788 * Returns the resulting document tree
11789 */
11790xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000011791xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
11792 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011793 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011794}
11795
11796/**
11797 * xmlParseMemory:
11798 * @buffer: an pointer to a char array
11799 * @size: the size of the array
11800 *
11801 * parse an XML in-memory block and build a tree.
11802 *
11803 * Returns the resulting document tree
11804 */
11805
Daniel Veillard50822cb2001-07-26 20:05:51 +000011806xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011807 return(xmlSAXParseMemory(NULL, buffer, size, 0));
11808}
11809
11810/**
11811 * xmlRecoverMemory:
11812 * @buffer: an pointer to a char array
11813 * @size: the size of the array
11814 *
11815 * parse an XML in-memory block and build a tree.
11816 * In the case the document is not Well Formed, a tree is built anyway
11817 *
11818 * Returns the resulting document tree
11819 */
11820
Daniel Veillard50822cb2001-07-26 20:05:51 +000011821xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011822 return(xmlSAXParseMemory(NULL, buffer, size, 1));
11823}
11824
11825/**
11826 * xmlSAXUserParseMemory:
11827 * @sax: a SAX handler
11828 * @user_data: The user data returned on SAX callbacks
11829 * @buffer: an in-memory XML document input
11830 * @size: the length of the XML document in bytes
11831 *
11832 * A better SAX parsing routine.
11833 * parse an XML in-memory buffer and call the given SAX handler routines.
11834 *
11835 * Returns 0 in case of success or a error number otherwise
11836 */
11837int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011838 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011839 int ret = 0;
11840 xmlParserCtxtPtr ctxt;
11841 xmlSAXHandlerPtr oldsax = NULL;
11842
Daniel Veillard9e923512002-08-14 08:48:52 +000011843 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000011844 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11845 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000011846 oldsax = ctxt->sax;
11847 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011848 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000011849 if (user_data != NULL)
11850 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000011851
11852 xmlParseDocument(ctxt);
11853
11854 if (ctxt->wellFormed)
11855 ret = 0;
11856 else {
11857 if (ctxt->errNo != 0)
11858 ret = ctxt->errNo;
11859 else
11860 ret = -1;
11861 }
Daniel Veillard9e923512002-08-14 08:48:52 +000011862 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000011863 xmlFreeParserCtxt(ctxt);
11864
11865 return ret;
11866}
Daniel Veillard81273902003-09-30 00:43:48 +000011867#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011868
11869/**
11870 * xmlCreateDocParserCtxt:
11871 * @cur: a pointer to an array of xmlChar
11872 *
11873 * Creates a parser context for an XML in-memory document.
11874 *
11875 * Returns the new parser context or NULL
11876 */
11877xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011878xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000011879 int len;
11880
11881 if (cur == NULL)
11882 return(NULL);
11883 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011884 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000011885}
11886
Daniel Veillard81273902003-09-30 00:43:48 +000011887#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011888/**
11889 * xmlSAXParseDoc:
11890 * @sax: the SAX handler block
11891 * @cur: a pointer to an array of xmlChar
11892 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11893 * documents
11894 *
11895 * parse an XML in-memory document and build a tree.
11896 * It use the given SAX function block to handle the parsing callback.
11897 * If sax is NULL, fallback to the default DOM tree building routines.
11898 *
11899 * Returns the resulting document tree
11900 */
11901
11902xmlDocPtr
11903xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
11904 xmlDocPtr ret;
11905 xmlParserCtxtPtr ctxt;
11906
11907 if (cur == NULL) return(NULL);
11908
11909
11910 ctxt = xmlCreateDocParserCtxt(cur);
11911 if (ctxt == NULL) return(NULL);
11912 if (sax != NULL) {
11913 ctxt->sax = sax;
11914 ctxt->userData = NULL;
11915 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011916 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011917
11918 xmlParseDocument(ctxt);
11919 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11920 else {
11921 ret = NULL;
11922 xmlFreeDoc(ctxt->myDoc);
11923 ctxt->myDoc = NULL;
11924 }
11925 if (sax != NULL)
11926 ctxt->sax = NULL;
11927 xmlFreeParserCtxt(ctxt);
11928
11929 return(ret);
11930}
11931
11932/**
11933 * xmlParseDoc:
11934 * @cur: a pointer to an array of xmlChar
11935 *
11936 * parse an XML in-memory document and build a tree.
11937 *
11938 * Returns the resulting document tree
11939 */
11940
11941xmlDocPtr
11942xmlParseDoc(xmlChar *cur) {
11943 return(xmlSAXParseDoc(NULL, cur, 0));
11944}
Daniel Veillard81273902003-09-30 00:43:48 +000011945#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011946
Daniel Veillard81273902003-09-30 00:43:48 +000011947#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000011948/************************************************************************
11949 * *
11950 * Specific function to keep track of entities references *
11951 * and used by the XSLT debugger *
11952 * *
11953 ************************************************************************/
11954
11955static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
11956
11957/**
11958 * xmlAddEntityReference:
11959 * @ent : A valid entity
11960 * @firstNode : A valid first node for children of entity
11961 * @lastNode : A valid last node of children entity
11962 *
11963 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
11964 */
11965static void
11966xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
11967 xmlNodePtr lastNode)
11968{
11969 if (xmlEntityRefFunc != NULL) {
11970 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
11971 }
11972}
11973
11974
11975/**
11976 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000011977 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000011978 *
11979 * Set the function to call call back when a xml reference has been made
11980 */
11981void
11982xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
11983{
11984 xmlEntityRefFunc = func;
11985}
Daniel Veillard81273902003-09-30 00:43:48 +000011986#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011987
11988/************************************************************************
11989 * *
11990 * Miscellaneous *
11991 * *
11992 ************************************************************************/
11993
11994#ifdef LIBXML_XPATH_ENABLED
11995#include <libxml/xpath.h>
11996#endif
11997
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011998extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000011999static int xmlParserInitialized = 0;
12000
12001/**
12002 * xmlInitParser:
12003 *
12004 * Initialization function for the XML parser.
12005 * This is not reentrant. Call once before processing in case of
12006 * use in multithreaded programs.
12007 */
12008
12009void
12010xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012011 if (xmlParserInitialized != 0)
12012 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012013
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012014 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12015 (xmlGenericError == NULL))
12016 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012017 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000012018 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000012019 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000012020 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000012021 xmlDefaultSAXHandlerInit();
12022 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012023#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012024 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012025#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012026#ifdef LIBXML_HTML_ENABLED
12027 htmlInitAutoClose();
12028 htmlDefaultSAXHandlerInit();
12029#endif
12030#ifdef LIBXML_XPATH_ENABLED
12031 xmlXPathInit();
12032#endif
12033 xmlParserInitialized = 1;
12034}
12035
12036/**
12037 * xmlCleanupParser:
12038 *
12039 * Cleanup function for the XML parser. It tries to reclaim all
12040 * parsing related global memory allocated for the parser processing.
12041 * It doesn't deallocate any document related memory. Calling this
12042 * function should not prevent reusing the parser.
Daniel Veillard7424eb62003-01-24 14:14:52 +000012043 * One should call xmlCleanupParser() only when the process has
12044 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012045 */
12046
12047void
12048xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012049 if (!xmlParserInitialized)
12050 return;
12051
Owen Taylor3473f882001-02-23 17:55:21 +000012052 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012053#ifdef LIBXML_CATALOG_ENABLED
12054 xmlCatalogCleanup();
12055#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000012056 xmlCleanupThreads();
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012057 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012058 xmlResetLastError();
Daniel Veillardd0463562001-10-13 09:15:48 +000012059 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012060}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012061
12062/************************************************************************
12063 * *
12064 * New set (2.6.0) of simpler and more flexible APIs *
12065 * *
12066 ************************************************************************/
12067
12068/**
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012069 * DICT_FREE:
12070 * @str: a string
12071 *
12072 * Free a string if it is not owned by the "dict" dictionnary in the
12073 * current scope
12074 */
12075#define DICT_FREE(str) \
12076 if ((str) && ((!dict) || \
12077 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \
12078 xmlFree((char *)(str));
12079
12080/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012081 * xmlCtxtReset:
12082 * @ctxt: an XML parser context
12083 *
12084 * Reset a parser context
12085 */
12086void
12087xmlCtxtReset(xmlParserCtxtPtr ctxt)
12088{
12089 xmlParserInputPtr input;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012090 xmlDictPtr dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012091
12092 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12093 xmlFreeInputStream(input);
12094 }
12095 ctxt->inputNr = 0;
12096 ctxt->input = NULL;
12097
12098 ctxt->spaceNr = 0;
12099 ctxt->spaceTab[0] = -1;
12100 ctxt->space = &ctxt->spaceTab[0];
12101
12102
12103 ctxt->nodeNr = 0;
12104 ctxt->node = NULL;
12105
12106 ctxt->nameNr = 0;
12107 ctxt->name = NULL;
12108
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012109 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012110 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012111 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012112 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012113 DICT_FREE(ctxt->directory);
12114 ctxt->directory = NULL;
12115 DICT_FREE(ctxt->extSubURI);
12116 ctxt->extSubURI = NULL;
12117 DICT_FREE(ctxt->extSubSystem);
12118 ctxt->extSubSystem = NULL;
12119 if (ctxt->myDoc != NULL)
12120 xmlFreeDoc(ctxt->myDoc);
12121 ctxt->myDoc = NULL;
12122
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012123 ctxt->standalone = -1;
12124 ctxt->hasExternalSubset = 0;
12125 ctxt->hasPErefs = 0;
12126 ctxt->html = 0;
12127 ctxt->external = 0;
12128 ctxt->instate = XML_PARSER_START;
12129 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012130
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012131 ctxt->wellFormed = 1;
12132 ctxt->nsWellFormed = 1;
12133 ctxt->valid = 1;
12134 ctxt->vctxt.userData = ctxt;
12135 ctxt->vctxt.error = xmlParserValidityError;
12136 ctxt->vctxt.warning = xmlParserValidityWarning;
12137 ctxt->record_info = 0;
12138 ctxt->nbChars = 0;
12139 ctxt->checkIndex = 0;
12140 ctxt->inSubset = 0;
12141 ctxt->errNo = XML_ERR_OK;
12142 ctxt->depth = 0;
12143 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12144 ctxt->catalogs = NULL;
12145 xmlInitNodeInfoSeq(&ctxt->node_seq);
12146
12147 if (ctxt->attsDefault != NULL) {
12148 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12149 ctxt->attsDefault = NULL;
12150 }
12151 if (ctxt->attsSpecial != NULL) {
12152 xmlHashFree(ctxt->attsSpecial, NULL);
12153 ctxt->attsSpecial = NULL;
12154 }
12155
Daniel Veillard4432df22003-09-28 18:58:27 +000012156#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012157 if (ctxt->catalogs != NULL)
12158 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012159#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012160}
12161
12162/**
12163 * xmlCtxtUseOptions:
12164 * @ctxt: an XML parser context
12165 * @options: a combination of xmlParserOption(s)
12166 *
12167 * Applies the options to the parser context
12168 *
12169 * Returns 0 in case of success, the set of unknown or unimplemented options
12170 * in case of error.
12171 */
12172int
12173xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
12174{
12175 if (options & XML_PARSE_RECOVER) {
12176 ctxt->recovery = 1;
12177 options -= XML_PARSE_RECOVER;
12178 } else
12179 ctxt->recovery = 0;
12180 if (options & XML_PARSE_DTDLOAD) {
12181 ctxt->loadsubset = XML_DETECT_IDS;
12182 options -= XML_PARSE_DTDLOAD;
12183 } else
12184 ctxt->loadsubset = 0;
12185 if (options & XML_PARSE_DTDATTR) {
12186 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
12187 options -= XML_PARSE_DTDATTR;
12188 }
12189 if (options & XML_PARSE_NOENT) {
12190 ctxt->replaceEntities = 1;
12191 /* ctxt->loadsubset |= XML_DETECT_IDS; */
12192 options -= XML_PARSE_NOENT;
12193 } else
12194 ctxt->replaceEntities = 0;
12195 if (options & XML_PARSE_NOWARNING) {
12196 ctxt->sax->warning = NULL;
12197 options -= XML_PARSE_NOWARNING;
12198 }
12199 if (options & XML_PARSE_NOERROR) {
12200 ctxt->sax->error = NULL;
12201 ctxt->sax->fatalError = NULL;
12202 options -= XML_PARSE_NOERROR;
12203 }
12204 if (options & XML_PARSE_PEDANTIC) {
12205 ctxt->pedantic = 1;
12206 options -= XML_PARSE_PEDANTIC;
12207 } else
12208 ctxt->pedantic = 0;
12209 if (options & XML_PARSE_NOBLANKS) {
12210 ctxt->keepBlanks = 0;
12211 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
12212 options -= XML_PARSE_NOBLANKS;
12213 } else
12214 ctxt->keepBlanks = 1;
12215 if (options & XML_PARSE_DTDVALID) {
12216 ctxt->validate = 1;
12217 if (options & XML_PARSE_NOWARNING)
12218 ctxt->vctxt.warning = NULL;
12219 if (options & XML_PARSE_NOERROR)
12220 ctxt->vctxt.error = NULL;
12221 options -= XML_PARSE_DTDVALID;
12222 } else
12223 ctxt->validate = 0;
Daniel Veillard81273902003-09-30 00:43:48 +000012224#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012225 if (options & XML_PARSE_SAX1) {
12226 ctxt->sax->startElement = xmlSAX2StartElement;
12227 ctxt->sax->endElement = xmlSAX2EndElement;
12228 ctxt->sax->startElementNs = NULL;
12229 ctxt->sax->endElementNs = NULL;
12230 ctxt->sax->initialized = 1;
12231 options -= XML_PARSE_SAX1;
12232 }
Daniel Veillard81273902003-09-30 00:43:48 +000012233#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012234 if (options & XML_PARSE_NODICT) {
12235 ctxt->dictNames = 0;
12236 options -= XML_PARSE_NODICT;
12237 } else {
12238 ctxt->dictNames = 1;
12239 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000012240 if (options & XML_PARSE_NOCDATA) {
12241 ctxt->sax->cdataBlock = NULL;
12242 options -= XML_PARSE_NOCDATA;
12243 }
12244 if (options & XML_PARSE_NSCLEAN) {
12245 ctxt->options |= XML_PARSE_NSCLEAN;
12246 options -= XML_PARSE_NSCLEAN;
12247 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012248 return (options);
12249}
12250
12251/**
12252 * xmlDoRead:
12253 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000012254 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012255 * @encoding: the document encoding, or NULL
12256 * @options: a combination of xmlParserOption(s)
12257 * @reuse: keep the context for reuse
12258 *
12259 * Common front-end for the xmlRead functions
12260 *
12261 * Returns the resulting document tree or NULL
12262 */
12263static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012264xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
12265 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012266{
12267 xmlDocPtr ret;
12268
12269 xmlCtxtUseOptions(ctxt, options);
12270 if (encoding != NULL) {
12271 xmlCharEncodingHandlerPtr hdlr;
12272
12273 hdlr = xmlFindCharEncodingHandler(encoding);
12274 if (hdlr != NULL)
12275 xmlSwitchToEncoding(ctxt, hdlr);
12276 }
Daniel Veillard60942de2003-09-25 21:05:58 +000012277 if ((URL != NULL) && (ctxt->input != NULL) &&
12278 (ctxt->input->filename == NULL))
12279 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012280 xmlParseDocument(ctxt);
12281 if ((ctxt->wellFormed) || ctxt->recovery)
12282 ret = ctxt->myDoc;
12283 else {
12284 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012285 if (ctxt->myDoc != NULL) {
Daniel Veillard9d8c1df2003-09-26 23:27:25 +000012286 if ((ctxt->dictNames) &&
12287 (ctxt->myDoc->dict == ctxt->dict))
12288 xmlDictReference(ctxt->dict);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012289 xmlFreeDoc(ctxt->myDoc);
12290 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012291 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012292 ctxt->myDoc = NULL;
12293 if (!reuse) {
12294 if ((ctxt->dictNames) &&
12295 (ret != NULL) &&
12296 (ret->dict == ctxt->dict))
12297 ctxt->dict = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012298 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012299 } else {
12300 /* Must duplicate the reference to the dictionary */
12301 if ((ctxt->dictNames) &&
12302 (ret != NULL) &&
12303 (ret->dict == ctxt->dict))
12304 xmlDictReference(ctxt->dict);
12305 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012306
12307 return (ret);
12308}
12309
12310/**
12311 * xmlReadDoc:
12312 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012313 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012314 * @encoding: the document encoding, or NULL
12315 * @options: a combination of xmlParserOption(s)
12316 *
12317 * parse an XML in-memory document and build a tree.
12318 *
12319 * Returns the resulting document tree
12320 */
12321xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012322xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012323{
12324 xmlParserCtxtPtr ctxt;
12325
12326 if (cur == NULL)
12327 return (NULL);
12328
12329 ctxt = xmlCreateDocParserCtxt(cur);
12330 if (ctxt == NULL)
12331 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012332 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012333}
12334
12335/**
12336 * xmlReadFile:
12337 * @filename: a file or URL
12338 * @encoding: the document encoding, or NULL
12339 * @options: a combination of xmlParserOption(s)
12340 *
12341 * parse an XML file from the filesystem or the network.
12342 *
12343 * Returns the resulting document tree
12344 */
12345xmlDocPtr
12346xmlReadFile(const char *filename, const char *encoding, int options)
12347{
12348 xmlParserCtxtPtr ctxt;
12349
12350 ctxt = xmlCreateFileParserCtxt(filename);
12351 if (ctxt == NULL)
12352 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012353 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012354}
12355
12356/**
12357 * xmlReadMemory:
12358 * @buffer: a pointer to a char array
12359 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012360 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012361 * @encoding: the document encoding, or NULL
12362 * @options: a combination of xmlParserOption(s)
12363 *
12364 * parse an XML in-memory document and build a tree.
12365 *
12366 * Returns the resulting document tree
12367 */
12368xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012369xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012370{
12371 xmlParserCtxtPtr ctxt;
12372
12373 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12374 if (ctxt == NULL)
12375 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012376 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012377}
12378
12379/**
12380 * xmlReadFd:
12381 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012382 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012383 * @encoding: the document encoding, or NULL
12384 * @options: a combination of xmlParserOption(s)
12385 *
12386 * parse an XML from a file descriptor and build a tree.
12387 *
12388 * Returns the resulting document tree
12389 */
12390xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012391xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012392{
12393 xmlParserCtxtPtr ctxt;
12394 xmlParserInputBufferPtr input;
12395 xmlParserInputPtr stream;
12396
12397 if (fd < 0)
12398 return (NULL);
12399
12400 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12401 if (input == NULL)
12402 return (NULL);
12403 ctxt = xmlNewParserCtxt();
12404 if (ctxt == NULL) {
12405 xmlFreeParserInputBuffer(input);
12406 return (NULL);
12407 }
12408 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12409 if (stream == NULL) {
12410 xmlFreeParserInputBuffer(input);
12411 xmlFreeParserCtxt(ctxt);
12412 return (NULL);
12413 }
12414 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012415 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012416}
12417
12418/**
12419 * xmlReadIO:
12420 * @ioread: an I/O read function
12421 * @ioclose: an I/O close function
12422 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012423 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012424 * @encoding: the document encoding, or NULL
12425 * @options: a combination of xmlParserOption(s)
12426 *
12427 * parse an XML document from I/O functions and source and build a tree.
12428 *
12429 * Returns the resulting document tree
12430 */
12431xmlDocPtr
12432xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000012433 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012434{
12435 xmlParserCtxtPtr ctxt;
12436 xmlParserInputBufferPtr input;
12437 xmlParserInputPtr stream;
12438
12439 if (ioread == NULL)
12440 return (NULL);
12441
12442 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12443 XML_CHAR_ENCODING_NONE);
12444 if (input == NULL)
12445 return (NULL);
12446 ctxt = xmlNewParserCtxt();
12447 if (ctxt == NULL) {
12448 xmlFreeParserInputBuffer(input);
12449 return (NULL);
12450 }
12451 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12452 if (stream == NULL) {
12453 xmlFreeParserInputBuffer(input);
12454 xmlFreeParserCtxt(ctxt);
12455 return (NULL);
12456 }
12457 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012458 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012459}
12460
12461/**
12462 * xmlCtxtReadDoc:
12463 * @ctxt: an XML parser context
12464 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012465 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012466 * @encoding: the document encoding, or NULL
12467 * @options: a combination of xmlParserOption(s)
12468 *
12469 * parse an XML in-memory document and build a tree.
12470 * This reuses the existing @ctxt parser context
12471 *
12472 * Returns the resulting document tree
12473 */
12474xmlDocPtr
12475xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000012476 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012477{
12478 xmlParserInputPtr stream;
12479
12480 if (cur == NULL)
12481 return (NULL);
12482 if (ctxt == NULL)
12483 return (NULL);
12484
12485 xmlCtxtReset(ctxt);
12486
12487 stream = xmlNewStringInputStream(ctxt, cur);
12488 if (stream == NULL) {
12489 return (NULL);
12490 }
12491 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012492 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012493}
12494
12495/**
12496 * xmlCtxtReadFile:
12497 * @ctxt: an XML parser context
12498 * @filename: a file or URL
12499 * @encoding: the document encoding, or NULL
12500 * @options: a combination of xmlParserOption(s)
12501 *
12502 * parse an XML file from the filesystem or the network.
12503 * This reuses the existing @ctxt parser context
12504 *
12505 * Returns the resulting document tree
12506 */
12507xmlDocPtr
12508xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
12509 const char *encoding, int options)
12510{
12511 xmlParserInputPtr stream;
12512
12513 if (filename == NULL)
12514 return (NULL);
12515 if (ctxt == NULL)
12516 return (NULL);
12517
12518 xmlCtxtReset(ctxt);
12519
12520 stream = xmlNewInputFromFile(ctxt, filename);
12521 if (stream == NULL) {
12522 return (NULL);
12523 }
12524 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012525 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012526}
12527
12528/**
12529 * xmlCtxtReadMemory:
12530 * @ctxt: an XML parser context
12531 * @buffer: a pointer to a char array
12532 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012533 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012534 * @encoding: the document encoding, or NULL
12535 * @options: a combination of xmlParserOption(s)
12536 *
12537 * parse an XML in-memory document and build a tree.
12538 * This reuses the existing @ctxt parser context
12539 *
12540 * Returns the resulting document tree
12541 */
12542xmlDocPtr
12543xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000012544 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012545{
12546 xmlParserInputBufferPtr input;
12547 xmlParserInputPtr stream;
12548
12549 if (ctxt == NULL)
12550 return (NULL);
12551 if (buffer == NULL)
12552 return (NULL);
12553
12554 xmlCtxtReset(ctxt);
12555
12556 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12557 if (input == NULL) {
12558 return(NULL);
12559 }
12560
12561 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12562 if (stream == NULL) {
12563 xmlFreeParserInputBuffer(input);
12564 return(NULL);
12565 }
12566
12567 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012568 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012569}
12570
12571/**
12572 * xmlCtxtReadFd:
12573 * @ctxt: an XML parser context
12574 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012575 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012576 * @encoding: the document encoding, or NULL
12577 * @options: a combination of xmlParserOption(s)
12578 *
12579 * parse an XML from a file descriptor and build a tree.
12580 * This reuses the existing @ctxt parser context
12581 *
12582 * Returns the resulting document tree
12583 */
12584xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012585xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
12586 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012587{
12588 xmlParserInputBufferPtr input;
12589 xmlParserInputPtr stream;
12590
12591 if (fd < 0)
12592 return (NULL);
12593 if (ctxt == NULL)
12594 return (NULL);
12595
12596 xmlCtxtReset(ctxt);
12597
12598
12599 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12600 if (input == NULL)
12601 return (NULL);
12602 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12603 if (stream == NULL) {
12604 xmlFreeParserInputBuffer(input);
12605 return (NULL);
12606 }
12607 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012608 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012609}
12610
12611/**
12612 * xmlCtxtReadIO:
12613 * @ctxt: an XML parser context
12614 * @ioread: an I/O read function
12615 * @ioclose: an I/O close function
12616 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012617 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012618 * @encoding: the document encoding, or NULL
12619 * @options: a combination of xmlParserOption(s)
12620 *
12621 * parse an XML document from I/O functions and source and build a tree.
12622 * This reuses the existing @ctxt parser context
12623 *
12624 * Returns the resulting document tree
12625 */
12626xmlDocPtr
12627xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
12628 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000012629 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012630 const char *encoding, int options)
12631{
12632 xmlParserInputBufferPtr input;
12633 xmlParserInputPtr stream;
12634
12635 if (ioread == NULL)
12636 return (NULL);
12637 if (ctxt == NULL)
12638 return (NULL);
12639
12640 xmlCtxtReset(ctxt);
12641
12642 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12643 XML_CHAR_ENCODING_NONE);
12644 if (input == NULL)
12645 return (NULL);
12646 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12647 if (stream == NULL) {
12648 xmlFreeParserInputBuffer(input);
12649 return (NULL);
12650 }
12651 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012652 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012653}