blob: ea9571283e2c5d62a167310a6e6834cbec228621 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
/*
 * XML_DIR_SEP: directory separator character used when handling paths;
 * a backslash on native Windows builds (not Cygwin), a slash elsewhere.
 */
#if !defined(WIN32) || defined(__CYGWIN__)
#define XML_DIR_SEP '/'
#else
#define XML_DIR_SEP '\\'
#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the depth of the XML documents the parser
 * accepts to process. It is a safety boundary, not an intrinsic
 * limitation of the parser.
 */
unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000091
/* Flag macro, defined to 1 in this file. */
#define SAX2 1

/* Buffer size constants, in the large and regular variants. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Marker string identifying a "SAX compatibility mode" document. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
98
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets that the W3C specifications allow.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
Owen Taylor3473f882001-02-23 17:55:21 +0000109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
Daniel Veillard7d515752003-09-26 19:12:37 +0000113static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000116 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000117 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000118
Daniel Veillard81273902003-09-30 00:43:48 +0000119#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000120static void
121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000123#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000124
Daniel Veillard7d515752003-09-26 19:12:37 +0000125static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
129/************************************************************************
130 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000131 * Some factorized error routines *
132 * *
133 ************************************************************************/
134
135/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000136 * xmlErrAttributeDup:
137 * @ctxt: an XML parser context
138 * @prefix: the attribute prefix
139 * @localname: the attribute localname
140 *
141 * Handle a redefinition of attribute error
142 */
143static void
144xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
145 const xmlChar * localname)
146{
Daniel Veillard157fee02003-10-31 10:36:03 +0000147 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
148 (ctxt->instate == XML_PARSER_EOF))
149 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000150 if (ctxt != NULL)
151 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000152 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000153 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000154 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
155 (const char *) localname, NULL, NULL, 0, 0,
156 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000157 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000158 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000159 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
160 (const char *) prefix, (const char *) localname,
161 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
162 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000163 if (ctxt != NULL) {
164 ctxt->wellFormed = 0;
165 if (ctxt->recovery == 0)
166 ctxt->disableSAX = 1;
167 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000168}
169
170/**
171 * xmlFatalErr:
172 * @ctxt: an XML parser context
173 * @error: the error number
174 * @extra: extra information string
175 *
176 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
177 */
178static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000179xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000180{
181 const char *errmsg;
182
Daniel Veillard157fee02003-10-31 10:36:03 +0000183 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
184 (ctxt->instate == XML_PARSER_EOF))
185 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000186 switch (error) {
187 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid hexadecimal value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid decimal value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "CharRef: invalid value\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "internal error";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference at end of document\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference in prolog\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference in epilog\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference: no name\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "PEReference: expecting ';'\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "Detected an entity reference loop\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "EntityValue: \" or ' expected\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "PEReferences forbidden in internal subset\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "EntityValue: \" or ' expected\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "AttValue: \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "Unescaped '<' not allowed in attributes values\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "SystemLiteral \" or ' expected\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "Unfinished System or Public ID \" or ' expected\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "Sequence ']]>' not allowed in content\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "PUBLIC, the Public Identifier is missing\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "Comment must not contain '--' (double-hyphen)\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "xmlParsePI : no target name\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "Invalid PI name\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "NOTATION: Name expected here\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "'>' required to close NOTATION declaration\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "Entity value required\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "Fragment not allowed";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "'(' required to start ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "NmToken expected in ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "')' required to finish ATTLIST enumeration\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "ContentDecl : Name or '(' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
288 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000289 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000290 errmsg =
291 "PEReference: forbidden within markup decl in internal subset\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "expected '>'\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "XML conditional section '[' expected\n";
298 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000299 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000300 errmsg = "Content error in the external subset\n";
301 break;
302 case XML_ERR_CONDSEC_INVALID_KEYWORD:
303 errmsg =
304 "conditional section INCLUDE or IGNORE keyword expected\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "XML conditional section not closed\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "Text declaration '<?xml' required\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "parsing XML declaration: '?>' expected\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "external parsed entities cannot be standalone\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "EntityRef: expecting ';'\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "DOCTYPE improperly terminated\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "EndTag: '</' not found\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "expected '='\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "String not closed expecting \" or '\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "String not started expecting ' or \"\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Invalid XML encoding name\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "standalone accepts only 'yes' or 'no'\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "Document is empty\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "Extra content at the end of the document\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "chunk is not well balanced\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "extra content at the end of well balanced chunk\n";
353 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000354 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 errmsg = "Malformed declaration expecting version\n";
356 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000358 case:
359 errmsg = "\n";
360 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000362 default:
363 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000365 if (ctxt != NULL)
366 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000367 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000368 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
369 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000370 if (ctxt != NULL) {
371 ctxt->wellFormed = 0;
372 if (ctxt->recovery == 0)
373 ctxt->disableSAX = 1;
374 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000375}
376
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000377/**
378 * xmlFatalErrMsg:
379 * @ctxt: an XML parser context
380 * @error: the error number
381 * @msg: the error message
382 *
383 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
384 */
385static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000386xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
387 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000388{
Daniel Veillard157fee02003-10-31 10:36:03 +0000389 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
390 (ctxt->instate == XML_PARSER_EOF))
391 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000392 if (ctxt != NULL)
393 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000394 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000395 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000396 if (ctxt != NULL) {
397 ctxt->wellFormed = 0;
398 if (ctxt->recovery == 0)
399 ctxt->disableSAX = 1;
400 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000401}
402
403/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000404 * xmlWarningMsg:
405 * @ctxt: an XML parser context
406 * @error: the error number
407 * @msg: the error message
408 * @str1: extra data
409 * @str2: extra data
410 *
411 * Handle a warning.
412 */
413static void
414xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
415 const char *msg, const xmlChar *str1, const xmlChar *str2)
416{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000417 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000418
Daniel Veillard157fee02003-10-31 10:36:03 +0000419 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
420 (ctxt->instate == XML_PARSER_EOF))
421 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000422 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
423 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000424 schannel = ctxt->sax->serror;
425 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000426 (ctxt->sax) ? ctxt->sax->warning : NULL,
427 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000428 ctxt, NULL, XML_FROM_PARSER, error,
429 XML_ERR_WARNING, NULL, 0,
430 (const char *) str1, (const char *) str2, NULL, 0, 0,
431 msg, (const char *) str1, (const char *) str2);
432}
433
434/**
435 * xmlValidityError:
436 * @ctxt: an XML parser context
437 * @error: the error number
438 * @msg: the error message
439 * @str1: extra data
440 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000441 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000442 */
443static void
444xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
445 const char *msg, const xmlChar *str1)
446{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000447 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000448
449 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
450 (ctxt->instate == XML_PARSER_EOF))
451 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000452 if (ctxt != NULL) {
453 ctxt->errNo = error;
454 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
455 schannel = ctxt->sax->serror;
456 }
Daniel Veillardc790bf42003-10-11 10:50:10 +0000457 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000458 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000459 ctxt, NULL, XML_FROM_DTD, error,
460 XML_ERR_ERROR, NULL, 0, (const char *) str1,
461 NULL, NULL, 0, 0,
462 msg, (const char *) str1);
Daniel Veillard30e76072006-03-09 14:13:55 +0000463 if (ctxt != NULL) {
464 ctxt->valid = 0;
465 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000466}
467
468/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000469 * xmlFatalErrMsgInt:
470 * @ctxt: an XML parser context
471 * @error: the error number
472 * @msg: the error message
473 * @val: an integer value
474 *
475 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
476 */
477static void
478xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000479 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000480{
Daniel Veillard157fee02003-10-31 10:36:03 +0000481 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
482 (ctxt->instate == XML_PARSER_EOF))
483 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000484 if (ctxt != NULL)
485 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000486 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000487 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
488 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000489 if (ctxt != NULL) {
490 ctxt->wellFormed = 0;
491 if (ctxt->recovery == 0)
492 ctxt->disableSAX = 1;
493 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000494}
495
496/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000497 * xmlFatalErrMsgStrIntStr:
498 * @ctxt: an XML parser context
499 * @error: the error number
500 * @msg: the error message
501 * @str1: an string info
502 * @val: an integer value
503 * @str2: an string info
504 *
505 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
506 */
507static void
508xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
509 const char *msg, const xmlChar *str1, int val,
510 const xmlChar *str2)
511{
Daniel Veillard157fee02003-10-31 10:36:03 +0000512 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
513 (ctxt->instate == XML_PARSER_EOF))
514 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000515 if (ctxt != NULL)
516 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000517 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000518 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
519 NULL, 0, (const char *) str1, (const char *) str2,
520 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000521 if (ctxt != NULL) {
522 ctxt->wellFormed = 0;
523 if (ctxt->recovery == 0)
524 ctxt->disableSAX = 1;
525 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000526}
527
528/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000529 * xmlFatalErrMsgStr:
530 * @ctxt: an XML parser context
531 * @error: the error number
532 * @msg: the error message
533 * @val: a string value
534 *
535 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
536 */
537static void
538xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000539 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000540{
Daniel Veillard157fee02003-10-31 10:36:03 +0000541 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
542 (ctxt->instate == XML_PARSER_EOF))
543 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000544 if (ctxt != NULL)
545 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000546 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000547 XML_FROM_PARSER, error, XML_ERR_FATAL,
548 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
549 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000550 if (ctxt != NULL) {
551 ctxt->wellFormed = 0;
552 if (ctxt->recovery == 0)
553 ctxt->disableSAX = 1;
554 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000555}
556
557/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000558 * xmlErrMsgStr:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the error message
562 * @val: a string value
563 *
564 * Handle a non fatal parser error
565 */
566static void
567xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
568 const char *msg, const xmlChar * val)
569{
Daniel Veillard157fee02003-10-31 10:36:03 +0000570 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
571 (ctxt->instate == XML_PARSER_EOF))
572 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000573 if (ctxt != NULL)
574 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000576 XML_FROM_PARSER, error, XML_ERR_ERROR,
577 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
578 val);
579}
580
581/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000582 * xmlNsErr:
583 * @ctxt: an XML parser context
584 * @error: the error number
585 * @msg: the message
586 * @info1: extra information string
587 * @info2: extra information string
588 *
589 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
590 */
591static void
592xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
593 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000594 const xmlChar * info1, const xmlChar * info2,
595 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000596{
Daniel Veillard157fee02003-10-31 10:36:03 +0000597 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598 (ctxt->instate == XML_PARSER_EOF))
599 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000600 if (ctxt != NULL)
601 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000602 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000603 XML_ERR_ERROR, NULL, 0, (const char *) info1,
604 (const char *) info2, (const char *) info3, 0, 0, msg,
605 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000606 if (ctxt != NULL)
607 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000608}
609
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000610/************************************************************************
611 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000612 * Library wide options *
613 * *
614 ************************************************************************/
615
/**
 * xmlHasFeature:
 * @feature: the feature to be examined
 *
 * Examines if the library has been compiled with a given feature.
 *
 * Returns a non-zero value if the feature exists, or zero (0) if the
 * feature does not exist or an unknown feature is requested.
 */
626int
627xmlHasFeature(xmlFeature feature)
628{
629 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000630 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000631#ifdef LIBXML_THREAD_ENABLED
632 return(1);
633#else
634 return(0);
635#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000636 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000637#ifdef LIBXML_TREE_ENABLED
638 return(1);
639#else
640 return(0);
641#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000642 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000643#ifdef LIBXML_OUTPUT_ENABLED
644 return(1);
645#else
646 return(0);
647#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000648 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000649#ifdef LIBXML_PUSH_ENABLED
650 return(1);
651#else
652 return(0);
653#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000654 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000655#ifdef LIBXML_READER_ENABLED
656 return(1);
657#else
658 return(0);
659#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000660 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000661#ifdef LIBXML_PATTERN_ENABLED
662 return(1);
663#else
664 return(0);
665#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000666 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000667#ifdef LIBXML_WRITER_ENABLED
668 return(1);
669#else
670 return(0);
671#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000672 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000673#ifdef LIBXML_SAX1_ENABLED
674 return(1);
675#else
676 return(0);
677#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000678 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000679#ifdef LIBXML_FTP_ENABLED
680 return(1);
681#else
682 return(0);
683#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000684 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000685#ifdef LIBXML_HTTP_ENABLED
686 return(1);
687#else
688 return(0);
689#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000690 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000691#ifdef LIBXML_VALID_ENABLED
692 return(1);
693#else
694 return(0);
695#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000696 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000697#ifdef LIBXML_HTML_ENABLED
698 return(1);
699#else
700 return(0);
701#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000702 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000703#ifdef LIBXML_LEGACY_ENABLED
704 return(1);
705#else
706 return(0);
707#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000708 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000709#ifdef LIBXML_C14N_ENABLED
710 return(1);
711#else
712 return(0);
713#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000714 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000715#ifdef LIBXML_CATALOG_ENABLED
716 return(1);
717#else
718 return(0);
719#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000720 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000721#ifdef LIBXML_XPATH_ENABLED
722 return(1);
723#else
724 return(0);
725#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000726 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000727#ifdef LIBXML_XPTR_ENABLED
728 return(1);
729#else
730 return(0);
731#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000732 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000733#ifdef LIBXML_XINCLUDE_ENABLED
734 return(1);
735#else
736 return(0);
737#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000738 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000739#ifdef LIBXML_ICONV_ENABLED
740 return(1);
741#else
742 return(0);
743#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000744 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000745#ifdef LIBXML_ISO8859X_ENABLED
746 return(1);
747#else
748 return(0);
749#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000750 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000751#ifdef LIBXML_UNICODE_ENABLED
752 return(1);
753#else
754 return(0);
755#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000756 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000757#ifdef LIBXML_REGEXP_ENABLED
758 return(1);
759#else
760 return(0);
761#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000762 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000763#ifdef LIBXML_AUTOMATA_ENABLED
764 return(1);
765#else
766 return(0);
767#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000768 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000769#ifdef LIBXML_EXPR_ENABLED
770 return(1);
771#else
772 return(0);
773#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000774 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000775#ifdef LIBXML_SCHEMAS_ENABLED
776 return(1);
777#else
778 return(0);
779#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000780 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000781#ifdef LIBXML_SCHEMATRON_ENABLED
782 return(1);
783#else
784 return(0);
785#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000786 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000787#ifdef LIBXML_MODULES_ENABLED
788 return(1);
789#else
790 return(0);
791#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000792 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000793#ifdef LIBXML_DEBUG_ENABLED
794 return(1);
795#else
796 return(0);
797#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000798 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000799#ifdef DEBUG_MEMORY_LOCATION
800 return(1);
801#else
802 return(0);
803#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000804 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000805#ifdef LIBXML_DEBUG_RUNTIME
806 return(1);
807#else
808 return(0);
809#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000810 case XML_WITH_ZLIB:
811#ifdef LIBXML_ZLIB_ENABLED
812 return(1);
813#else
814 return(0);
815#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000816 default:
817 break;
818 }
819 return(0);
820}
821
822/************************************************************************
823 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000824 * SAX2 defaulted attributes handling *
825 * *
826 ************************************************************************/
827
828/**
829 * xmlDetectSAX2:
830 * @ctxt: an XML parser context
831 *
832 * Do the SAX2 detection and specific intialization
833 */
834static void
835xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
836 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000837#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000838 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
839 ((ctxt->sax->startElementNs != NULL) ||
840 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000841#else
842 ctxt->sax2 = 1;
843#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000844
845 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
846 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
847 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000848 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
849 (ctxt->str_xml_ns == NULL)) {
850 xmlErrMemory(ctxt, NULL);
851 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000852}
853
/*
 * Record of the defaulted attributes declared for one element. Instances
 * are allocated and filled by xmlAddDefAttrs(), which stores them in
 * ctxt->attsDefault keyed by the element localname/prefix.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
struct _xmlDefAttrs {
    int nbAttrs;	/* number of defaulted attributes on that element */
    int maxAttrs;       /* the size of the array */
    /*
     * array of localname/prefix/values: xmlAddDefAttrs() uses 4 consecutive
     * slots per attribute (localname, prefix, value start, value end) and
     * over-allocates the structure so that maxAttrs attributes fit, i.e.
     * the array is indexed past its declared size of 4.
     */
    const xmlChar *values[4]; /* array of localname/prefix/values */
};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000861
862/**
863 * xmlAddDefAttrs:
864 * @ctxt: an XML parser context
865 * @fullname: the element fullname
866 * @fullattr: the attribute fullname
867 * @value: the attribute value
868 *
869 * Add a defaulted attribute for an element
870 */
871static void
872xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
873 const xmlChar *fullname,
874 const xmlChar *fullattr,
875 const xmlChar *value) {
876 xmlDefAttrsPtr defaults;
877 int len;
878 const xmlChar *name;
879 const xmlChar *prefix;
880
881 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000882 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000883 if (ctxt->attsDefault == NULL)
884 goto mem_error;
885 }
886
887 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000888 * split the element name into prefix:localname , the string found
889 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000890 */
891 name = xmlSplitQName3(fullname, &len);
892 if (name == NULL) {
893 name = xmlDictLookup(ctxt->dict, fullname, -1);
894 prefix = NULL;
895 } else {
896 name = xmlDictLookup(ctxt->dict, name, -1);
897 prefix = xmlDictLookup(ctxt->dict, fullname, len);
898 }
899
900 /*
901 * make sure there is some storage
902 */
903 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
904 if (defaults == NULL) {
905 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +0000906 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000907 if (defaults == NULL)
908 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000909 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000910 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000911 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
912 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +0000913 xmlDefAttrsPtr temp;
914
915 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +0000916 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +0000917 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +0000918 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000919 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000920 defaults->maxAttrs *= 2;
921 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
922 }
923
924 /*
Daniel Veillard8874b942005-08-25 13:19:21 +0000925 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +0000926 * are within the DTD and hen not associated to namespace names.
927 */
928 name = xmlSplitQName3(fullattr, &len);
929 if (name == NULL) {
930 name = xmlDictLookup(ctxt->dict, fullattr, -1);
931 prefix = NULL;
932 } else {
933 name = xmlDictLookup(ctxt->dict, name, -1);
934 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
935 }
936
937 defaults->values[4 * defaults->nbAttrs] = name;
938 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
939 /* intern the string and precompute the end */
940 len = xmlStrlen(value);
941 value = xmlDictLookup(ctxt->dict, value, len);
942 defaults->values[4 * defaults->nbAttrs + 2] = value;
943 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
944 defaults->nbAttrs++;
945
946 return;
947
948mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000949 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000950 return;
951}
952
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000953/**
954 * xmlAddSpecialAttr:
955 * @ctxt: an XML parser context
956 * @fullname: the element fullname
957 * @fullattr: the attribute fullname
958 * @type: the attribute type
959 *
960 * Register that this attribute is not CDATA
961 */
962static void
963xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
964 const xmlChar *fullname,
965 const xmlChar *fullattr,
966 int type)
967{
968 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000969 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000970 if (ctxt->attsSpecial == NULL)
971 goto mem_error;
972 }
973
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000974 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
975 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000976 return;
977
978mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000979 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000980 return;
981}
982
Daniel Veillard4432df22003-09-28 18:58:27 +0000983/**
984 * xmlCheckLanguageID:
985 * @lang: pointer to the string value
986 *
987 * Checks that the value conforms to the LanguageID production:
988 *
989 * NOTE: this is somewhat deprecated, those productions were removed from
990 * the XML Second edition.
991 *
992 * [33] LanguageID ::= Langcode ('-' Subcode)*
993 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
994 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
995 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
996 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
997 * [38] Subcode ::= ([a-z] | [A-Z])+
998 *
999 * Returns 1 if correct 0 otherwise
1000 **/
1001int
1002xmlCheckLanguageID(const xmlChar * lang)
1003{
1004 const xmlChar *cur = lang;
1005
1006 if (cur == NULL)
1007 return (0);
1008 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1009 ((cur[0] == 'I') && (cur[1] == '-'))) {
1010 /*
1011 * IANA code
1012 */
1013 cur += 2;
1014 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1015 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1016 cur++;
1017 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1018 ((cur[0] == 'X') && (cur[1] == '-'))) {
1019 /*
1020 * User code
1021 */
1022 cur += 2;
1023 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1024 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1025 cur++;
1026 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1027 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1028 /*
1029 * ISO639
1030 */
1031 cur++;
1032 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1033 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1034 cur++;
1035 else
1036 return (0);
1037 } else
1038 return (0);
1039 while (cur[0] != 0) { /* non input consuming */
1040 if (cur[0] != '-')
1041 return (0);
1042 cur++;
1043 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1044 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1045 cur++;
1046 else
1047 return (0);
1048 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1049 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1050 cur++;
1051 }
1052 return (1);
1053}
1054
Owen Taylor3473f882001-02-23 17:55:21 +00001055/************************************************************************
1056 * *
1057 * Parser stacks related functions and macros *
1058 * *
1059 ************************************************************************/
1060
1061xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1062 const xmlChar ** str);
1063
Daniel Veillard0fb18932003-09-07 09:14:37 +00001064#ifdef SAX2
1065/**
1066 * nsPush:
1067 * @ctxt: an XML parser context
1068 * @prefix: the namespace prefix or NULL
1069 * @URL: the namespace name
1070 *
1071 * Pushes a new parser namespace on top of the ns stack
1072 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001073 * Returns -1 in case of error, -2 if the namespace should be discarded
1074 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001075 */
1076static int
1077nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1078{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001079 if (ctxt->options & XML_PARSE_NSCLEAN) {
1080 int i;
1081 for (i = 0;i < ctxt->nsNr;i += 2) {
1082 if (ctxt->nsTab[i] == prefix) {
1083 /* in scope */
1084 if (ctxt->nsTab[i + 1] == URL)
1085 return(-2);
1086 /* out of scope keep it */
1087 break;
1088 }
1089 }
1090 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001091 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1092 ctxt->nsMax = 10;
1093 ctxt->nsNr = 0;
1094 ctxt->nsTab = (const xmlChar **)
1095 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1096 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001097 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001098 ctxt->nsMax = 0;
1099 return (-1);
1100 }
1101 } else if (ctxt->nsNr >= ctxt->nsMax) {
1102 ctxt->nsMax *= 2;
1103 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +00001104 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +00001105 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1106 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001107 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001108 ctxt->nsMax /= 2;
1109 return (-1);
1110 }
1111 }
1112 ctxt->nsTab[ctxt->nsNr++] = prefix;
1113 ctxt->nsTab[ctxt->nsNr++] = URL;
1114 return (ctxt->nsNr);
1115}
1116/**
1117 * nsPop:
1118 * @ctxt: an XML parser context
1119 * @nr: the number to pop
1120 *
1121 * Pops the top @nr parser prefix/namespace from the ns stack
1122 *
1123 * Returns the number of namespaces removed
1124 */
1125static int
1126nsPop(xmlParserCtxtPtr ctxt, int nr)
1127{
1128 int i;
1129
1130 if (ctxt->nsTab == NULL) return(0);
1131 if (ctxt->nsNr < nr) {
1132 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1133 nr = ctxt->nsNr;
1134 }
1135 if (ctxt->nsNr <= 0)
1136 return (0);
1137
1138 for (i = 0;i < nr;i++) {
1139 ctxt->nsNr--;
1140 ctxt->nsTab[ctxt->nsNr] = NULL;
1141 }
1142 return(nr);
1143}
1144#endif
1145
1146static int
1147xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1148 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001149 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001150 int maxatts;
1151
1152 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001153 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001154 atts = (const xmlChar **)
1155 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001156 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001157 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001158 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1159 if (attallocs == NULL) goto mem_error;
1160 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001161 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001162 } else if (nr + 5 > ctxt->maxatts) {
1163 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001164 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1165 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001166 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001167 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001168 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1169 (maxatts / 5) * sizeof(int));
1170 if (attallocs == NULL) goto mem_error;
1171 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001172 ctxt->maxatts = maxatts;
1173 }
1174 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001175mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001176 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001177 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001178}
1179
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001180/**
1181 * inputPush:
1182 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001183 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001184 *
1185 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001186 *
1187 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001188 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001189int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001190inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1191{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001192 if ((ctxt == NULL) || (value == NULL))
1193 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001194 if (ctxt->inputNr >= ctxt->inputMax) {
1195 ctxt->inputMax *= 2;
1196 ctxt->inputTab =
1197 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1198 ctxt->inputMax *
1199 sizeof(ctxt->inputTab[0]));
1200 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001201 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001202 return (0);
1203 }
1204 }
1205 ctxt->inputTab[ctxt->inputNr] = value;
1206 ctxt->input = value;
1207 return (ctxt->inputNr++);
1208}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001209/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001210 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001211 * @ctxt: an XML parser context
1212 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001213 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001214 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001215 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001216 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001217xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001218inputPop(xmlParserCtxtPtr ctxt)
1219{
1220 xmlParserInputPtr ret;
1221
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001222 if (ctxt == NULL)
1223 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001224 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001225 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001226 ctxt->inputNr--;
1227 if (ctxt->inputNr > 0)
1228 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1229 else
1230 ctxt->input = NULL;
1231 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001232 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001233 return (ret);
1234}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001235/**
1236 * nodePush:
1237 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001238 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001239 *
1240 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001241 *
1242 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001243 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001244int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001245nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1246{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001247 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001248 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001249 xmlNodePtr *tmp;
1250
1251 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1252 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001253 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001254 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001255 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001256 return (0);
1257 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001258 ctxt->nodeTab = tmp;
1259 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001260 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001261 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001262 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001263 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1264 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001265 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001266 return(0);
1267 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001268 ctxt->nodeTab[ctxt->nodeNr] = value;
1269 ctxt->node = value;
1270 return (ctxt->nodeNr++);
1271}
1272/**
1273 * nodePop:
1274 * @ctxt: an XML parser context
1275 *
1276 * Pops the top element node from the node stack
1277 *
1278 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001279 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001280xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001281nodePop(xmlParserCtxtPtr ctxt)
1282{
1283 xmlNodePtr ret;
1284
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001285 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001286 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001287 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001288 ctxt->nodeNr--;
1289 if (ctxt->nodeNr > 0)
1290 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1291 else
1292 ctxt->node = NULL;
1293 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001294 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001295 return (ret);
1296}
Daniel Veillarda2351322004-06-27 12:08:10 +00001297
1298#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001299/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001300 * nameNsPush:
1301 * @ctxt: an XML parser context
1302 * @value: the element name
1303 * @prefix: the element prefix
1304 * @URI: the element namespace name
1305 *
1306 * Pushes a new element name/prefix/URL on top of the name stack
1307 *
1308 * Returns -1 in case of error, the index in the stack otherwise
1309 */
1310static int
1311nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1312 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1313{
1314 if (ctxt->nameNr >= ctxt->nameMax) {
1315 const xmlChar * *tmp;
1316 void **tmp2;
1317 ctxt->nameMax *= 2;
1318 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1319 ctxt->nameMax *
1320 sizeof(ctxt->nameTab[0]));
1321 if (tmp == NULL) {
1322 ctxt->nameMax /= 2;
1323 goto mem_error;
1324 }
1325 ctxt->nameTab = tmp;
1326 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1327 ctxt->nameMax * 3 *
1328 sizeof(ctxt->pushTab[0]));
1329 if (tmp2 == NULL) {
1330 ctxt->nameMax /= 2;
1331 goto mem_error;
1332 }
1333 ctxt->pushTab = tmp2;
1334 }
1335 ctxt->nameTab[ctxt->nameNr] = value;
1336 ctxt->name = value;
1337 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1338 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001339 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001340 return (ctxt->nameNr++);
1341mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001342 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001343 return (-1);
1344}
1345/**
1346 * nameNsPop:
1347 * @ctxt: an XML parser context
1348 *
1349 * Pops the top element/prefix/URI name from the name stack
1350 *
1351 * Returns the name just removed
1352 */
1353static const xmlChar *
1354nameNsPop(xmlParserCtxtPtr ctxt)
1355{
1356 const xmlChar *ret;
1357
1358 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001359 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001360 ctxt->nameNr--;
1361 if (ctxt->nameNr > 0)
1362 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1363 else
1364 ctxt->name = NULL;
1365 ret = ctxt->nameTab[ctxt->nameNr];
1366 ctxt->nameTab[ctxt->nameNr] = NULL;
1367 return (ret);
1368}
Daniel Veillarda2351322004-06-27 12:08:10 +00001369#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001370
1371/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001372 * namePush:
1373 * @ctxt: an XML parser context
1374 * @value: the element name
1375 *
1376 * Pushes a new element name on top of the name stack
1377 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001378 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001379 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001380int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001381namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001382{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001383 if (ctxt == NULL) return (-1);
1384
Daniel Veillard1c732d22002-11-30 11:22:59 +00001385 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001386 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001387 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001388 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001389 ctxt->nameMax *
1390 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001391 if (tmp == NULL) {
1392 ctxt->nameMax /= 2;
1393 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001394 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001395 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001396 }
1397 ctxt->nameTab[ctxt->nameNr] = value;
1398 ctxt->name = value;
1399 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001400mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001401 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001402 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001403}
1404/**
1405 * namePop:
1406 * @ctxt: an XML parser context
1407 *
1408 * Pops the top element name from the name stack
1409 *
1410 * Returns the name just removed
1411 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001412const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001413namePop(xmlParserCtxtPtr ctxt)
1414{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001415 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001416
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001417 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1418 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001419 ctxt->nameNr--;
1420 if (ctxt->nameNr > 0)
1421 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1422 else
1423 ctxt->name = NULL;
1424 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001425 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001426 return (ret);
1427}
Owen Taylor3473f882001-02-23 17:55:21 +00001428
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001429static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001430 if (ctxt->spaceNr >= ctxt->spaceMax) {
1431 ctxt->spaceMax *= 2;
1432 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1433 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1434 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001435 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001436 return(0);
1437 }
1438 }
1439 ctxt->spaceTab[ctxt->spaceNr] = val;
1440 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1441 return(ctxt->spaceNr++);
1442}
1443
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001444static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001445 int ret;
1446 if (ctxt->spaceNr <= 0) return(0);
1447 ctxt->spaceNr--;
1448 if (ctxt->spaceNr > 0)
1449 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1450 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001451 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001452 ret = ctxt->spaceTab[ctxt->spaceNr];
1453 ctxt->spaceTab[ctxt->spaceNr] = -1;
1454 return(ret);
1455}
1456
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1491
/* Byte found at the current parse position of the active input. */
#define RAW (*(ctxt->input->cur))
/* Same byte as RAW; only meaningful when compared against ASCII values. */
#define CUR (*(ctxt->input->cur))
/* Byte located (val) positions after the current parse position. */
#define NXT(val) (ctxt->input->cur[(val)])
/* The current parse position itself; expands to an assignable lvalue. */
#define CUR_PTR (ctxt->input->cur)
1496
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes found at s are equal to
 * c1 ... cn. Bytes are compared as unsigned char values.
 *
 * Every macro argument is parenthesized so that compound expressions
 * (pointer arithmetic, conditional expressions, ...) can be passed safely.
 * Note that s is still expanded once per compared byte, so it must not
 * carry side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1514
/*
 * SKIP(val): move the parse position forward by val bytes, keeping the
 * character count and the column in step (no newline handling). A '%' at
 * the new position is handed to xmlParserHandlePEReference(); when the new
 * position holds a 0 byte and the input cannot be grown, the input is
 * popped.
 */
#define SKIP(val) do { \
    ctxt->nbChars += (val); \
    ctxt->input->cur += (val); \
    ctxt->input->col += (val); \
    if (*ctxt->input->cur == '%') \
        xmlParserHandlePEReference(ctxt); \
    if (*ctxt->input->cur == 0) { \
        if (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) \
            xmlPopInput(ctxt); \
    } \
  } while (0)
1522
/*
 * SKIPL(val): move the parse position forward by val bytes one byte at a
 * time, so that line and column stay correct when newlines are crossed.
 * After the move a '%' is handed to xmlParserHandlePEReference(), and an
 * input that shows a 0 byte and cannot be grown is popped.
 *
 * val is parenthesized and evaluated exactly once, so compound expressions
 * (e.g. a conditional) are safe to pass.
 */
#define SKIPL(val) do { \
    int skipl; \
    for (skipl = (val); skipl > 0; skipl--) { \
        if (*(ctxt->input->cur) == '\n') { \
            ctxt->input->line++; ctxt->input->col = 1; \
        } else ctxt->input->col++; \
        ctxt->nbChars++; \
        ctxt->input->cur++; \
    } \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
            xmlPopInput(ctxt); \
  } while (0)
1537
Daniel Veillarda880b122003-04-21 21:36:41 +00001538#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001539 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1540 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001541 xmlSHRINK (ctxt);
1542
1543static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1544 xmlParserInputShrink(ctxt->input);
1545 if ((*ctxt->input->cur == 0) &&
1546 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1547 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001548 }
Owen Taylor3473f882001-02-23 17:55:21 +00001549
Daniel Veillarda880b122003-04-21 21:36:41 +00001550#define GROW if ((ctxt->progressive == 0) && \
1551 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001552 xmlGROW (ctxt);
1553
1554static void xmlGROW (xmlParserCtxtPtr ctxt) {
1555 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1556 if ((*ctxt->input->cur == 0) &&
1557 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1558 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001559}
Owen Taylor3473f882001-02-23 17:55:21 +00001560
/* Skip the blanks found at the parse position (see xmlSkipBlankChars). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Advance to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one byte, bumping the column and the character
 * count, and try to grow the input when the new position holds a 0 byte.
 * There is no newline handling here.
 */
#define NEXT1 { \
    ctxt->input->cur++; \
    ctxt->input->col++; \
    ctxt->nbChars++; \
    if (*ctxt->input->cur == 0) { \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
    } \
  }
1572
/*
 * NEXTL(l): step over the current character, which is l bytes long.
 * A newline starts a new line (column reset to 1), anything else advances
 * the column by one. A '%' at the new position is handed to
 * xmlParserHandlePEReference().
 */
#define NEXTL(l) do { \
    if (*(ctxt->input->cur) != '\n') { \
        ctxt->input->col++; \
    } else { \
        ctxt->input->line++; \
        ctxt->input->col = 1; \
    } \
    ctxt->input->cur += l; \
    if (*ctxt->input->cur == '%') \
        xmlParserHandlePEReference(ctxt); \
  } while (0)
1580
/* Current character of the parser input; l receives its length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* Current character of the string s; l receives its length. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &(l))

/*
 * COPY_BUF(l,b,i,v): append the character v to buffer b at index i and
 * advance i. A length l of 1 stores a single byte, any other value goes
 * through xmlCopyCharMultiByte().
 *
 * All arguments are parenthesized so that expressions can be passed for
 * b and v; i must be a modifiable lvalue.
 * The expansion is a bare if/else statement: always use it as a complete
 * statement inside braces.
 */
#define COPY_BUF(l,b,i,v) \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v); \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00001587
1588/**
1589 * xmlSkipBlankChars:
1590 * @ctxt: the XML parser context
1591 *
1592 * skip all blanks character found at that point in the input streams.
1593 * It pops up finished entities in the process if allowable at that point.
1594 *
1595 * Returns the number of space chars skipped
1596 */
1597
1598int
1599xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001600 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001601
1602 /*
1603 * It's Okay to use CUR/NEXT here since all the blanks are on
1604 * the ASCII range.
1605 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001606 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1607 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001608 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001609 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001610 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001611 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001612 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001613 if (*cur == '\n') {
1614 ctxt->input->line++; ctxt->input->col = 1;
1615 }
1616 cur++;
1617 res++;
1618 if (*cur == 0) {
1619 ctxt->input->cur = cur;
1620 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1621 cur = ctxt->input->cur;
1622 }
1623 }
1624 ctxt->input->cur = cur;
1625 } else {
1626 int cur;
1627 do {
1628 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001629 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001630 NEXT;
1631 cur = CUR;
1632 res++;
1633 }
1634 while ((cur == 0) && (ctxt->inputNr > 1) &&
1635 (ctxt->instate != XML_PARSER_COMMENT)) {
1636 xmlPopInput(ctxt);
1637 cur = CUR;
1638 }
1639 /*
1640 * Need to handle support of entities branching here
1641 */
1642 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1643 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1644 }
Owen Taylor3473f882001-02-23 17:55:21 +00001645 return(res);
1646}
1647
1648/************************************************************************
1649 * *
1650 * Commodity functions to handle entities *
1651 * *
1652 ************************************************************************/
1653
1654/**
1655 * xmlPopInput:
1656 * @ctxt: an XML parser context
1657 *
1658 * xmlPopInput: the current input pointed by ctxt->input came to an end
1659 * pop it and return the next char.
1660 *
1661 * Returns the current xmlChar in the parser context
1662 */
1663xmlChar
1664xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001665 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001666 if (xmlParserDebugEntities)
1667 xmlGenericError(xmlGenericErrorContext,
1668 "Popping input %d\n", ctxt->inputNr);
1669 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001670 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001671 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1672 return(xmlPopInput(ctxt));
1673 return(CUR);
1674}
1675
1676/**
1677 * xmlPushInput:
1678 * @ctxt: an XML parser context
1679 * @input: an XML parser input fragment (entity, XML fragment ...).
1680 *
1681 * xmlPushInput: switch to a new input stream which is stacked on top
1682 * of the previous one(s).
1683 */
1684void
1685xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1686 if (input == NULL) return;
1687
1688 if (xmlParserDebugEntities) {
1689 if ((ctxt->input != NULL) && (ctxt->input->filename))
1690 xmlGenericError(xmlGenericErrorContext,
1691 "%s(%d): ", ctxt->input->filename,
1692 ctxt->input->line);
1693 xmlGenericError(xmlGenericErrorContext,
1694 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1695 }
1696 inputPush(ctxt, input);
1697 GROW;
1698}
1699
1700/**
1701 * xmlParseCharRef:
1702 * @ctxt: an XML parser context
1703 *
1704 * parse Reference declarations
1705 *
1706 * [66] CharRef ::= '&#' [0-9]+ ';' |
1707 * '&#x' [0-9a-fA-F]+ ';'
1708 *
1709 * [ WFC: Legal Character ]
1710 * Characters referred to using character references must match the
1711 * production for Char.
1712 *
1713 * Returns the value parsed (as an int), 0 in case of error
1714 */
1715int
1716xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001717 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001718 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001719 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001720
Owen Taylor3473f882001-02-23 17:55:21 +00001721 /*
1722 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1723 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001724 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001725 (NXT(2) == 'x')) {
1726 SKIP(3);
1727 GROW;
1728 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001729 if (count++ > 20) {
1730 count = 0;
1731 GROW;
1732 }
1733 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001734 val = val * 16 + (CUR - '0');
1735 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1736 val = val * 16 + (CUR - 'a') + 10;
1737 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1738 val = val * 16 + (CUR - 'A') + 10;
1739 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001740 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001741 val = 0;
1742 break;
1743 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001744 if (val > 0x10FFFF)
1745 outofrange = val;
1746
Owen Taylor3473f882001-02-23 17:55:21 +00001747 NEXT;
1748 count++;
1749 }
1750 if (RAW == ';') {
1751 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001752 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001753 ctxt->nbChars ++;
1754 ctxt->input->cur++;
1755 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001756 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001757 SKIP(2);
1758 GROW;
1759 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001760 if (count++ > 20) {
1761 count = 0;
1762 GROW;
1763 }
1764 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001765 val = val * 10 + (CUR - '0');
1766 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001767 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001768 val = 0;
1769 break;
1770 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001771 if (val > 0x10FFFF)
1772 outofrange = val;
1773
Owen Taylor3473f882001-02-23 17:55:21 +00001774 NEXT;
1775 count++;
1776 }
1777 if (RAW == ';') {
1778 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001779 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001780 ctxt->nbChars ++;
1781 ctxt->input->cur++;
1782 }
1783 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001784 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001785 }
1786
1787 /*
1788 * [ WFC: Legal Character ]
1789 * Characters referred to using character references must match the
1790 * production for Char.
1791 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001792 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001793 return(val);
1794 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001795 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1796 "xmlParseCharRef: invalid xmlChar value %d\n",
1797 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001798 }
1799 return(0);
1800}
1801
1802/**
1803 * xmlParseStringCharRef:
1804 * @ctxt: an XML parser context
1805 * @str: a pointer to an index in the string
1806 *
1807 * parse Reference declarations, variant parsing from a string rather
1808 * than an an input flow.
1809 *
1810 * [66] CharRef ::= '&#' [0-9]+ ';' |
1811 * '&#x' [0-9a-fA-F]+ ';'
1812 *
1813 * [ WFC: Legal Character ]
1814 * Characters referred to using character references must match the
1815 * production for Char.
1816 *
1817 * Returns the value parsed (as an int), 0 in case of error, str will be
1818 * updated to the current value of the index
1819 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001820static int
Owen Taylor3473f882001-02-23 17:55:21 +00001821xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1822 const xmlChar *ptr;
1823 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001824 unsigned int val = 0;
1825 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001826
1827 if ((str == NULL) || (*str == NULL)) return(0);
1828 ptr = *str;
1829 cur = *ptr;
1830 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1831 ptr += 3;
1832 cur = *ptr;
1833 while (cur != ';') { /* Non input consuming loop */
1834 if ((cur >= '0') && (cur <= '9'))
1835 val = val * 16 + (cur - '0');
1836 else if ((cur >= 'a') && (cur <= 'f'))
1837 val = val * 16 + (cur - 'a') + 10;
1838 else if ((cur >= 'A') && (cur <= 'F'))
1839 val = val * 16 + (cur - 'A') + 10;
1840 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001841 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001842 val = 0;
1843 break;
1844 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001845 if (val > 0x10FFFF)
1846 outofrange = val;
1847
Owen Taylor3473f882001-02-23 17:55:21 +00001848 ptr++;
1849 cur = *ptr;
1850 }
1851 if (cur == ';')
1852 ptr++;
1853 } else if ((cur == '&') && (ptr[1] == '#')){
1854 ptr += 2;
1855 cur = *ptr;
1856 while (cur != ';') { /* Non input consuming loops */
1857 if ((cur >= '0') && (cur <= '9'))
1858 val = val * 10 + (cur - '0');
1859 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001860 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001861 val = 0;
1862 break;
1863 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001864 if (val > 0x10FFFF)
1865 outofrange = val;
1866
Owen Taylor3473f882001-02-23 17:55:21 +00001867 ptr++;
1868 cur = *ptr;
1869 }
1870 if (cur == ';')
1871 ptr++;
1872 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001873 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001874 return(0);
1875 }
1876 *str = ptr;
1877
1878 /*
1879 * [ WFC: Legal Character ]
1880 * Characters referred to using character references must match the
1881 * production for Char.
1882 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001883 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001884 return(val);
1885 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001886 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1887 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1888 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001889 }
1890 return(0);
1891}
1892
1893/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001894 * xmlNewBlanksWrapperInputStream:
1895 * @ctxt: an XML parser context
1896 * @entity: an Entity pointer
1897 *
1898 * Create a new input stream for wrapping
1899 * blanks around a PEReference
1900 *
1901 * Returns the new input stream or NULL
1902 */
1903
1904static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1905
Daniel Veillardf4862f02002-09-10 11:13:43 +00001906static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001907xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1908 xmlParserInputPtr input;
1909 xmlChar *buffer;
1910 size_t length;
1911 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001912 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1913 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001914 return(NULL);
1915 }
1916 if (xmlParserDebugEntities)
1917 xmlGenericError(xmlGenericErrorContext,
1918 "new blanks wrapper for entity: %s\n", entity->name);
1919 input = xmlNewInputStream(ctxt);
1920 if (input == NULL) {
1921 return(NULL);
1922 }
1923 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001924 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001925 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001926 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001927 return(NULL);
1928 }
1929 buffer [0] = ' ';
1930 buffer [1] = '%';
1931 buffer [length-3] = ';';
1932 buffer [length-2] = ' ';
1933 buffer [length-1] = 0;
1934 memcpy(buffer + 2, entity->name, length - 5);
1935 input->free = deallocblankswrapper;
1936 input->base = buffer;
1937 input->cur = buffer;
1938 input->length = length;
1939 input->end = &buffer[length];
1940 return(input);
1941}
1942
1943/**
Owen Taylor3473f882001-02-23 17:55:21 +00001944 * xmlParserHandlePEReference:
1945 * @ctxt: the parser context
1946 *
1947 * [69] PEReference ::= '%' Name ';'
1948 *
1949 * [ WFC: No Recursion ]
1950 * A parsed entity must not contain a recursive
1951 * reference to itself, either directly or indirectly.
1952 *
1953 * [ WFC: Entity Declared ]
1954 * In a document without any DTD, a document with only an internal DTD
1955 * subset which contains no parameter entity references, or a document
1956 * with "standalone='yes'", ... ... The declaration of a parameter
1957 * entity must precede any reference to it...
1958 *
1959 * [ VC: Entity Declared ]
1960 * In a document with an external subset or external parameter entities
1961 * with "standalone='no'", ... ... The declaration of a parameter entity
1962 * must precede any reference to it...
1963 *
1964 * [ WFC: In DTD ]
1965 * Parameter-entity references may only appear in the DTD.
1966 * NOTE: misleading but this is handled.
1967 *
1968 * A PEReference may have been detected in the current input stream
1969 * the handling is done accordingly to
1970 * http://www.w3.org/TR/REC-xml#entproc
1971 * i.e.
1972 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001973 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001974 */
1975void
1976xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001977 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001978 xmlEntityPtr entity = NULL;
1979 xmlParserInputPtr input;
1980
Owen Taylor3473f882001-02-23 17:55:21 +00001981 if (RAW != '%') return;
1982 switch(ctxt->instate) {
1983 case XML_PARSER_CDATA_SECTION:
1984 return;
1985 case XML_PARSER_COMMENT:
1986 return;
1987 case XML_PARSER_START_TAG:
1988 return;
1989 case XML_PARSER_END_TAG:
1990 return;
1991 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001992 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001993 return;
1994 case XML_PARSER_PROLOG:
1995 case XML_PARSER_START:
1996 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001997 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001998 return;
1999 case XML_PARSER_ENTITY_DECL:
2000 case XML_PARSER_CONTENT:
2001 case XML_PARSER_ATTRIBUTE_VALUE:
2002 case XML_PARSER_PI:
2003 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002004 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002005 /* we just ignore it there */
2006 return;
2007 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002008 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002009 return;
2010 case XML_PARSER_ENTITY_VALUE:
2011 /*
2012 * NOTE: in the case of entity values, we don't do the
2013 * substitution here since we need the literal
2014 * entity value to be able to save the internal
2015 * subset of the document.
2016 * This will be handled by xmlStringDecodeEntities
2017 */
2018 return;
2019 case XML_PARSER_DTD:
2020 /*
2021 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2022 * In the internal DTD subset, parameter-entity references
2023 * can occur only where markup declarations can occur, not
2024 * within markup declarations.
2025 * In that case this is handled in xmlParseMarkupDecl
2026 */
2027 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2028 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002029 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002030 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002031 break;
2032 case XML_PARSER_IGNORE:
2033 return;
2034 }
2035
2036 NEXT;
2037 name = xmlParseName(ctxt);
2038 if (xmlParserDebugEntities)
2039 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002040 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002041 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002042 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002043 } else {
2044 if (RAW == ';') {
2045 NEXT;
2046 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2047 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2048 if (entity == NULL) {
2049
2050 /*
2051 * [ WFC: Entity Declared ]
2052 * In a document without any DTD, a document with only an
2053 * internal DTD subset which contains no parameter entity
2054 * references, or a document with "standalone='yes'", ...
2055 * ... The declaration of a parameter entity must precede
2056 * any reference to it...
2057 */
2058 if ((ctxt->standalone == 1) ||
2059 ((ctxt->hasExternalSubset == 0) &&
2060 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002061 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002062 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002063 } else {
2064 /*
2065 * [ VC: Entity Declared ]
2066 * In a document with an external subset or external
2067 * parameter entities with "standalone='no'", ...
2068 * ... The declaration of a parameter entity must precede
2069 * any reference to it...
2070 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002071 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2072 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2073 "PEReference: %%%s; not found\n",
2074 name);
2075 } else
2076 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2077 "PEReference: %%%s; not found\n",
2078 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002079 ctxt->valid = 0;
2080 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002081 } else if (ctxt->input->free != deallocblankswrapper) {
2082 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2083 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002084 } else {
2085 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2086 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002087 xmlChar start[4];
2088 xmlCharEncoding enc;
2089
Owen Taylor3473f882001-02-23 17:55:21 +00002090 /*
2091 * handle the extra spaces added before and after
2092 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002093 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002094 */
2095 input = xmlNewEntityInputStream(ctxt, entity);
2096 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00002097
2098 /*
2099 * Get the 4 first bytes and decode the charset
2100 * if enc != XML_CHAR_ENCODING_NONE
2101 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002102 * Note that, since we may have some non-UTF8
2103 * encoding (like UTF16, bug 135229), the 'length'
2104 * is not known, but we can calculate based upon
2105 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002106 */
2107 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002108 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002109 start[0] = RAW;
2110 start[1] = NXT(1);
2111 start[2] = NXT(2);
2112 start[3] = NXT(3);
2113 enc = xmlDetectCharEncoding(start, 4);
2114 if (enc != XML_CHAR_ENCODING_NONE) {
2115 xmlSwitchEncoding(ctxt, enc);
2116 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002117 }
2118
Owen Taylor3473f882001-02-23 17:55:21 +00002119 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002120 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2121 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002122 xmlParseTextDecl(ctxt);
2123 }
Owen Taylor3473f882001-02-23 17:55:21 +00002124 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002125 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2126 "PEReference: %s is not a parameter entity\n",
2127 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002128 }
2129 }
2130 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002131 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002132 }
Owen Taylor3473f882001-02-23 17:55:21 +00002133 }
2134}
2135
/*
 * Macro used to grow the current buffer: doubles <buffer>_size and
 * reallocates <buffer> to that many xmlChars. The enclosing function must
 * provide a mem_error label, which is jumped to when the reallocation
 * fails (the buffer pointer is left unchanged in that case).
 */
#define growBuffer(buffer) { \
    xmlChar *grown; \
    buffer##_size *= 2; \
    grown = (xmlChar *) xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \
    if (grown != NULL) \
        buffer = grown; \
    else \
        goto mem_error; \
}
2147
2148/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002149 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002150 * @ctxt: the parser context
2151 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002152 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002153 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2154 * @end: an end marker xmlChar, 0 if none
2155 * @end2: an end marker xmlChar, 0 if none
2156 * @end3: an end marker xmlChar, 0 if none
2157 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002158 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002159 *
2160 * [67] Reference ::= EntityRef | CharRef
2161 *
2162 * [69] PEReference ::= '%' Name ';'
2163 *
2164 * Returns A newly allocated string with the substitution done. The caller
2165 * must deallocate it !
2166 */
2167xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002168xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2169 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002170 xmlChar *buffer = NULL;
2171 int buffer_size = 0;
2172
2173 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002174 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002175 xmlEntityPtr ent;
2176 int c,l;
2177 int nbchars = 0;
2178
Daniel Veillarda82b1822004-11-08 16:24:57 +00002179 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002180 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002181 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002182
2183 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002184 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002185 return(NULL);
2186 }
2187
2188 /*
2189 * allocate a translation buffer.
2190 */
2191 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002192 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002193 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002194
2195 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002196 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002197 * we are operating on already parsed values.
2198 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002199 if (str < last)
2200 c = CUR_SCHAR(str, l);
2201 else
2202 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002203 while ((c != 0) && (c != end) && /* non input consuming loop */
2204 (c != end2) && (c != end3)) {
2205
2206 if (c == 0) break;
2207 if ((c == '&') && (str[1] == '#')) {
2208 int val = xmlParseStringCharRef(ctxt, &str);
2209 if (val != 0) {
2210 COPY_BUF(0,buffer,nbchars,val);
2211 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002212 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2213 growBuffer(buffer);
2214 }
Owen Taylor3473f882001-02-23 17:55:21 +00002215 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2216 if (xmlParserDebugEntities)
2217 xmlGenericError(xmlGenericErrorContext,
2218 "String decoding Entity Reference: %.30s\n",
2219 str);
2220 ent = xmlParseStringEntityRef(ctxt, &str);
2221 if ((ent != NULL) &&
2222 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2223 if (ent->content != NULL) {
2224 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002225 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2226 growBuffer(buffer);
2227 }
Owen Taylor3473f882001-02-23 17:55:21 +00002228 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002229 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2230 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002231 }
2232 } else if ((ent != NULL) && (ent->content != NULL)) {
2233 xmlChar *rep;
2234
2235 ctxt->depth++;
2236 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2237 0, 0, 0);
2238 ctxt->depth--;
2239 if (rep != NULL) {
2240 current = rep;
2241 while (*current != 0) { /* non input consuming loop */
2242 buffer[nbchars++] = *current++;
2243 if (nbchars >
2244 buffer_size - XML_PARSER_BUFFER_SIZE) {
2245 growBuffer(buffer);
2246 }
2247 }
2248 xmlFree(rep);
2249 }
2250 } else if (ent != NULL) {
2251 int i = xmlStrlen(ent->name);
2252 const xmlChar *cur = ent->name;
2253
2254 buffer[nbchars++] = '&';
2255 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2256 growBuffer(buffer);
2257 }
2258 for (;i > 0;i--)
2259 buffer[nbchars++] = *cur++;
2260 buffer[nbchars++] = ';';
2261 }
2262 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2263 if (xmlParserDebugEntities)
2264 xmlGenericError(xmlGenericErrorContext,
2265 "String decoding PE Reference: %.30s\n", str);
2266 ent = xmlParseStringPEReference(ctxt, &str);
2267 if (ent != NULL) {
2268 xmlChar *rep;
2269
2270 ctxt->depth++;
2271 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2272 0, 0, 0);
2273 ctxt->depth--;
2274 if (rep != NULL) {
2275 current = rep;
2276 while (*current != 0) { /* non input consuming loop */
2277 buffer[nbchars++] = *current++;
2278 if (nbchars >
2279 buffer_size - XML_PARSER_BUFFER_SIZE) {
2280 growBuffer(buffer);
2281 }
2282 }
2283 xmlFree(rep);
2284 }
2285 }
2286 } else {
2287 COPY_BUF(l,buffer,nbchars,c);
2288 str += l;
2289 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2290 growBuffer(buffer);
2291 }
2292 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002293 if (str < last)
2294 c = CUR_SCHAR(str, l);
2295 else
2296 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002297 }
2298 buffer[nbchars++] = 0;
2299 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002300
2301mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002302 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002303 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002304}
2305
Daniel Veillarde57ec792003-09-10 10:50:59 +00002306/**
2307 * xmlStringDecodeEntities:
2308 * @ctxt: the parser context
2309 * @str: the input string
2310 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2311 * @end: an end marker xmlChar, 0 if none
2312 * @end2: an end marker xmlChar, 0 if none
2313 * @end3: an end marker xmlChar, 0 if none
2314 *
2315 * Takes a entity string content and process to do the adequate substitutions.
2316 *
2317 * [67] Reference ::= EntityRef | CharRef
2318 *
2319 * [69] PEReference ::= '%' Name ';'
2320 *
2321 * Returns A newly allocated string with the substitution done. The caller
2322 * must deallocate it !
2323 */
2324xmlChar *
2325xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2326 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002327 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002328 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2329 end, end2, end3));
2330}
Owen Taylor3473f882001-02-23 17:55:21 +00002331
2332/************************************************************************
2333 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002334 * Commodity functions, cleanup needed ? *
2335 * *
2336 ************************************************************************/
2337
2338/**
2339 * areBlanks:
2340 * @ctxt: an XML parser context
2341 * @str: a xmlChar *
2342 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002343 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002344 *
2345 * Is this a sequence of blank chars that one can ignore ?
2346 *
2347 * Returns 1 if ignorable 0 otherwise.
2348 */
2349
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002350static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2351 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002352 int i, ret;
2353 xmlNodePtr lastChild;
2354
Daniel Veillard05c13a22001-09-09 08:38:09 +00002355 /*
2356 * Don't spend time trying to differentiate them, the same callback is
2357 * used !
2358 */
2359 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002360 return(0);
2361
Owen Taylor3473f882001-02-23 17:55:21 +00002362 /*
2363 * Check for xml:space value.
2364 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002365 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2366 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002367 return(0);
2368
2369 /*
2370 * Check that the string is made of blanks
2371 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002372 if (blank_chars == 0) {
2373 for (i = 0;i < len;i++)
2374 if (!(IS_BLANK_CH(str[i]))) return(0);
2375 }
Owen Taylor3473f882001-02-23 17:55:21 +00002376
2377 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002378 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002379 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002380 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002381 if (ctxt->myDoc != NULL) {
2382 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2383 if (ret == 0) return(1);
2384 if (ret == 1) return(0);
2385 }
2386
2387 /*
2388 * Otherwise, heuristic :-\
2389 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002390 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002391 if ((ctxt->node->children == NULL) &&
2392 (RAW == '<') && (NXT(1) == '/')) return(0);
2393
2394 lastChild = xmlGetLastChild(ctxt->node);
2395 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002396 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2397 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002398 } else if (xmlNodeIsText(lastChild))
2399 return(0);
2400 else if ((ctxt->node->children != NULL) &&
2401 (xmlNodeIsText(ctxt->node->children)))
2402 return(0);
2403 return(1);
2404}
2405
Owen Taylor3473f882001-02-23 17:55:21 +00002406/************************************************************************
2407 * *
2408 * Extra stuff for namespace support *
2409 * Relates to http://www.w3.org/TR/WD-xml-names *
2410 * *
2411 ************************************************************************/
2412
2413/**
2414 * xmlSplitQName:
2415 * @ctxt: an XML parser context
2416 * @name: an XML parser context
2417 * @prefix: a xmlChar **
2418 *
2419 * parse an UTF8 encoded XML qualified name string
2420 *
2421 * [NS 5] QName ::= (Prefix ':')? LocalPart
2422 *
2423 * [NS 6] Prefix ::= NCName
2424 *
2425 * [NS 7] LocalPart ::= NCName
2426 *
2427 * Returns the local part, and prefix is updated
2428 * to get the Prefix if any.
2429 */
2430
2431xmlChar *
2432xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2433 xmlChar buf[XML_MAX_NAMELEN + 5];
2434 xmlChar *buffer = NULL;
2435 int len = 0;
2436 int max = XML_MAX_NAMELEN;
2437 xmlChar *ret = NULL;
2438 const xmlChar *cur = name;
2439 int c;
2440
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002441 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002442 *prefix = NULL;
2443
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002444 if (cur == NULL) return(NULL);
2445
Owen Taylor3473f882001-02-23 17:55:21 +00002446#ifndef XML_XML_NAMESPACE
2447 /* xml: prefix is not really a namespace */
2448 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2449 (cur[2] == 'l') && (cur[3] == ':'))
2450 return(xmlStrdup(name));
2451#endif
2452
Daniel Veillard597bc482003-07-24 16:08:28 +00002453 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002454 if (cur[0] == ':')
2455 return(xmlStrdup(name));
2456
2457 c = *cur++;
2458 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2459 buf[len++] = c;
2460 c = *cur++;
2461 }
2462 if (len >= max) {
2463 /*
2464 * Okay someone managed to make a huge name, so he's ready to pay
2465 * for the processing speed.
2466 */
2467 max = len * 2;
2468
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002469 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002470 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002471 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002472 return(NULL);
2473 }
2474 memcpy(buffer, buf, len);
2475 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2476 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002477 xmlChar *tmp;
2478
Owen Taylor3473f882001-02-23 17:55:21 +00002479 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002480 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002481 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002482 if (tmp == NULL) {
2483 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002484 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002485 return(NULL);
2486 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002487 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002488 }
2489 buffer[len++] = c;
2490 c = *cur++;
2491 }
2492 buffer[len] = 0;
2493 }
2494
Daniel Veillard597bc482003-07-24 16:08:28 +00002495 /* nasty but well=formed
2496 if ((c == ':') && (*cur == 0)) {
2497 return(xmlStrdup(name));
2498 } */
2499
Owen Taylor3473f882001-02-23 17:55:21 +00002500 if (buffer == NULL)
2501 ret = xmlStrndup(buf, len);
2502 else {
2503 ret = buffer;
2504 buffer = NULL;
2505 max = XML_MAX_NAMELEN;
2506 }
2507
2508
2509 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002510 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002511 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002512 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002513 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002514 }
Owen Taylor3473f882001-02-23 17:55:21 +00002515 len = 0;
2516
Daniel Veillardbb284f42002-10-16 18:02:47 +00002517 /*
2518 * Check that the first character is proper to start
2519 * a new name
2520 */
2521 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2522 ((c >= 0x41) && (c <= 0x5A)) ||
2523 (c == '_') || (c == ':'))) {
2524 int l;
2525 int first = CUR_SCHAR(cur, l);
2526
2527 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002528 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002529 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002530 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002531 }
2532 }
2533 cur++;
2534
Owen Taylor3473f882001-02-23 17:55:21 +00002535 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2536 buf[len++] = c;
2537 c = *cur++;
2538 }
2539 if (len >= max) {
2540 /*
2541 * Okay someone managed to make a huge name, so he's ready to pay
2542 * for the processing speed.
2543 */
2544 max = len * 2;
2545
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002546 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002547 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002548 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002549 return(NULL);
2550 }
2551 memcpy(buffer, buf, len);
2552 while (c != 0) { /* tested bigname2.xml */
2553 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002554 xmlChar *tmp;
2555
Owen Taylor3473f882001-02-23 17:55:21 +00002556 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002557 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002558 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002559 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002560 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002561 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002562 return(NULL);
2563 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002564 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002565 }
2566 buffer[len++] = c;
2567 c = *cur++;
2568 }
2569 buffer[len] = 0;
2570 }
2571
2572 if (buffer == NULL)
2573 ret = xmlStrndup(buf, len);
2574 else {
2575 ret = buffer;
2576 }
2577 }
2578
2579 return(ret);
2580}
2581
2582/************************************************************************
2583 * *
2584 * The parser itself *
2585 * Relates to http://www.w3.org/TR/REC-xml *
2586 * *
2587 ************************************************************************/
2588
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002589static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002590static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002591 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002592
Owen Taylor3473f882001-02-23 17:55:21 +00002593/**
2594 * xmlParseName:
2595 * @ctxt: an XML parser context
2596 *
2597 * parse an XML name.
2598 *
2599 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2600 * CombiningChar | Extender
2601 *
2602 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2603 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002604 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002605 *
2606 * Returns the Name parsed or NULL
2607 */
2608
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002609const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002610xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002611 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002612 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002613 int count = 0;
2614
2615 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002616
2617 /*
2618 * Accelerator for simple ASCII names
2619 */
2620 in = ctxt->input->cur;
2621 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2622 ((*in >= 0x41) && (*in <= 0x5A)) ||
2623 (*in == '_') || (*in == ':')) {
2624 in++;
2625 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2626 ((*in >= 0x41) && (*in <= 0x5A)) ||
2627 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002628 (*in == '_') || (*in == '-') ||
2629 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002630 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002631 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002632 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002633 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002634 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002635 ctxt->nbChars += count;
2636 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002637 if (ret == NULL)
2638 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002639 return(ret);
2640 }
2641 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002642 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002643}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002644
Daniel Veillard46de64e2002-05-29 08:21:33 +00002645/**
2646 * xmlParseNameAndCompare:
2647 * @ctxt: an XML parser context
2648 *
2649 * parse an XML name and compares for match
2650 * (specialized for endtag parsing)
2651 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002652 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2653 * and the name for mismatch
2654 */
2655
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002656static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002657xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002658 register const xmlChar *cmp = other;
2659 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002660 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002661
2662 GROW;
2663
2664 in = ctxt->input->cur;
2665 while (*in != 0 && *in == *cmp) {
2666 ++in;
2667 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00002668 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002669 }
William M. Brack76e95df2003-10-18 16:20:14 +00002670 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002671 /* success */
2672 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002673 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002674 }
2675 /* failure (or end of input buffer), check with full function */
2676 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002677 /* strings coming from the dictionnary direct compare possible */
2678 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002679 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002680 }
2681 return ret;
2682}
2683
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002684static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002685xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002686 int len = 0, l;
2687 int c;
2688 int count = 0;
2689
2690 /*
2691 * Handler for more complex cases
2692 */
2693 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002694 c = CUR_CHAR(l);
2695 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2696 (!IS_LETTER(c) && (c != '_') &&
2697 (c != ':'))) {
2698 return(NULL);
2699 }
2700
2701 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002702 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002703 (c == '.') || (c == '-') ||
2704 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002705 (IS_COMBINING(c)) ||
2706 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002707 if (count++ > 100) {
2708 count = 0;
2709 GROW;
2710 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002711 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002712 NEXTL(l);
2713 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002714 }
Daniel Veillard96688262005-08-23 18:14:12 +00002715 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2716 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002717 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002718}
2719
2720/**
2721 * xmlParseStringName:
2722 * @ctxt: an XML parser context
2723 * @str: a pointer to the string pointer (IN/OUT)
2724 *
2725 * parse an XML name.
2726 *
2727 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2728 * CombiningChar | Extender
2729 *
2730 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2731 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002732 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002733 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002734 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002735 * is updated to the current location in the string.
2736 */
2737
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002738static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002739xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2740 xmlChar buf[XML_MAX_NAMELEN + 5];
2741 const xmlChar *cur = *str;
2742 int len = 0, l;
2743 int c;
2744
2745 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002746 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002747 (c != ':')) {
2748 return(NULL);
2749 }
2750
William M. Brack871611b2003-10-18 04:53:14 +00002751 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002752 (c == '.') || (c == '-') ||
2753 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002754 (IS_COMBINING(c)) ||
2755 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002756 COPY_BUF(l,buf,len,c);
2757 cur += l;
2758 c = CUR_SCHAR(cur, l);
2759 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2760 /*
2761 * Okay someone managed to make a huge name, so he's ready to pay
2762 * for the processing speed.
2763 */
2764 xmlChar *buffer;
2765 int max = len * 2;
2766
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002767 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002768 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002769 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002770 return(NULL);
2771 }
2772 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002773 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002774 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002775 (c == '.') || (c == '-') ||
2776 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002777 (IS_COMBINING(c)) ||
2778 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002779 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002780 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002781 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002782 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002783 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002784 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002785 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002786 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002787 return(NULL);
2788 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002789 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002790 }
2791 COPY_BUF(l,buffer,len,c);
2792 cur += l;
2793 c = CUR_SCHAR(cur, l);
2794 }
2795 buffer[len] = 0;
2796 *str = cur;
2797 return(buffer);
2798 }
2799 }
2800 *str = cur;
2801 return(xmlStrndup(buf, len));
2802}
2803
2804/**
2805 * xmlParseNmtoken:
2806 * @ctxt: an XML parser context
2807 *
2808 * parse an XML Nmtoken.
2809 *
2810 * [7] Nmtoken ::= (NameChar)+
2811 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002812 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002813 *
2814 * Returns the Nmtoken parsed or NULL
2815 */
2816
2817xmlChar *
2818xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2819 xmlChar buf[XML_MAX_NAMELEN + 5];
2820 int len = 0, l;
2821 int c;
2822 int count = 0;
2823
2824 GROW;
2825 c = CUR_CHAR(l);
2826
William M. Brack871611b2003-10-18 04:53:14 +00002827 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002828 (c == '.') || (c == '-') ||
2829 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002830 (IS_COMBINING(c)) ||
2831 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002832 if (count++ > 100) {
2833 count = 0;
2834 GROW;
2835 }
2836 COPY_BUF(l,buf,len,c);
2837 NEXTL(l);
2838 c = CUR_CHAR(l);
2839 if (len >= XML_MAX_NAMELEN) {
2840 /*
2841 * Okay someone managed to make a huge token, so he's ready to pay
2842 * for the processing speed.
2843 */
2844 xmlChar *buffer;
2845 int max = len * 2;
2846
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002847 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002848 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002849 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002850 return(NULL);
2851 }
2852 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002853 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002854 (c == '.') || (c == '-') ||
2855 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002856 (IS_COMBINING(c)) ||
2857 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002858 if (count++ > 100) {
2859 count = 0;
2860 GROW;
2861 }
2862 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002863 xmlChar *tmp;
2864
Owen Taylor3473f882001-02-23 17:55:21 +00002865 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002866 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002867 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002868 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002869 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002870 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002871 return(NULL);
2872 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002873 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002874 }
2875 COPY_BUF(l,buffer,len,c);
2876 NEXTL(l);
2877 c = CUR_CHAR(l);
2878 }
2879 buffer[len] = 0;
2880 return(buffer);
2881 }
2882 }
2883 if (len == 0)
2884 return(NULL);
2885 return(xmlStrndup(buf, len));
2886}
2887
2888/**
2889 * xmlParseEntityValue:
2890 * @ctxt: an XML parser context
2891 * @orig: if non-NULL store a copy of the original entity value
2892 *
2893 * parse a value for ENTITY declarations
2894 *
2895 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2896 * "'" ([^%&'] | PEReference | Reference)* "'"
2897 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002898 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002899 */
2900
2901xmlChar *
2902xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2903 xmlChar *buf = NULL;
2904 int len = 0;
2905 int size = XML_PARSER_BUFFER_SIZE;
2906 int c, l;
2907 xmlChar stop;
2908 xmlChar *ret = NULL;
2909 const xmlChar *cur = NULL;
2910 xmlParserInputPtr input;
2911
2912 if (RAW == '"') stop = '"';
2913 else if (RAW == '\'') stop = '\'';
2914 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002915 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002916 return(NULL);
2917 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002918 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002919 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002920 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002921 return(NULL);
2922 }
2923
2924 /*
2925 * The content of the entity definition is copied in a buffer.
2926 */
2927
2928 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2929 input = ctxt->input;
2930 GROW;
2931 NEXT;
2932 c = CUR_CHAR(l);
2933 /*
2934 * NOTE: 4.4.5 Included in Literal
2935 * When a parameter entity reference appears in a literal entity
2936 * value, ... a single or double quote character in the replacement
2937 * text is always treated as a normal data character and will not
2938 * terminate the literal.
2939 * In practice it means we stop the loop only when back at parsing
2940 * the initial entity and the quote is found
2941 */
William M. Brack871611b2003-10-18 04:53:14 +00002942 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002943 (ctxt->input != input))) {
2944 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002945 xmlChar *tmp;
2946
Owen Taylor3473f882001-02-23 17:55:21 +00002947 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002948 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2949 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002950 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002951 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00002952 return(NULL);
2953 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002954 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002955 }
2956 COPY_BUF(l,buf,len,c);
2957 NEXTL(l);
2958 /*
2959 * Pop-up of finished entities.
2960 */
2961 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2962 xmlPopInput(ctxt);
2963
2964 GROW;
2965 c = CUR_CHAR(l);
2966 if (c == 0) {
2967 GROW;
2968 c = CUR_CHAR(l);
2969 }
2970 }
2971 buf[len] = 0;
2972
2973 /*
2974 * Raise problem w.r.t. '&' and '%' being used in non-entities
2975 * reference constructs. Note Charref will be handled in
2976 * xmlStringDecodeEntities()
2977 */
2978 cur = buf;
2979 while (*cur != 0) { /* non input consuming */
2980 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2981 xmlChar *name;
2982 xmlChar tmp = *cur;
2983
2984 cur++;
2985 name = xmlParseStringName(ctxt, &cur);
2986 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002987 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002988 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002989 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002990 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002991 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2992 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002993 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002994 }
2995 if (name != NULL)
2996 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002997 if (*cur == 0)
2998 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002999 }
3000 cur++;
3001 }
3002
3003 /*
3004 * Then PEReference entities are substituted.
3005 */
3006 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003007 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003008 xmlFree(buf);
3009 } else {
3010 NEXT;
3011 /*
3012 * NOTE: 4.4.7 Bypassed
3013 * When a general entity reference appears in the EntityValue in
3014 * an entity declaration, it is bypassed and left as is.
3015 * so XML_SUBSTITUTE_REF is not set here.
3016 */
3017 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3018 0, 0, 0);
3019 if (orig != NULL)
3020 *orig = buf;
3021 else
3022 xmlFree(buf);
3023 }
3024
3025 return(ret);
3026}
3027
3028/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003029 * xmlParseAttValueComplex:
3030 * @ctxt: an XML parser context
 * @attlen: the resulting attribute len
 * @normalize: whether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003033 *
3034 * parse a value for an attribute, this is the fallback function
3035 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003036 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003037 *
3038 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3039 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003040static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003041xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003042 xmlChar limit = 0;
3043 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003044 int len = 0;
3045 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003046 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003047 xmlChar *current = NULL;
3048 xmlEntityPtr ent;
3049
Owen Taylor3473f882001-02-23 17:55:21 +00003050 if (NXT(0) == '"') {
3051 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3052 limit = '"';
3053 NEXT;
3054 } else if (NXT(0) == '\'') {
3055 limit = '\'';
3056 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3057 NEXT;
3058 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003059 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003060 return(NULL);
3061 }
3062
3063 /*
3064 * allocate a translation buffer.
3065 */
3066 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003067 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003068 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003069
3070 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003071 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003072 */
3073 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003074 while ((NXT(0) != limit) && /* checked */
3075 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003076 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003077 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003078 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003079 if (NXT(1) == '#') {
3080 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003081
Owen Taylor3473f882001-02-23 17:55:21 +00003082 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003083 if (ctxt->replaceEntities) {
3084 if (len > buf_size - 10) {
3085 growBuffer(buf);
3086 }
3087 buf[len++] = '&';
3088 } else {
3089 /*
3090 * The reparsing will be done in xmlStringGetNodeList()
3091 * called by the attribute() function in SAX.c
3092 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003093 if (len > buf_size - 10) {
3094 growBuffer(buf);
3095 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003096 buf[len++] = '&';
3097 buf[len++] = '#';
3098 buf[len++] = '3';
3099 buf[len++] = '8';
3100 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003101 }
3102 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003103 if (len > buf_size - 10) {
3104 growBuffer(buf);
3105 }
Owen Taylor3473f882001-02-23 17:55:21 +00003106 len += xmlCopyChar(0, &buf[len], val);
3107 }
3108 } else {
3109 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003110 if ((ent != NULL) &&
3111 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3112 if (len > buf_size - 10) {
3113 growBuffer(buf);
3114 }
3115 if ((ctxt->replaceEntities == 0) &&
3116 (ent->content[0] == '&')) {
3117 buf[len++] = '&';
3118 buf[len++] = '#';
3119 buf[len++] = '3';
3120 buf[len++] = '8';
3121 buf[len++] = ';';
3122 } else {
3123 buf[len++] = ent->content[0];
3124 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003125 } else if ((ent != NULL) &&
3126 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003127 xmlChar *rep;
3128
3129 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3130 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003131 XML_SUBSTITUTE_REF,
3132 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003133 if (rep != NULL) {
3134 current = rep;
3135 while (*current != 0) { /* non input consuming */
3136 buf[len++] = *current++;
3137 if (len > buf_size - 10) {
3138 growBuffer(buf);
3139 }
3140 }
3141 xmlFree(rep);
3142 }
3143 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003144 if (len > buf_size - 10) {
3145 growBuffer(buf);
3146 }
Owen Taylor3473f882001-02-23 17:55:21 +00003147 if (ent->content != NULL)
3148 buf[len++] = ent->content[0];
3149 }
3150 } else if (ent != NULL) {
3151 int i = xmlStrlen(ent->name);
3152 const xmlChar *cur = ent->name;
3153
3154 /*
3155 * This may look absurd but is needed to detect
3156 * entities problems
3157 */
3158 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3159 (ent->content != NULL)) {
3160 xmlChar *rep;
3161 rep = xmlStringDecodeEntities(ctxt, ent->content,
3162 XML_SUBSTITUTE_REF, 0, 0, 0);
3163 if (rep != NULL)
3164 xmlFree(rep);
3165 }
3166
3167 /*
3168 * Just output the reference
3169 */
3170 buf[len++] = '&';
3171 if (len > buf_size - i - 10) {
3172 growBuffer(buf);
3173 }
3174 for (;i > 0;i--)
3175 buf[len++] = *cur++;
3176 buf[len++] = ';';
3177 }
3178 }
3179 } else {
3180 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003181 if ((len != 0) || (!normalize)) {
3182 if ((!normalize) || (!in_space)) {
3183 COPY_BUF(l,buf,len,0x20);
3184 if (len > buf_size - 10) {
3185 growBuffer(buf);
3186 }
3187 }
3188 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003189 }
3190 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003191 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003192 COPY_BUF(l,buf,len,c);
3193 if (len > buf_size - 10) {
3194 growBuffer(buf);
3195 }
3196 }
3197 NEXTL(l);
3198 }
3199 GROW;
3200 c = CUR_CHAR(l);
3201 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003202 if ((in_space) && (normalize)) {
3203 while (buf[len - 1] == 0x20) len--;
3204 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003205 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003206 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003207 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003208 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003209 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3210 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003211 } else
3212 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003213 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003214 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003215
3216mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003217 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003218 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003219}
3220
3221/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003222 * xmlParseAttValue:
3223 * @ctxt: an XML parser context
3224 *
3225 * parse a value for an attribute
3226 * Note: the parser won't do substitution of entities here, this
3227 * will be handled later in xmlStringGetNodeList
3228 *
3229 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3230 * "'" ([^<&'] | Reference)* "'"
3231 *
3232 * 3.3.3 Attribute-Value Normalization:
3233 * Before the value of an attribute is passed to the application or
3234 * checked for validity, the XML processor must normalize it as follows:
3235 * - a character reference is processed by appending the referenced
3236 * character to the attribute value
3237 * - an entity reference is processed by recursively processing the
3238 * replacement text of the entity
3239 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3240 * appending #x20 to the normalized value, except that only a single
3241 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3242 * parsed entity or the literal entity value of an internal parsed entity
3243 * - other characters are processed by appending them to the normalized value
3244 * If the declared value is not CDATA, then the XML processor must further
3245 * process the normalized attribute value by discarding any leading and
3246 * trailing space (#x20) characters, and by replacing sequences of space
3247 * (#x20) characters by a single space (#x20) character.
3248 * All attributes for which no declaration has been read should be treated
3249 * by a non-validating parser as if declared CDATA.
3250 *
3251 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3252 */
3253
3254
3255xmlChar *
3256xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003257 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003258 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003259}
3260
3261/**
Owen Taylor3473f882001-02-23 17:55:21 +00003262 * xmlParseSystemLiteral:
3263 * @ctxt: an XML parser context
3264 *
3265 * parse an XML Literal
3266 *
3267 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3268 *
3269 * Returns the SystemLiteral parsed or NULL
3270 */
3271
3272xmlChar *
3273xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3274 xmlChar *buf = NULL;
3275 int len = 0;
3276 int size = XML_PARSER_BUFFER_SIZE;
3277 int cur, l;
3278 xmlChar stop;
3279 int state = ctxt->instate;
3280 int count = 0;
3281
3282 SHRINK;
3283 if (RAW == '"') {
3284 NEXT;
3285 stop = '"';
3286 } else if (RAW == '\'') {
3287 NEXT;
3288 stop = '\'';
3289 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003290 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003291 return(NULL);
3292 }
3293
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003294 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003295 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003296 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003297 return(NULL);
3298 }
3299 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3300 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003301 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003302 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003303 xmlChar *tmp;
3304
Owen Taylor3473f882001-02-23 17:55:21 +00003305 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003306 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3307 if (tmp == NULL) {
3308 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003309 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003310 ctxt->instate = (xmlParserInputState) state;
3311 return(NULL);
3312 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003313 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003314 }
3315 count++;
3316 if (count > 50) {
3317 GROW;
3318 count = 0;
3319 }
3320 COPY_BUF(l,buf,len,cur);
3321 NEXTL(l);
3322 cur = CUR_CHAR(l);
3323 if (cur == 0) {
3324 GROW;
3325 SHRINK;
3326 cur = CUR_CHAR(l);
3327 }
3328 }
3329 buf[len] = 0;
3330 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003331 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003332 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003333 } else {
3334 NEXT;
3335 }
3336 return(buf);
3337}
3338
3339/**
3340 * xmlParsePubidLiteral:
3341 * @ctxt: an XML parser context
3342 *
3343 * parse an XML public literal
3344 *
3345 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3346 *
3347 * Returns the PubidLiteral parsed or NULL.
3348 */
3349
3350xmlChar *
3351xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3352 xmlChar *buf = NULL;
3353 int len = 0;
3354 int size = XML_PARSER_BUFFER_SIZE;
3355 xmlChar cur;
3356 xmlChar stop;
3357 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003358 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003359
3360 SHRINK;
3361 if (RAW == '"') {
3362 NEXT;
3363 stop = '"';
3364 } else if (RAW == '\'') {
3365 NEXT;
3366 stop = '\'';
3367 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003368 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003369 return(NULL);
3370 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003371 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003372 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003373 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003374 return(NULL);
3375 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003376 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003377 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003378 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003379 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003380 xmlChar *tmp;
3381
Owen Taylor3473f882001-02-23 17:55:21 +00003382 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003383 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3384 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003385 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003386 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003387 return(NULL);
3388 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003389 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003390 }
3391 buf[len++] = cur;
3392 count++;
3393 if (count > 50) {
3394 GROW;
3395 count = 0;
3396 }
3397 NEXT;
3398 cur = CUR;
3399 if (cur == 0) {
3400 GROW;
3401 SHRINK;
3402 cur = CUR;
3403 }
3404 }
3405 buf[len] = 0;
3406 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003407 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003408 } else {
3409 NEXT;
3410 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003411 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003412 return(buf);
3413}
3414
Daniel Veillard48b2f892001-02-25 16:11:03 +00003415void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003416
/*
 * Lookup table driving the inner loop of the char data fast path.
 * Entry i holds i itself when byte i may be skipped over, and 0 when the
 * scan has to stop: control characters other than 0x9, '&', '<', ']'
 * and every byte >= 0x80.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only 0x9 passes, CR and LF are handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) stops the scan */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) stops the scan */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) stops the scan */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii, always stops the scan */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
3454
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section.
 * if we are within a CDATA section ']]>' marks an end of section.
 *
 * When @cdata is zero a fast path scans the input buffer in place and
 * hands runs of plain ASCII text directly to the SAX callbacks, without
 * copying. Whenever the fast path cannot go on (and always when @cdata
 * is non-zero) the work is passed to xmlParseCharDataComplex().
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /*
     * line/col remember the position matching ctxt->input->cur: the fast
     * path updates ctxt->input->line/col while scanning ahead, and these
     * saved values are written back before falling through to the
     * complex routine.
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input doesn't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* first skip over a run made only of spaces and line feeds */
            while (*in == 0x20) in++;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            if (*in == '<') {
                /*
                 * Everything from cur up to the '<' was spaces and line
                 * feeds: deliver it (if non-empty) and return.
                 */
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        /*
                         * NOTE(review): the last argument of areBlanks()
                         * is 1 here and 0 at the other call sites; it
                         * presumably tells that the run is already known
                         * to be blank - confirm against areBlanks().
                         */
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                               tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            /*
                             * NOTE(review): -1 / -2 are special values of
                             * the space stack entry; their meaning is not
                             * visible here - confirm before changing.
                             */
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /*
             * Skip every byte whose test_char_data entry is non-zero,
             * counting columns in a local to avoid a store per byte.
             */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /*
                 * "]]>" is not allowed here (we are not in a CDATA
                 * section on this path): report it and stop with cur on
                 * the first ']'. A lone ']' is ordinary text.
                 */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in;
                    return;
                }
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* deliver what has been scanned so far, if anything */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    /*
                     * The run starts with a blank and the handler treats
                     * ignorable whitespace separately: let areBlanks()
                     * decide which callback gets the text.
                     */
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    /* the text was consumed: the saved position catches up */
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                /*
                 * CR LF pair: move cur onto the LF so that the CR is
                 * dropped, step over the LF and go on scanning. A CR not
                 * followed by LF is left for the code below.
                 */
                in++;
                if (*in == 0xA) {
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                in--;
            }
            /* markup or a reference follows: the caller takes over */
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            in = ctxt->input->cur;
            /* stay on the fast path only while plain ASCII or tab follows */
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /*
     * Hand over to the generic routine, with the line/col of the last
     * point up to which text was delivered.
     */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
3607
Daniel Veillard01c13b52002-12-10 15:19:08 +00003608/**
3609 * xmlParseCharDataComplex:
3610 * @ctxt: an XML parser context
3611 * @cdata: int indicating whether we are within a CDATA section
3612 *
3613 * parse a CharData section.this is the fallback function
3614 * of xmlParseCharData() when the parsing requires handling
3615 * of non-ASCII characters.
3616 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003617void
3618xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003619 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3620 int nbchar = 0;
3621 int cur, l;
3622 int count = 0;
3623
3624 SHRINK;
3625 GROW;
3626 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003627 while ((cur != '<') && /* checked */
3628 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003629 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003630 if ((cur == ']') && (NXT(1) == ']') &&
3631 (NXT(2) == '>')) {
3632 if (cdata) break;
3633 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003634 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003635 }
3636 }
3637 COPY_BUF(l,buf,nbchar,cur);
3638 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003639 buf[nbchar] = 0;
3640
Owen Taylor3473f882001-02-23 17:55:21 +00003641 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003642 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003643 */
3644 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003645 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003646 if (ctxt->sax->ignorableWhitespace != NULL)
3647 ctxt->sax->ignorableWhitespace(ctxt->userData,
3648 buf, nbchar);
3649 } else {
3650 if (ctxt->sax->characters != NULL)
3651 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003652 if ((ctxt->sax->characters !=
3653 ctxt->sax->ignorableWhitespace) &&
3654 (*ctxt->space == -1))
3655 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00003656 }
3657 }
3658 nbchar = 0;
3659 }
3660 count++;
3661 if (count > 50) {
3662 GROW;
3663 count = 0;
3664 }
3665 NEXTL(l);
3666 cur = CUR_CHAR(l);
3667 }
3668 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003669 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003670 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003671 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003672 */
3673 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003674 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003675 if (ctxt->sax->ignorableWhitespace != NULL)
3676 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3677 } else {
3678 if (ctxt->sax->characters != NULL)
3679 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003680 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
3681 (*ctxt->space == -1))
3682 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00003683 }
3684 }
3685 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00003686 if ((cur != 0) && (!IS_CHAR(cur))) {
3687 /* Generate the error and skip the offending character */
3688 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3689 "PCDATA invalid Char value %d\n",
3690 cur);
3691 NEXTL(l);
3692 }
Owen Taylor3473f882001-02-23 17:55:21 +00003693}
3694
3695/**
3696 * xmlParseExternalID:
3697 * @ctxt: an XML parser context
3698 * @publicID: a xmlChar** receiving PubidLiteral
3699 * @strict: indicate whether we should restrict parsing to only
3700 * production [75], see NOTE below
3701 *
3702 * Parse an External ID or a Public ID
3703 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003704 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003705 * 'PUBLIC' S PubidLiteral S SystemLiteral
3706 *
3707 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3708 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3709 *
3710 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3711 *
3712 * Returns the function returns SystemLiteral and in the second
3713 * case publicID receives PubidLiteral, is strict is off
3714 * it is possible to return NULL and have publicID set.
3715 */
3716
3717xmlChar *
3718xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3719 xmlChar *URI = NULL;
3720
3721 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003722
3723 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003724 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003725 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003726 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003727 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3728 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003729 }
3730 SKIP_BLANKS;
3731 URI = xmlParseSystemLiteral(ctxt);
3732 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003733 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003734 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003735 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003736 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003737 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003738 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003739 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003740 }
3741 SKIP_BLANKS;
3742 *publicID = xmlParsePubidLiteral(ctxt);
3743 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003744 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003745 }
3746 if (strict) {
3747 /*
3748 * We don't handle [83] so "S SystemLiteral" is required.
3749 */
William M. Brack76e95df2003-10-18 16:20:14 +00003750 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003751 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003752 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003753 }
3754 } else {
3755 /*
3756 * We handle [83] so we return immediately, if
3757 * "S SystemLiteral" is not detected. From a purely parsing
3758 * point of view that's a nice mess.
3759 */
3760 const xmlChar *ptr;
3761 GROW;
3762
3763 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003764 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003765
William M. Brack76e95df2003-10-18 16:20:14 +00003766 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003767 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3768 }
3769 SKIP_BLANKS;
3770 URI = xmlParseSystemLiteral(ctxt);
3771 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003772 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003773 }
3774 }
3775 return(URI);
3776}
3777
3778/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00003779 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00003780 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00003781 * @buf: the already parsed part of the buffer
3782 * @len: number of bytes filles in the buffer
3783 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00003784 *
3785 * Skip an XML (SGML) comment <!-- .... -->
3786 * The spec says that "For compatibility, the string "--" (double-hyphen)
3787 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00003788 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00003789 *
3790 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3791 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00003792static void
3793xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00003794 int q, ql;
3795 int r, rl;
3796 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00003797 xmlParserInputPtr input = ctxt->input;
3798 int count = 0;
3799
Owen Taylor3473f882001-02-23 17:55:21 +00003800 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003801 len = 0;
3802 size = XML_PARSER_BUFFER_SIZE;
3803 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3804 if (buf == NULL) {
3805 xmlErrMemory(ctxt, NULL);
3806 return;
3807 }
Owen Taylor3473f882001-02-23 17:55:21 +00003808 }
3809 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003810 if (q == 0)
3811 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003812 NEXTL(ql);
3813 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003814 if (r == 0)
3815 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003816 NEXTL(rl);
3817 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003818 if (cur == 0)
3819 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00003820 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003821 ((cur != '>') ||
3822 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003823 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003824 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003825 }
3826 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00003827 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003828 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00003829 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3830 if (new_buf == NULL) {
3831 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003832 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003833 return;
3834 }
William M. Bracka3215c72004-07-31 16:24:01 +00003835 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003836 }
3837 COPY_BUF(ql,buf,len,q);
3838 q = r;
3839 ql = rl;
3840 r = cur;
3841 rl = l;
3842
3843 count++;
3844 if (count > 50) {
3845 GROW;
3846 count = 0;
3847 }
3848 NEXTL(l);
3849 cur = CUR_CHAR(l);
3850 if (cur == 0) {
3851 SHRINK;
3852 GROW;
3853 cur = CUR_CHAR(l);
3854 }
3855 }
3856 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003857 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003858 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003859 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003860 xmlFree(buf);
3861 } else {
3862 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003863 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3864 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003865 }
3866 NEXT;
3867 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3868 (!ctxt->disableSAX))
3869 ctxt->sax->comment(ctxt->userData, buf);
3870 xmlFree(buf);
3871 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003872 return;
3873not_terminated:
3874 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3875 "Comment not terminated\n", NULL);
3876 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003877}
Daniel Veillard4c778d82005-01-23 17:37:44 +00003878/**
3879 * xmlParseComment:
3880 * @ctxt: an XML parser context
3881 *
3882 * Skip an XML (SGML) comment <!-- .... -->
3883 * The spec says that "For compatibility, the string "--" (double-hyphen)
3884 * must not occur within comments. "
3885 *
3886 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3887 */
3888void
3889xmlParseComment(xmlParserCtxtPtr ctxt) {
3890 xmlChar *buf = NULL;
3891 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00003892 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00003893 xmlParserInputState state;
3894 const xmlChar *in;
3895 int nbchar = 0, ccol;
3896
3897 /*
3898 * Check that there is a comment right here.
3899 */
3900 if ((RAW != '<') || (NXT(1) != '!') ||
3901 (NXT(2) != '-') || (NXT(3) != '-')) return;
3902
3903 state = ctxt->instate;
3904 ctxt->instate = XML_PARSER_COMMENT;
3905 SKIP(4);
3906 SHRINK;
3907 GROW;
3908
3909 /*
3910 * Accelerated common case where input don't need to be
3911 * modified before passing it to the handler.
3912 */
3913 in = ctxt->input->cur;
3914 do {
3915 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003916 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003917 ctxt->input->line++; ctxt->input->col = 1;
3918 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003919 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00003920 }
3921get_more:
3922 ccol = ctxt->input->col;
3923 while (((*in > '-') && (*in <= 0x7F)) ||
3924 ((*in >= 0x20) && (*in < '-')) ||
3925 (*in == 0x09)) {
3926 in++;
3927 ccol++;
3928 }
3929 ctxt->input->col = ccol;
3930 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003931 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003932 ctxt->input->line++; ctxt->input->col = 1;
3933 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003934 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00003935 goto get_more;
3936 }
3937 nbchar = in - ctxt->input->cur;
3938 /*
3939 * save current set of data
3940 */
3941 if (nbchar > 0) {
3942 if ((ctxt->sax != NULL) &&
3943 (ctxt->sax->comment != NULL)) {
3944 if (buf == NULL) {
3945 if ((*in == '-') && (in[1] == '-'))
3946 size = nbchar + 1;
3947 else
3948 size = XML_PARSER_BUFFER_SIZE + nbchar;
3949 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3950 if (buf == NULL) {
3951 xmlErrMemory(ctxt, NULL);
3952 ctxt->instate = state;
3953 return;
3954 }
3955 len = 0;
3956 } else if (len + nbchar + 1 >= size) {
3957 xmlChar *new_buf;
3958 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
3959 new_buf = (xmlChar *) xmlRealloc(buf,
3960 size * sizeof(xmlChar));
3961 if (new_buf == NULL) {
3962 xmlFree (buf);
3963 xmlErrMemory(ctxt, NULL);
3964 ctxt->instate = state;
3965 return;
3966 }
3967 buf = new_buf;
3968 }
3969 memcpy(&buf[len], ctxt->input->cur, nbchar);
3970 len += nbchar;
3971 buf[len] = 0;
3972 }
3973 }
3974 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00003975 if (*in == 0xA) {
3976 in++;
3977 ctxt->input->line++; ctxt->input->col = 1;
3978 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00003979 if (*in == 0xD) {
3980 in++;
3981 if (*in == 0xA) {
3982 ctxt->input->cur = in;
3983 in++;
3984 ctxt->input->line++; ctxt->input->col = 1;
3985 continue; /* while */
3986 }
3987 in--;
3988 }
3989 SHRINK;
3990 GROW;
3991 in = ctxt->input->cur;
3992 if (*in == '-') {
3993 if (in[1] == '-') {
3994 if (in[2] == '>') {
3995 SKIP(3);
3996 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3997 (!ctxt->disableSAX)) {
3998 if (buf != NULL)
3999 ctxt->sax->comment(ctxt->userData, buf);
4000 else
4001 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4002 }
4003 if (buf != NULL)
4004 xmlFree(buf);
4005 ctxt->instate = state;
4006 return;
4007 }
4008 if (buf != NULL)
4009 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4010 "Comment not terminated \n<!--%.50s\n",
4011 buf);
4012 else
4013 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4014 "Comment not terminated \n", NULL);
4015 in++;
4016 ctxt->input->col++;
4017 }
4018 in++;
4019 ctxt->input->col++;
4020 goto get_more;
4021 }
4022 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4023 xmlParseCommentComplex(ctxt, buf, len, size);
4024 ctxt->instate = state;
4025 return;
4026}
4027
Owen Taylor3473f882001-02-23 17:55:21 +00004028
4029/**
4030 * xmlParsePITarget:
4031 * @ctxt: an XML parser context
4032 *
4033 * parse the name of a PI
4034 *
4035 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4036 *
4037 * Returns the PITarget name or NULL
4038 */
4039
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004040const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004041xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004042 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004043
4044 name = xmlParseName(ctxt);
4045 if ((name != NULL) &&
4046 ((name[0] == 'x') || (name[0] == 'X')) &&
4047 ((name[1] == 'm') || (name[1] == 'M')) &&
4048 ((name[2] == 'l') || (name[2] == 'L'))) {
4049 int i;
4050 if ((name[0] == 'x') && (name[1] == 'm') &&
4051 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004052 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004053 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004054 return(name);
4055 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004056 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004057 return(name);
4058 }
4059 for (i = 0;;i++) {
4060 if (xmlW3CPIs[i] == NULL) break;
4061 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4062 return(name);
4063 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004064 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4065 "xmlParsePITarget: invalid name prefix 'xml'\n",
4066 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004067 }
4068 return(name);
4069}
4070
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * The quoted URL is added to the catalogs of the parsing context so that
 * it can be used for resolutions needed further down in the document.
 * A malformed value produces an XML_WAR_CATALOG_PI warning, except when
 * the '=' sign is missing, in which case the PI is silently ignored.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *cursor = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* pseudo attribute name */
    for (; IS_BLANK_CH(*cursor); cursor++)
        ;
    if (xmlStrncmp(cursor, BAD_CAST"catalog", 7))
        goto error;
    cursor += 7;

    /* '=' sign, no warning if it is absent */
    for (; IS_BLANK_CH(*cursor); cursor++)
        ;
    if (*cursor != '=')
        return;
    cursor++;

    /* opening quote, either ' or " */
    for (; IS_BLANK_CH(*cursor); cursor++)
        ;
    quote = *cursor;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    cursor++;

    /* the value runs up to the matching quote */
    start = cursor;
    for (; (*cursor != 0) && (*cursor != quote); cursor++)
        ;
    if (*cursor == 0)
        goto error;
    URL = xmlStrndup(start, cursor - start);
    cursor++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK_CH(*cursor); cursor++)
        ;
    if (*cursor != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
4132
Owen Taylor3473f882001-02-23 17:55:21 +00004133/**
4134 * xmlParsePI:
4135 * @ctxt: an XML parser context
4136 *
4137 * parse an XML Processing Instruction.
4138 *
4139 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4140 *
4141 * The processing is transfered to SAX once parsed.
4142 */
4143
4144void
4145xmlParsePI(xmlParserCtxtPtr ctxt) {
4146 xmlChar *buf = NULL;
4147 int len = 0;
4148 int size = XML_PARSER_BUFFER_SIZE;
4149 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004150 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004151 xmlParserInputState state;
4152 int count = 0;
4153
4154 if ((RAW == '<') && (NXT(1) == '?')) {
4155 xmlParserInputPtr input = ctxt->input;
4156 state = ctxt->instate;
4157 ctxt->instate = XML_PARSER_PI;
4158 /*
4159 * this is a Processing Instruction.
4160 */
4161 SKIP(2);
4162 SHRINK;
4163
4164 /*
4165 * Parse the target name and check for special support like
4166 * namespace.
4167 */
4168 target = xmlParsePITarget(ctxt);
4169 if (target != NULL) {
4170 if ((RAW == '?') && (NXT(1) == '>')) {
4171 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004172 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4173 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004174 }
4175 SKIP(2);
4176
4177 /*
4178 * SAX: PI detected.
4179 */
4180 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4181 (ctxt->sax->processingInstruction != NULL))
4182 ctxt->sax->processingInstruction(ctxt->userData,
4183 target, NULL);
4184 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004185 return;
4186 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004187 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004188 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004189 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004190 ctxt->instate = state;
4191 return;
4192 }
4193 cur = CUR;
4194 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004195 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4196 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004197 }
4198 SKIP_BLANKS;
4199 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004200 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004201 ((cur != '?') || (NXT(1) != '>'))) {
4202 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004203 xmlChar *tmp;
4204
Owen Taylor3473f882001-02-23 17:55:21 +00004205 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004206 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4207 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004208 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004209 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004210 ctxt->instate = state;
4211 return;
4212 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004213 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004214 }
4215 count++;
4216 if (count > 50) {
4217 GROW;
4218 count = 0;
4219 }
4220 COPY_BUF(l,buf,len,cur);
4221 NEXTL(l);
4222 cur = CUR_CHAR(l);
4223 if (cur == 0) {
4224 SHRINK;
4225 GROW;
4226 cur = CUR_CHAR(l);
4227 }
4228 }
4229 buf[len] = 0;
4230 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004231 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4232 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004233 } else {
4234 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004235 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4236 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004237 }
4238 SKIP(2);
4239
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004240#ifdef LIBXML_CATALOG_ENABLED
4241 if (((state == XML_PARSER_MISC) ||
4242 (state == XML_PARSER_START)) &&
4243 (xmlStrEqual(target, XML_CATALOG_PI))) {
4244 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4245 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4246 (allow == XML_CATA_ALLOW_ALL))
4247 xmlParseCatalogPI(ctxt, buf);
4248 }
4249#endif
4250
4251
Owen Taylor3473f882001-02-23 17:55:21 +00004252 /*
4253 * SAX: PI detected.
4254 */
4255 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4256 (ctxt->sax->processingInstruction != NULL))
4257 ctxt->sax->processingInstruction(ctxt->userData,
4258 target, buf);
4259 }
4260 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004261 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004262 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004263 }
4264 ctxt->instate = state;
4265 }
4266}
4267
4268/**
4269 * xmlParseNotationDecl:
4270 * @ctxt: an XML parser context
4271 *
4272 * parse a notation declaration
4273 *
4274 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4275 *
4276 * Hence there is actually 3 choices:
4277 * 'PUBLIC' S PubidLiteral
4278 * 'PUBLIC' S PubidLiteral S SystemLiteral
4279 * and 'SYSTEM' S SystemLiteral
4280 *
4281 * See the NOTE on xmlParseExternalID().
4282 */
4283
4284void
4285xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004286 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004287 xmlChar *Pubid;
4288 xmlChar *Systemid;
4289
Daniel Veillarda07050d2003-10-19 14:46:32 +00004290 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004291 xmlParserInputPtr input = ctxt->input;
4292 SHRINK;
4293 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004294 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004295 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4296 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004297 return;
4298 }
4299 SKIP_BLANKS;
4300
Daniel Veillard76d66f42001-05-16 21:05:17 +00004301 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004302 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004303 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004304 return;
4305 }
William M. Brack76e95df2003-10-18 16:20:14 +00004306 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004307 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004308 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004309 return;
4310 }
4311 SKIP_BLANKS;
4312
4313 /*
4314 * Parse the IDs.
4315 */
4316 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4317 SKIP_BLANKS;
4318
4319 if (RAW == '>') {
4320 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004321 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4322 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004323 }
4324 NEXT;
4325 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4326 (ctxt->sax->notationDecl != NULL))
4327 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4328 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004329 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004330 }
Owen Taylor3473f882001-02-23 17:55:21 +00004331 if (Systemid != NULL) xmlFree(Systemid);
4332 if (Pubid != NULL) xmlFree(Pubid);
4333 }
4334}
4335
4336/**
4337 * xmlParseEntityDecl:
4338 * @ctxt: an XML parser context
4339 *
4340 * parse <!ENTITY declarations
4341 *
4342 * [70] EntityDecl ::= GEDecl | PEDecl
4343 *
4344 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4345 *
4346 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4347 *
4348 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4349 *
4350 * [74] PEDef ::= EntityValue | ExternalID
4351 *
4352 * [76] NDataDecl ::= S 'NDATA' S Name
4353 *
4354 * [ VC: Notation Declared ]
4355 * The Name must match the declared name of a notation.
4356 */
4357
4358void
4359xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004360 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004361 xmlChar *value = NULL;
4362 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004363 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004364 int isParameter = 0;
4365 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004366 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004367
Daniel Veillard4c778d82005-01-23 17:37:44 +00004368 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004369 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004370 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004371 SHRINK;
4372 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004373 skipped = SKIP_BLANKS;
4374 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004375 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4376 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004377 }
Owen Taylor3473f882001-02-23 17:55:21 +00004378
4379 if (RAW == '%') {
4380 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004381 skipped = SKIP_BLANKS;
4382 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004383 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4384 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004385 }
Owen Taylor3473f882001-02-23 17:55:21 +00004386 isParameter = 1;
4387 }
4388
Daniel Veillard76d66f42001-05-16 21:05:17 +00004389 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004390 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004391 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4392 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004393 return;
4394 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004395 skipped = SKIP_BLANKS;
4396 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004397 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4398 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004399 }
Owen Taylor3473f882001-02-23 17:55:21 +00004400
Daniel Veillardf5582f12002-06-11 10:08:16 +00004401 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004402 /*
4403 * handle the various case of definitions...
4404 */
4405 if (isParameter) {
4406 if ((RAW == '"') || (RAW == '\'')) {
4407 value = xmlParseEntityValue(ctxt, &orig);
4408 if (value) {
4409 if ((ctxt->sax != NULL) &&
4410 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4411 ctxt->sax->entityDecl(ctxt->userData, name,
4412 XML_INTERNAL_PARAMETER_ENTITY,
4413 NULL, NULL, value);
4414 }
4415 } else {
4416 URI = xmlParseExternalID(ctxt, &literal, 1);
4417 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004418 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004419 }
4420 if (URI) {
4421 xmlURIPtr uri;
4422
4423 uri = xmlParseURI((const char *) URI);
4424 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004425 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4426 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004427 /*
4428 * This really ought to be a well formedness error
4429 * but the XML Core WG decided otherwise c.f. issue
4430 * E26 of the XML erratas.
4431 */
Owen Taylor3473f882001-02-23 17:55:21 +00004432 } else {
4433 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004434 /*
4435 * Okay this is foolish to block those but not
4436 * invalid URIs.
4437 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004438 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004439 } else {
4440 if ((ctxt->sax != NULL) &&
4441 (!ctxt->disableSAX) &&
4442 (ctxt->sax->entityDecl != NULL))
4443 ctxt->sax->entityDecl(ctxt->userData, name,
4444 XML_EXTERNAL_PARAMETER_ENTITY,
4445 literal, URI, NULL);
4446 }
4447 xmlFreeURI(uri);
4448 }
4449 }
4450 }
4451 } else {
4452 if ((RAW == '"') || (RAW == '\'')) {
4453 value = xmlParseEntityValue(ctxt, &orig);
4454 if ((ctxt->sax != NULL) &&
4455 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4456 ctxt->sax->entityDecl(ctxt->userData, name,
4457 XML_INTERNAL_GENERAL_ENTITY,
4458 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004459 /*
4460 * For expat compatibility in SAX mode.
4461 */
4462 if ((ctxt->myDoc == NULL) ||
4463 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4464 if (ctxt->myDoc == NULL) {
4465 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4466 }
4467 if (ctxt->myDoc->intSubset == NULL)
4468 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4469 BAD_CAST "fake", NULL, NULL);
4470
Daniel Veillard1af9a412003-08-20 22:54:39 +00004471 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4472 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004473 }
Owen Taylor3473f882001-02-23 17:55:21 +00004474 } else {
4475 URI = xmlParseExternalID(ctxt, &literal, 1);
4476 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004477 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004478 }
4479 if (URI) {
4480 xmlURIPtr uri;
4481
4482 uri = xmlParseURI((const char *)URI);
4483 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004484 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4485 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004486 /*
4487 * This really ought to be a well formedness error
4488 * but the XML Core WG decided otherwise c.f. issue
4489 * E26 of the XML erratas.
4490 */
Owen Taylor3473f882001-02-23 17:55:21 +00004491 } else {
4492 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004493 /*
4494 * Okay this is foolish to block those but not
4495 * invalid URIs.
4496 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004497 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004498 }
4499 xmlFreeURI(uri);
4500 }
4501 }
William M. Brack76e95df2003-10-18 16:20:14 +00004502 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004503 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4504 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004505 }
4506 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004507 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004508 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004509 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004510 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4511 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004512 }
4513 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004514 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004515 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4516 (ctxt->sax->unparsedEntityDecl != NULL))
4517 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4518 literal, URI, ndata);
4519 } else {
4520 if ((ctxt->sax != NULL) &&
4521 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4522 ctxt->sax->entityDecl(ctxt->userData, name,
4523 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4524 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004525 /*
4526 * For expat compatibility in SAX mode.
4527 * assuming the entity repalcement was asked for
4528 */
4529 if ((ctxt->replaceEntities != 0) &&
4530 ((ctxt->myDoc == NULL) ||
4531 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4532 if (ctxt->myDoc == NULL) {
4533 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4534 }
4535
4536 if (ctxt->myDoc->intSubset == NULL)
4537 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4538 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004539 xmlSAX2EntityDecl(ctxt, name,
4540 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4541 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004542 }
Owen Taylor3473f882001-02-23 17:55:21 +00004543 }
4544 }
4545 }
4546 SKIP_BLANKS;
4547 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004548 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004549 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004550 } else {
4551 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004552 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4553 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004554 }
4555 NEXT;
4556 }
4557 if (orig != NULL) {
4558 /*
4559 * Ugly mechanism to save the raw entity value.
4560 */
4561 xmlEntityPtr cur = NULL;
4562
4563 if (isParameter) {
4564 if ((ctxt->sax != NULL) &&
4565 (ctxt->sax->getParameterEntity != NULL))
4566 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4567 } else {
4568 if ((ctxt->sax != NULL) &&
4569 (ctxt->sax->getEntity != NULL))
4570 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004571 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004572 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004573 }
Owen Taylor3473f882001-02-23 17:55:21 +00004574 }
4575 if (cur != NULL) {
4576 if (cur->orig != NULL)
4577 xmlFree(orig);
4578 else
4579 cur->orig = orig;
4580 } else
4581 xmlFree(orig);
4582 }
Owen Taylor3473f882001-02-23 17:55:21 +00004583 if (value != NULL) xmlFree(value);
4584 if (URI != NULL) xmlFree(URI);
4585 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004586 }
4587}
4588
4589/**
4590 * xmlParseDefaultDecl:
4591 * @ctxt: an XML parser context
4592 * @value: Receive a possible fixed default value for the attribute
4593 *
4594 * Parse an attribute default declaration
4595 *
4596 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4597 *
4598 * [ VC: Required Attribute ]
4599 * if the default declaration is the keyword #REQUIRED, then the
4600 * attribute must be specified for all elements of the type in the
4601 * attribute-list declaration.
4602 *
4603 * [ VC: Attribute Default Legal ]
4604 * The declared default value must meet the lexical constraints of
4605 * the declared attribute type c.f. xmlValidateAttributeDecl()
4606 *
4607 * [ VC: Fixed Attribute Default ]
4608 * if an attribute has a default value declared with the #FIXED
4609 * keyword, instances of that attribute must match the default value.
4610 *
4611 * [ WFC: No < in Attribute Values ]
4612 * handled in xmlParseAttValue()
4613 *
4614 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4615 * or XML_ATTRIBUTE_FIXED.
4616 */
4617
4618int
4619xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4620 int val;
4621 xmlChar *ret;
4622
4623 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004624 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004625 SKIP(9);
4626 return(XML_ATTRIBUTE_REQUIRED);
4627 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004628 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004629 SKIP(8);
4630 return(XML_ATTRIBUTE_IMPLIED);
4631 }
4632 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004633 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004634 SKIP(6);
4635 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004636 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004637 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4638 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004639 }
4640 SKIP_BLANKS;
4641 }
4642 ret = xmlParseAttValue(ctxt);
4643 ctxt->instate = XML_PARSER_DTD;
4644 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004645 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004646 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004647 } else
4648 *value = ret;
4649 return(val);
4650}
4651
4652/**
4653 * xmlParseNotationType:
4654 * @ctxt: an XML parser context
4655 *
4656 * parse an Notation attribute type.
4657 *
4658 * Note: the leading 'NOTATION' S part has already being parsed...
4659 *
4660 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4661 *
4662 * [ VC: Notation Attributes ]
4663 * Values of this type must match one of the notation names included
4664 * in the declaration; all notation names in the declaration must be declared.
4665 *
4666 * Returns: the notation attribute tree built while parsing
4667 */
4668
4669xmlEnumerationPtr
4670xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004671 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004672 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4673
4674 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004675 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004676 return(NULL);
4677 }
4678 SHRINK;
4679 do {
4680 NEXT;
4681 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004682 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004683 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004684 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4685 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004686 return(ret);
4687 }
4688 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004689 if (cur == NULL) return(ret);
4690 if (last == NULL) ret = last = cur;
4691 else {
4692 last->next = cur;
4693 last = cur;
4694 }
4695 SKIP_BLANKS;
4696 } while (RAW == '|');
4697 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004698 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004699 if ((last != NULL) && (last != ret))
4700 xmlFreeEnumeration(last);
4701 return(ret);
4702 }
4703 NEXT;
4704 return(ret);
4705}
4706
4707/**
4708 * xmlParseEnumerationType:
4709 * @ctxt: an XML parser context
4710 *
4711 * parse an Enumeration attribute type.
4712 *
4713 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4714 *
4715 * [ VC: Enumeration ]
4716 * Values of this type must match one of the Nmtoken tokens in
4717 * the declaration
4718 *
4719 * Returns: the enumeration attribute tree built while parsing
4720 */
4721
4722xmlEnumerationPtr
4723xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4724 xmlChar *name;
4725 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4726
4727 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004728 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004729 return(NULL);
4730 }
4731 SHRINK;
4732 do {
4733 NEXT;
4734 SKIP_BLANKS;
4735 name = xmlParseNmtoken(ctxt);
4736 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004737 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004738 return(ret);
4739 }
4740 cur = xmlCreateEnumeration(name);
4741 xmlFree(name);
4742 if (cur == NULL) return(ret);
4743 if (last == NULL) ret = last = cur;
4744 else {
4745 last->next = cur;
4746 last = cur;
4747 }
4748 SKIP_BLANKS;
4749 } while (RAW == '|');
4750 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004751 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004752 return(ret);
4753 }
4754 NEXT;
4755 return(ret);
4756}
4757
4758/**
4759 * xmlParseEnumeratedType:
4760 * @ctxt: an XML parser context
4761 * @tree: the enumeration tree built while parsing
4762 *
4763 * parse an Enumerated attribute type.
4764 *
4765 * [57] EnumeratedType ::= NotationType | Enumeration
4766 *
4767 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4768 *
4769 *
4770 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4771 */
4772
4773int
4774xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004775 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004776 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004777 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004778 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4779 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004780 return(0);
4781 }
4782 SKIP_BLANKS;
4783 *tree = xmlParseNotationType(ctxt);
4784 if (*tree == NULL) return(0);
4785 return(XML_ATTRIBUTE_NOTATION);
4786 }
4787 *tree = xmlParseEnumerationType(ctxt);
4788 if (*tree == NULL) return(0);
4789 return(XML_ATTRIBUTE_ENUMERATION);
4790}
4791
4792/**
4793 * xmlParseAttributeType:
4794 * @ctxt: an XML parser context
4795 * @tree: the enumeration tree built while parsing
4796 *
4797 * parse the Attribute list def for an element
4798 *
4799 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4800 *
4801 * [55] StringType ::= 'CDATA'
4802 *
4803 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4804 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4805 *
4806 * Validity constraints for attribute values syntax are checked in
4807 * xmlValidateAttributeValue()
4808 *
4809 * [ VC: ID ]
4810 * Values of type ID must match the Name production. A name must not
4811 * appear more than once in an XML document as a value of this type;
4812 * i.e., ID values must uniquely identify the elements which bear them.
4813 *
4814 * [ VC: One ID per Element Type ]
4815 * No element type may have more than one ID attribute specified.
4816 *
4817 * [ VC: ID Attribute Default ]
4818 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4819 *
4820 * [ VC: IDREF ]
4821 * Values of type IDREF must match the Name production, and values
4822 * of type IDREFS must match Names; each IDREF Name must match the value
4823 * of an ID attribute on some element in the XML document; i.e. IDREF
4824 * values must match the value of some ID attribute.
4825 *
4826 * [ VC: Entity Name ]
4827 * Values of type ENTITY must match the Name production, values
4828 * of type ENTITIES must match Names; each Entity Name must match the
4829 * name of an unparsed entity declared in the DTD.
4830 *
4831 * [ VC: Name Token ]
4832 * Values of type NMTOKEN must match the Nmtoken production; values
4833 * of type NMTOKENS must match Nmtokens.
4834 *
4835 * Returns the attribute type
4836 */
4837int
4838xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4839 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004840 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004841 SKIP(5);
4842 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004843 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004844 SKIP(6);
4845 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004846 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004847 SKIP(5);
4848 return(XML_ATTRIBUTE_IDREF);
4849 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4850 SKIP(2);
4851 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004852 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004853 SKIP(6);
4854 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004855 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004856 SKIP(8);
4857 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004858 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004859 SKIP(8);
4860 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004861 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004862 SKIP(7);
4863 return(XML_ATTRIBUTE_NMTOKEN);
4864 }
4865 return(xmlParseEnumeratedType(ctxt, tree));
4866}
4867
4868/**
4869 * xmlParseAttributeListDecl:
4870 * @ctxt: an XML parser context
4871 *
4872 * : parse the Attribute list def for an element
4873 *
4874 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4875 *
4876 * [53] AttDef ::= S Name S AttType S DefaultDecl
4877 *
4878 */
4879void
4880xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004881 const xmlChar *elemName;
4882 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004883 xmlEnumerationPtr tree;
4884
Daniel Veillarda07050d2003-10-19 14:46:32 +00004885 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004886 xmlParserInputPtr input = ctxt->input;
4887
4888 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004889 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004890 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004891 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004892 }
4893 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004894 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004895 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004896 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4897 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004898 return;
4899 }
4900 SKIP_BLANKS;
4901 GROW;
4902 while (RAW != '>') {
4903 const xmlChar *check = CUR_PTR;
4904 int type;
4905 int def;
4906 xmlChar *defaultValue = NULL;
4907
4908 GROW;
4909 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004910 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004911 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004912 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4913 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004914 break;
4915 }
4916 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004917 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004918 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004919 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004920 break;
4921 }
4922 SKIP_BLANKS;
4923
4924 type = xmlParseAttributeType(ctxt, &tree);
4925 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004926 break;
4927 }
4928
4929 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004930 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004931 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4932 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004933 if (tree != NULL)
4934 xmlFreeEnumeration(tree);
4935 break;
4936 }
4937 SKIP_BLANKS;
4938
4939 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4940 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004941 if (defaultValue != NULL)
4942 xmlFree(defaultValue);
4943 if (tree != NULL)
4944 xmlFreeEnumeration(tree);
4945 break;
4946 }
4947
4948 GROW;
4949 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004950 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004951 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004952 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004953 if (defaultValue != NULL)
4954 xmlFree(defaultValue);
4955 if (tree != NULL)
4956 xmlFreeEnumeration(tree);
4957 break;
4958 }
4959 SKIP_BLANKS;
4960 }
4961 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004962 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4963 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004964 if (defaultValue != NULL)
4965 xmlFree(defaultValue);
4966 if (tree != NULL)
4967 xmlFreeEnumeration(tree);
4968 break;
4969 }
4970 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4971 (ctxt->sax->attributeDecl != NULL))
4972 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4973 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004974 else if (tree != NULL)
4975 xmlFreeEnumeration(tree);
4976
4977 if ((ctxt->sax2) && (defaultValue != NULL) &&
4978 (def != XML_ATTRIBUTE_IMPLIED) &&
4979 (def != XML_ATTRIBUTE_REQUIRED)) {
4980 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4981 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004982 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4983 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4984 }
Owen Taylor3473f882001-02-23 17:55:21 +00004985 if (defaultValue != NULL)
4986 xmlFree(defaultValue);
4987 GROW;
4988 }
4989 if (RAW == '>') {
4990 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004991 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4992 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004993 }
4994 NEXT;
4995 }
Owen Taylor3473f882001-02-23 17:55:21 +00004996 }
4997}
4998
4999/**
5000 * xmlParseElementMixedContentDecl:
5001 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005002 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005003 *
5004 * parse the declaration for a Mixed Element content
5005 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5006 *
5007 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5008 * '(' S? '#PCDATA' S? ')'
5009 *
5010 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5011 *
5012 * [ VC: No Duplicate Types ]
5013 * The same name must not appear more than once in a single
5014 * mixed-content declaration.
5015 *
5016 * returns: the list of the xmlElementContentPtr describing the element choices
5017 */
5018xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005019xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005020 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005021 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005022
5023 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005024 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005025 SKIP(7);
5026 SKIP_BLANKS;
5027 SHRINK;
5028 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005029 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005030 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5031"Element content declaration doesn't start and stop in the same entity\n",
5032 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005033 }
Owen Taylor3473f882001-02-23 17:55:21 +00005034 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005035 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005036 if (RAW == '*') {
5037 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5038 NEXT;
5039 }
5040 return(ret);
5041 }
5042 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005043 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005044 if (ret == NULL) return(NULL);
5045 }
5046 while (RAW == '|') {
5047 NEXT;
5048 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005049 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005050 if (ret == NULL) return(NULL);
5051 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005052 if (cur != NULL)
5053 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005054 cur = ret;
5055 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005056 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005057 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005058 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005059 if (n->c1 != NULL)
5060 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005061 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005062 if (n != NULL)
5063 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005064 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005065 }
5066 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005067 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005068 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005069 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005070 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005071 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005072 return(NULL);
5073 }
5074 SKIP_BLANKS;
5075 GROW;
5076 }
5077 if ((RAW == ')') && (NXT(1) == '*')) {
5078 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005079 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005080 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005081 if (cur->c2 != NULL)
5082 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005083 }
5084 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005085 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005086 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5087"Element content declaration doesn't start and stop in the same entity\n",
5088 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005089 }
Owen Taylor3473f882001-02-23 17:55:21 +00005090 SKIP(2);
5091 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005092 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005093 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005094 return(NULL);
5095 }
5096
5097 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005098 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005099 }
5100 return(ret);
5101}
5102
5103/**
5104 * xmlParseElementChildrenContentDecl:
5105 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005106 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005107 *
5108 * parse the declaration for a Mixed Element content
5109 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5110 *
5111 *
5112 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5113 *
5114 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5115 *
5116 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5117 *
5118 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5119 *
5120 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5121 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005122 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005123 * opening or closing parentheses in a choice, seq, or Mixed
5124 * construct is contained in the replacement text for a parameter
5125 * entity, both must be contained in the same replacement text. For
5126 * interoperability, if a parameter-entity reference appears in a
5127 * choice, seq, or Mixed construct, its replacement text should not
5128 * be empty, and neither the first nor last non-blank character of
5129 * the replacement text should be a connector (| or ,).
5130 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005131 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005132 * hierarchy.
5133 */
5134xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005135xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005136 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005137 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005138 xmlChar type = 0;
5139
5140 SKIP_BLANKS;
5141 GROW;
5142 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005143 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005144
Owen Taylor3473f882001-02-23 17:55:21 +00005145 /* Recurse on first child */
5146 NEXT;
5147 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005148 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005149 SKIP_BLANKS;
5150 GROW;
5151 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005152 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005153 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005154 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005155 return(NULL);
5156 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005157 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005158 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005159 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005160 return(NULL);
5161 }
Owen Taylor3473f882001-02-23 17:55:21 +00005162 GROW;
5163 if (RAW == '?') {
5164 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5165 NEXT;
5166 } else if (RAW == '*') {
5167 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5168 NEXT;
5169 } else if (RAW == '+') {
5170 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5171 NEXT;
5172 } else {
5173 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5174 }
Owen Taylor3473f882001-02-23 17:55:21 +00005175 GROW;
5176 }
5177 SKIP_BLANKS;
5178 SHRINK;
5179 while (RAW != ')') {
5180 /*
5181 * Each loop we parse one separator and one element.
5182 */
5183 if (RAW == ',') {
5184 if (type == 0) type = CUR;
5185
5186 /*
5187 * Detect "Name | Name , Name" error
5188 */
5189 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005190 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005191 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005192 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005193 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005194 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005195 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005196 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005197 return(NULL);
5198 }
5199 NEXT;
5200
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005201 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005202 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005203 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005204 xmlFreeDocElementContent(ctxt->myDoc, last);
5205 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005206 return(NULL);
5207 }
5208 if (last == NULL) {
5209 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005210 if (ret != NULL)
5211 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005212 ret = cur = op;
5213 } else {
5214 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005215 if (op != NULL)
5216 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005217 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005218 if (last != NULL)
5219 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005220 cur =op;
5221 last = NULL;
5222 }
5223 } else if (RAW == '|') {
5224 if (type == 0) type = CUR;
5225
5226 /*
5227 * Detect "Name , Name | Name" error
5228 */
5229 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005230 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005231 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005232 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005233 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005234 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005235 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005236 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005237 return(NULL);
5238 }
5239 NEXT;
5240
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005241 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005242 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005243 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005244 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005245 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005246 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005247 return(NULL);
5248 }
5249 if (last == NULL) {
5250 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005251 if (ret != NULL)
5252 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005253 ret = cur = op;
5254 } else {
5255 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005256 if (op != NULL)
5257 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005258 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005259 if (last != NULL)
5260 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005261 cur =op;
5262 last = NULL;
5263 }
5264 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005265 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005266 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005267 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005268 return(NULL);
5269 }
5270 GROW;
5271 SKIP_BLANKS;
5272 GROW;
5273 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005274 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005275 /* Recurse on second child */
5276 NEXT;
5277 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005278 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005279 SKIP_BLANKS;
5280 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005281 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005282 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005283 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005284 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005285 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005286 return(NULL);
5287 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005288 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005289 if (RAW == '?') {
5290 last->ocur = XML_ELEMENT_CONTENT_OPT;
5291 NEXT;
5292 } else if (RAW == '*') {
5293 last->ocur = XML_ELEMENT_CONTENT_MULT;
5294 NEXT;
5295 } else if (RAW == '+') {
5296 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5297 NEXT;
5298 } else {
5299 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5300 }
5301 }
5302 SKIP_BLANKS;
5303 GROW;
5304 }
5305 if ((cur != NULL) && (last != NULL)) {
5306 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005307 if (last != NULL)
5308 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005309 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005310 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005311 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5312"Element content declaration doesn't start and stop in the same entity\n",
5313 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005314 }
Owen Taylor3473f882001-02-23 17:55:21 +00005315 NEXT;
5316 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005317 if (ret != NULL) {
5318 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5319 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5320 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5321 else
5322 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5323 }
Owen Taylor3473f882001-02-23 17:55:21 +00005324 NEXT;
5325 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005326 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005327 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005328 cur = ret;
5329 /*
5330 * Some normalization:
5331 * (a | b* | c?)* == (a | b | c)*
5332 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005333 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005334 if ((cur->c1 != NULL) &&
5335 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5336 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5337 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5338 if ((cur->c2 != NULL) &&
5339 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5340 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5341 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5342 cur = cur->c2;
5343 }
5344 }
Owen Taylor3473f882001-02-23 17:55:21 +00005345 NEXT;
5346 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005347 if (ret != NULL) {
5348 int found = 0;
5349
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005350 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5351 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5352 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005353 else
5354 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005355 /*
5356 * Some normalization:
5357 * (a | b*)+ == (a | b)*
5358 * (a | b?)+ == (a | b)*
5359 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005360 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005361 if ((cur->c1 != NULL) &&
5362 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5363 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5364 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5365 found = 1;
5366 }
5367 if ((cur->c2 != NULL) &&
5368 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5369 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5370 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5371 found = 1;
5372 }
5373 cur = cur->c2;
5374 }
5375 if (found)
5376 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5377 }
Owen Taylor3473f882001-02-23 17:55:21 +00005378 NEXT;
5379 }
5380 return(ret);
5381}
5382
5383/**
5384 * xmlParseElementContentDecl:
5385 * @ctxt: an XML parser context
5386 * @name: the name of the element being defined.
5387 * @result: the Element Content pointer will be stored here if any
5388 *
5389 * parse the declaration for an Element content either Mixed or Children,
5390 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5391 *
5392 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5393 *
5394 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5395 */
5396
5397int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005398xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005399 xmlElementContentPtr *result) {
5400
5401 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005402 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005403 int res;
5404
5405 *result = NULL;
5406
5407 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005408 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005409 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005410 return(-1);
5411 }
5412 NEXT;
5413 GROW;
5414 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005415 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005416 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005417 res = XML_ELEMENT_TYPE_MIXED;
5418 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005419 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005420 res = XML_ELEMENT_TYPE_ELEMENT;
5421 }
Owen Taylor3473f882001-02-23 17:55:21 +00005422 SKIP_BLANKS;
5423 *result = tree;
5424 return(res);
5425}
5426
5427/**
5428 * xmlParseElementDecl:
5429 * @ctxt: an XML parser context
5430 *
5431 * parse an Element declaration.
5432 *
5433 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5434 *
5435 * [ VC: Unique Element Type Declaration ]
5436 * No element type may be declared more than once
5437 *
5438 * Returns the type of the element, or -1 in case of error
5439 */
5440int
5441xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005442 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005443 int ret = -1;
5444 xmlElementContentPtr content = NULL;
5445
Daniel Veillard4c778d82005-01-23 17:37:44 +00005446 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005447 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005448 xmlParserInputPtr input = ctxt->input;
5449
5450 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005451 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005452 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5453 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005454 }
5455 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005456 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005457 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005458 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5459 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005460 return(-1);
5461 }
5462 while ((RAW == 0) && (ctxt->inputNr > 1))
5463 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005464 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005465 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5466 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005467 }
5468 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005469 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005470 SKIP(5);
5471 /*
5472 * Element must always be empty.
5473 */
5474 ret = XML_ELEMENT_TYPE_EMPTY;
5475 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5476 (NXT(2) == 'Y')) {
5477 SKIP(3);
5478 /*
5479 * Element is a generic container.
5480 */
5481 ret = XML_ELEMENT_TYPE_ANY;
5482 } else if (RAW == '(') {
5483 ret = xmlParseElementContentDecl(ctxt, name, &content);
5484 } else {
5485 /*
5486 * [ WFC: PEs in Internal Subset ] error handling.
5487 */
5488 if ((RAW == '%') && (ctxt->external == 0) &&
5489 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005490 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005491 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005492 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005493 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005494 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5495 }
Owen Taylor3473f882001-02-23 17:55:21 +00005496 return(-1);
5497 }
5498
5499 SKIP_BLANKS;
5500 /*
5501 * Pop-up of finished entities.
5502 */
5503 while ((RAW == 0) && (ctxt->inputNr > 1))
5504 xmlPopInput(ctxt);
5505 SKIP_BLANKS;
5506
5507 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005508 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005509 if (content != NULL) {
5510 xmlFreeDocElementContent(ctxt->myDoc, content);
5511 }
Owen Taylor3473f882001-02-23 17:55:21 +00005512 } else {
5513 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005514 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5515 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005516 }
5517
5518 NEXT;
5519 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005520 (ctxt->sax->elementDecl != NULL)) {
5521 if (content != NULL)
5522 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005523 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5524 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005525 if ((content != NULL) && (content->parent == NULL)) {
5526 /*
5527 * this is a trick: if xmlAddElementDecl is called,
5528 * instead of copying the full tree it is plugged directly
5529 * if called from the parser. Avoid duplicating the
5530 * interfaces or change the API/ABI
5531 */
5532 xmlFreeDocElementContent(ctxt->myDoc, content);
5533 }
5534 } else if (content != NULL) {
5535 xmlFreeDocElementContent(ctxt->myDoc, content);
5536 }
Owen Taylor3473f882001-02-23 17:55:21 +00005537 }
Owen Taylor3473f882001-02-23 17:55:21 +00005538 }
5539 return(ret);
5540}
5541
5542/**
Owen Taylor3473f882001-02-23 17:55:21 +00005543 * xmlParseConditionalSections
5544 * @ctxt: an XML parser context
5545 *
5546 * [61] conditionalSect ::= includeSect | ignoreSect
5547 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5548 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5549 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5550 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5551 */
5552
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005553static void
Owen Taylor3473f882001-02-23 17:55:21 +00005554xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5555 SKIP(3);
5556 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005557 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005558 SKIP(7);
5559 SKIP_BLANKS;
5560 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005561 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005562 } else {
5563 NEXT;
5564 }
5565 if (xmlParserDebugEntities) {
5566 if ((ctxt->input != NULL) && (ctxt->input->filename))
5567 xmlGenericError(xmlGenericErrorContext,
5568 "%s(%d): ", ctxt->input->filename,
5569 ctxt->input->line);
5570 xmlGenericError(xmlGenericErrorContext,
5571 "Entering INCLUDE Conditional Section\n");
5572 }
5573
5574 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5575 (NXT(2) != '>'))) {
5576 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005577 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005578
5579 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5580 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005581 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005582 NEXT;
5583 } else if (RAW == '%') {
5584 xmlParsePEReference(ctxt);
5585 } else
5586 xmlParseMarkupDecl(ctxt);
5587
5588 /*
5589 * Pop-up of finished entities.
5590 */
5591 while ((RAW == 0) && (ctxt->inputNr > 1))
5592 xmlPopInput(ctxt);
5593
Daniel Veillardfdc91562002-07-01 21:52:03 +00005594 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005595 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005596 break;
5597 }
5598 }
5599 if (xmlParserDebugEntities) {
5600 if ((ctxt->input != NULL) && (ctxt->input->filename))
5601 xmlGenericError(xmlGenericErrorContext,
5602 "%s(%d): ", ctxt->input->filename,
5603 ctxt->input->line);
5604 xmlGenericError(xmlGenericErrorContext,
5605 "Leaving INCLUDE Conditional Section\n");
5606 }
5607
Daniel Veillarda07050d2003-10-19 14:46:32 +00005608 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005609 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005610 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005611 int depth = 0;
5612
5613 SKIP(6);
5614 SKIP_BLANKS;
5615 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005616 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005617 } else {
5618 NEXT;
5619 }
5620 if (xmlParserDebugEntities) {
5621 if ((ctxt->input != NULL) && (ctxt->input->filename))
5622 xmlGenericError(xmlGenericErrorContext,
5623 "%s(%d): ", ctxt->input->filename,
5624 ctxt->input->line);
5625 xmlGenericError(xmlGenericErrorContext,
5626 "Entering IGNORE Conditional Section\n");
5627 }
5628
5629 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005630 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005631 * But disable SAX event generating DTD building in the meantime
5632 */
5633 state = ctxt->disableSAX;
5634 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005635 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005636 ctxt->instate = XML_PARSER_IGNORE;
5637
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005638 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005639 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5640 depth++;
5641 SKIP(3);
5642 continue;
5643 }
5644 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5645 if (--depth >= 0) SKIP(3);
5646 continue;
5647 }
5648 NEXT;
5649 continue;
5650 }
5651
5652 ctxt->disableSAX = state;
5653 ctxt->instate = instate;
5654
5655 if (xmlParserDebugEntities) {
5656 if ((ctxt->input != NULL) && (ctxt->input->filename))
5657 xmlGenericError(xmlGenericErrorContext,
5658 "%s(%d): ", ctxt->input->filename,
5659 ctxt->input->line);
5660 xmlGenericError(xmlGenericErrorContext,
5661 "Leaving IGNORE Conditional Section\n");
5662 }
5663
5664 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005665 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005666 }
5667
5668 if (RAW == 0)
5669 SHRINK;
5670
5671 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005672 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005673 } else {
5674 SKIP(3);
5675 }
5676}
5677
5678/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005679 * xmlParseMarkupDecl:
5680 * @ctxt: an XML parser context
5681 *
5682 * parse Markup declarations
5683 *
5684 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5685 * NotationDecl | PI | Comment
5686 *
5687 * [ VC: Proper Declaration/PE Nesting ]
5688 * Parameter-entity replacement text must be properly nested with
5689 * markup declarations. That is to say, if either the first character
5690 * or the last character of a markup declaration (markupdecl above) is
5691 * contained in the replacement text for a parameter-entity reference,
5692 * both must be contained in the same replacement text.
5693 *
5694 * [ WFC: PEs in Internal Subset ]
5695 * In the internal DTD subset, parameter-entity references can occur
5696 * only where markup declarations can occur, not within markup declarations.
5697 * (This does not apply to references that occur in external parameter
5698 * entities or to the external subset.)
5699 */
5700void
5701xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5702 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005703 if (CUR == '<') {
5704 if (NXT(1) == '!') {
5705 switch (NXT(2)) {
5706 case 'E':
5707 if (NXT(3) == 'L')
5708 xmlParseElementDecl(ctxt);
5709 else if (NXT(3) == 'N')
5710 xmlParseEntityDecl(ctxt);
5711 break;
5712 case 'A':
5713 xmlParseAttributeListDecl(ctxt);
5714 break;
5715 case 'N':
5716 xmlParseNotationDecl(ctxt);
5717 break;
5718 case '-':
5719 xmlParseComment(ctxt);
5720 break;
5721 default:
5722 /* there is an error but it will be detected later */
5723 break;
5724 }
5725 } else if (NXT(1) == '?') {
5726 xmlParsePI(ctxt);
5727 }
5728 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005729 /*
5730 * This is only for internal subset. On external entities,
5731 * the replacement is done before parsing stage
5732 */
5733 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5734 xmlParsePEReference(ctxt);
5735
5736 /*
5737 * Conditional sections are allowed from entities included
5738 * by PE References in the internal subset.
5739 */
5740 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5741 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5742 xmlParseConditionalSections(ctxt);
5743 }
5744 }
5745
5746 ctxt->instate = XML_PARSER_DTD;
5747}
5748
5749/**
5750 * xmlParseTextDecl:
5751 * @ctxt: an XML parser context
5752 *
5753 * parse an XML declaration header for external entities
5754 *
5755 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5756 *
5757 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5758 */
5759
5760void
5761xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5762 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005763 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005764
5765 /*
5766 * We know that '<?xml' is here.
5767 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005768 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005769 SKIP(5);
5770 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005771 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005772 return;
5773 }
5774
William M. Brack76e95df2003-10-18 16:20:14 +00005775 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005776 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5777 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005778 }
5779 SKIP_BLANKS;
5780
5781 /*
5782 * We may have the VersionInfo here.
5783 */
5784 version = xmlParseVersionInfo(ctxt);
5785 if (version == NULL)
5786 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005787 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005788 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005789 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5790 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005791 }
5792 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005793 ctxt->input->version = version;
5794
5795 /*
5796 * We must have the encoding declaration
5797 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005798 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005799 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5800 /*
5801 * The XML REC instructs us to stop parsing right here
5802 */
5803 return;
5804 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005805 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5806 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5807 "Missing encoding in text declaration\n");
5808 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005809
5810 SKIP_BLANKS;
5811 if ((RAW == '?') && (NXT(1) == '>')) {
5812 SKIP(2);
5813 } else if (RAW == '>') {
5814 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005815 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005816 NEXT;
5817 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005818 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005819 MOVETO_ENDTAG(CUR_PTR);
5820 NEXT;
5821 }
5822}
5823
5824/**
Owen Taylor3473f882001-02-23 17:55:21 +00005825 * xmlParseExternalSubset:
5826 * @ctxt: an XML parser context
5827 * @ExternalID: the external identifier
5828 * @SystemID: the system identifier (or URL)
5829 *
5830 * parse Markup declarations from an external subset
5831 *
5832 * [30] extSubset ::= textDecl? extSubsetDecl
5833 *
5834 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5835 */
5836void
5837xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5838 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005839 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005840 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005841 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005842 xmlParseTextDecl(ctxt);
5843 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5844 /*
5845 * The XML REC instructs us to stop parsing right here
5846 */
5847 ctxt->instate = XML_PARSER_EOF;
5848 return;
5849 }
5850 }
5851 if (ctxt->myDoc == NULL) {
5852 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5853 }
5854 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5855 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5856
5857 ctxt->instate = XML_PARSER_DTD;
5858 ctxt->external = 1;
5859 while (((RAW == '<') && (NXT(1) == '?')) ||
5860 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005861 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005862 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005863 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005864
5865 GROW;
5866 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5867 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005868 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005869 NEXT;
5870 } else if (RAW == '%') {
5871 xmlParsePEReference(ctxt);
5872 } else
5873 xmlParseMarkupDecl(ctxt);
5874
5875 /*
5876 * Pop-up of finished entities.
5877 */
5878 while ((RAW == 0) && (ctxt->inputNr > 1))
5879 xmlPopInput(ctxt);
5880
Daniel Veillardfdc91562002-07-01 21:52:03 +00005881 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005882 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005883 break;
5884 }
5885 }
5886
5887 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005888 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005889 }
5890
5891}
5892
/**
 * xmlParseReference:
 * @ctxt:  an XML parser context
 *
 * parse and handle entity references in content. Depending on the SAX
 * interface, this may end up in a call to characters() if this is a
 * CharRef or a predefined entity, if there is no reference() callback,
 * or if the parser was asked to switch to that mode.
 *
 * [67] Reference ::= EntityRef | CharRef
 */
5904void
5905xmlParseReference(xmlParserCtxtPtr ctxt) {
5906 xmlEntityPtr ent;
5907 xmlChar *val;
5908 if (RAW != '&') return;
5909
5910 if (NXT(1) == '#') {
5911 int i = 0;
5912 xmlChar out[10];
5913 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005914 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005915
5916 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5917 /*
5918 * So we are using non-UTF-8 buffers
5919 * Check that the char fit on 8bits, if not
5920 * generate a CharRef.
5921 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005922 if (value <= 0xFF) {
5923 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005924 out[1] = 0;
5925 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5926 (!ctxt->disableSAX))
5927 ctxt->sax->characters(ctxt->userData, out, 1);
5928 } else {
5929 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005930 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005931 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005932 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005933 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5934 (!ctxt->disableSAX))
5935 ctxt->sax->reference(ctxt->userData, out);
5936 }
5937 } else {
5938 /*
5939 * Just encode the value in UTF-8
5940 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005941 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005942 out[i] = 0;
5943 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5944 (!ctxt->disableSAX))
5945 ctxt->sax->characters(ctxt->userData, out, i);
5946 }
5947 } else {
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00005948 int was_checked;
5949
Owen Taylor3473f882001-02-23 17:55:21 +00005950 ent = xmlParseEntityRef(ctxt);
5951 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005952 if (!ctxt->wellFormed)
5953 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00005954 was_checked = ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00005955 if ((ent->name != NULL) &&
5956 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5957 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005958 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005959
5960
5961 /*
5962 * The first reference to the entity trigger a parsing phase
5963 * where the ent->children is filled with the result from
5964 * the parsing.
5965 */
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00005966 if (ent->checked == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005967 xmlChar *value;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00005968
Owen Taylor3473f882001-02-23 17:55:21 +00005969 value = ent->content;
5970
5971 /*
5972 * Check that this entity is well formed
5973 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005974 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005975 (value[1] == 0) && (value[0] == '<') &&
5976 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5977 /*
5978 * DONE: get definite answer on this !!!
5979 * Lots of entity decls are used to declare a single
5980 * char
5981 * <!ENTITY lt "<">
5982 * Which seems to be valid since
5983 * 2.4: The ampersand character (&) and the left angle
5984 * bracket (<) may appear in their literal form only
5985 * when used ... They are also legal within the literal
5986 * entity value of an internal entity declaration;i
5987 * see "4.3.2 Well-Formed Parsed Entities".
5988 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5989 * Looking at the OASIS test suite and James Clark
5990 * tests, this is broken. However the XML REC uses
5991 * it. Is the XML REC not well-formed ????
5992 * This is a hack to avoid this problem
5993 *
5994 * ANSWER: since lt gt amp .. are already defined,
5995 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005996 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005997 * is lousy but acceptable.
5998 */
5999 list = xmlNewDocText(ctxt->myDoc, value);
6000 if (list != NULL) {
6001 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6002 (ent->children == NULL)) {
6003 ent->children = list;
6004 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006005 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006006 list->parent = (xmlNodePtr) ent;
6007 } else {
6008 xmlFreeNodeList(list);
6009 }
6010 } else if (list != NULL) {
6011 xmlFreeNodeList(list);
6012 }
6013 } else {
6014 /*
6015 * 4.3.2: An internal general parsed entity is well-formed
6016 * if its replacement text matches the production labeled
6017 * content.
6018 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006019
6020 void *user_data;
6021 /*
6022 * This is a bit hackish but this seems the best
6023 * way to make sure both SAX and DOM entity support
6024 * behaves okay.
6025 */
6026 if (ctxt->userData == ctxt)
6027 user_data = NULL;
6028 else
6029 user_data = ctxt->userData;
6030
Owen Taylor3473f882001-02-23 17:55:21 +00006031 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6032 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00006033 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6034 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006035 ctxt->depth--;
6036 } else if (ent->etype ==
6037 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6038 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006039 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006040 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006041 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006042 ctxt->depth--;
6043 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00006044 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00006045 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6046 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006047 }
6048 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006049 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006050 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00006051 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006052 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6053 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00006054 (ent->children == NULL)) {
6055 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006056 if (ctxt->replaceEntities) {
6057 /*
6058 * Prune it directly in the generated document
6059 * except for single text nodes.
6060 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006061 if (((list->type == XML_TEXT_NODE) &&
6062 (list->next == NULL)) ||
6063 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00006064 list->parent = (xmlNodePtr) ent;
6065 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006066 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006067 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006068 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006069 while (list != NULL) {
6070 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00006071 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006072 if (list->next == NULL)
6073 ent->last = list;
6074 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00006075 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006076 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00006077#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006078 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6079 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00006080#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006081 }
6082 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006083 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006084 while (list != NULL) {
6085 list->parent = (xmlNodePtr) ent;
6086 if (list->next == NULL)
6087 ent->last = list;
6088 list = list->next;
6089 }
Owen Taylor3473f882001-02-23 17:55:21 +00006090 }
6091 } else {
6092 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006093 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006094 }
William M. Brackb670e2e2003-09-27 01:05:55 +00006095 } else if ((ret != XML_ERR_OK) &&
6096 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006097 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006098 } else if (list != NULL) {
6099 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006100 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006101 }
6102 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006103 ent->checked = 1;
6104 }
6105
6106 if (ent->children == NULL) {
6107 /*
6108 * Probably running in SAX mode and the callbacks don't
6109 * build the entity content. So unless we already went
6110 * though parsing for first checking go though the entity
6111 * content to generate callbacks associated to the entity
6112 */
6113 if (was_checked == 1) {
6114 void *user_data;
6115 /*
6116 * This is a bit hackish but this seems the best
6117 * way to make sure both SAX and DOM entity support
6118 * behaves okay.
6119 */
6120 if (ctxt->userData == ctxt)
6121 user_data = NULL;
6122 else
6123 user_data = ctxt->userData;
6124
6125 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6126 ctxt->depth++;
6127 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6128 ent->content, user_data, NULL);
6129 ctxt->depth--;
6130 } else if (ent->etype ==
6131 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6132 ctxt->depth++;
6133 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6134 ctxt->sax, user_data, ctxt->depth,
6135 ent->URI, ent->ExternalID, NULL);
6136 ctxt->depth--;
6137 } else {
6138 ret = XML_ERR_ENTITY_PE_INTERNAL;
6139 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6140 "invalid entity type found\n", NULL);
6141 }
6142 if (ret == XML_ERR_ENTITY_LOOP) {
6143 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6144 return;
6145 }
6146 }
6147 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6148 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6149 /*
6150 * Entity reference callback comes second, it's somewhat
6151 * superfluous but a compatibility to historical behaviour
6152 */
6153 ctxt->sax->reference(ctxt->userData, ent->name);
6154 }
6155 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006156 }
6157 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006158 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006159 /*
6160 * Create a node.
6161 */
6162 ctxt->sax->reference(ctxt->userData, ent->name);
6163 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006164 }
6165 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
William M. Brack1227fb32004-10-25 23:17:53 +00006166 /*
6167 * There is a problem on the handling of _private for entities
6168 * (bug 155816): Should we copy the content of the field from
6169 * the entity (possibly overwriting some value set by the user
6170 * when a copy is created), should we leave it alone, or should
6171 * we try to take care of different situations? The problem
6172 * is exacerbated by the usage of this field by the xmlReader.
6173 * To fix this bug, we look at _private on the created node
6174 * and, if it's NULL, we copy in whatever was in the entity.
6175 * If it's not NULL we leave it alone. This is somewhat of a
6176 * hack - maybe we should have further tests to determine
6177 * what to do.
6178 */
Owen Taylor3473f882001-02-23 17:55:21 +00006179 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6180 /*
6181 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00006182 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006183 * In the first occurrence list contains the replacement.
6184 * progressive == 2 means we are operating on the Reader
6185 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00006186 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006187 if (((list == NULL) && (ent->owner == 0)) ||
6188 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006189 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006190
6191 /*
6192 * when operating on a reader, the entities definitions
6193 * are always owning the entities subtree.
6194 if (ctxt->parseMode == XML_PARSE_READER)
6195 ent->owner = 1;
6196 */
6197
Daniel Veillard62f313b2001-07-04 19:49:14 +00006198 cur = ent->children;
6199 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00006200 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006201 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006202 if (nw->_private == NULL)
6203 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00006204 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006205 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00006206 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006207 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00006208 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006209 if (cur == ent->last) {
6210 /*
6211 * needed to detect some strange empty
6212 * node cases in the reader tests
6213 */
6214 if ((ctxt->parseMode == XML_PARSE_READER) &&
Daniel Veillard30e76072006-03-09 14:13:55 +00006215 (nw != NULL) &&
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006216 (nw->type == XML_ELEMENT_NODE) &&
6217 (nw->children == NULL))
6218 nw->extra = 1;
6219
Daniel Veillard62f313b2001-07-04 19:49:14 +00006220 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006221 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006222 cur = cur->next;
6223 }
Daniel Veillard81273902003-09-30 00:43:48 +00006224#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006225 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006226 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006227#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006228 } else if (list == NULL) {
6229 xmlNodePtr nw = NULL, cur, next, last,
6230 firstChild = NULL;
6231 /*
6232 * Copy the entity child list and make it the new
6233 * entity child list. The goal is to make sure any
6234 * ID or REF referenced will be the one from the
6235 * document content and not the entity copy.
6236 */
6237 cur = ent->children;
6238 ent->children = NULL;
6239 last = ent->last;
6240 ent->last = NULL;
6241 while (cur != NULL) {
6242 next = cur->next;
6243 cur->next = NULL;
6244 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00006245 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006246 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006247 if (nw->_private == NULL)
6248 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006249 if (firstChild == NULL){
6250 firstChild = cur;
6251 }
6252 xmlAddChild((xmlNodePtr) ent, nw);
6253 xmlAddChild(ctxt->node, cur);
6254 }
6255 if (cur == last)
6256 break;
6257 cur = next;
6258 }
6259 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00006260#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006261 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6262 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006263#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006264 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006265 const xmlChar *nbktext;
6266
Daniel Veillard62f313b2001-07-04 19:49:14 +00006267 /*
6268 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006269 * node with a possible previous text one which
6270 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006271 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006272 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6273 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006274 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006275 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006276 if ((ent->last != ent->children) &&
6277 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006278 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006279 xmlAddChildList(ctxt->node, ent->children);
6280 }
6281
Owen Taylor3473f882001-02-23 17:55:21 +00006282 /*
6283 * This is to avoid a nasty side effect, see
6284 * characters() in SAX.c
6285 */
6286 ctxt->nodemem = 0;
6287 ctxt->nodelen = 0;
6288 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006289 }
6290 }
6291 } else {
6292 val = ent->content;
6293 if (val == NULL) return;
6294 /*
6295 * inline the entity.
6296 */
6297 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6298 (!ctxt->disableSAX))
6299 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6300 }
6301 }
6302}
6303
6304/**
6305 * xmlParseEntityRef:
6306 * @ctxt: an XML parser context
6307 *
6308 * parse ENTITY references declarations
6309 *
6310 * [68] EntityRef ::= '&' Name ';'
6311 *
6312 * [ WFC: Entity Declared ]
6313 * In a document without any DTD, a document with only an internal DTD
6314 * subset which contains no parameter entity references, or a document
6315 * with "standalone='yes'", the Name given in the entity reference
6316 * must match that in an entity declaration, except that well-formed
6317 * documents need not declare any of the following entities: amp, lt,
6318 * gt, apos, quot. The declaration of a parameter entity must precede
6319 * any reference to it. Similarly, the declaration of a general entity
6320 * must precede any reference to it which appears in a default value in an
6321 * attribute-list declaration. Note that if entities are declared in the
6322 * external subset or in external parameter entities, a non-validating
6323 * processor is not obligated to read and process their declarations;
6324 * for such documents, the rule that an entity must be declared is a
6325 * well-formedness constraint only if standalone='yes'.
6326 *
6327 * [ WFC: Parsed Entity ]
6328 * An entity reference must not contain the name of an unparsed entity
6329 *
6330 * Returns the xmlEntityPtr if found, or NULL otherwise.
6331 */
6332xmlEntityPtr
6333xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006334 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006335 xmlEntityPtr ent = NULL;
6336
6337 GROW;
6338
6339 if (RAW == '&') {
6340 NEXT;
6341 name = xmlParseName(ctxt);
6342 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006343 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6344 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006345 } else {
6346 if (RAW == ';') {
6347 NEXT;
6348 /*
6349 * Ask first SAX for entity resolution, otherwise try the
6350 * predefined set.
6351 */
6352 if (ctxt->sax != NULL) {
6353 if (ctxt->sax->getEntity != NULL)
6354 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006355 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006356 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006357 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6358 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006359 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006360 }
Owen Taylor3473f882001-02-23 17:55:21 +00006361 }
6362 /*
6363 * [ WFC: Entity Declared ]
6364 * In a document without any DTD, a document with only an
6365 * internal DTD subset which contains no parameter entity
6366 * references, or a document with "standalone='yes'", the
6367 * Name given in the entity reference must match that in an
6368 * entity declaration, except that well-formed documents
6369 * need not declare any of the following entities: amp, lt,
6370 * gt, apos, quot.
6371 * The declaration of a parameter entity must precede any
6372 * reference to it.
6373 * Similarly, the declaration of a general entity must
6374 * precede any reference to it which appears in a default
6375 * value in an attribute-list declaration. Note that if
6376 * entities are declared in the external subset or in
6377 * external parameter entities, a non-validating processor
6378 * is not obligated to read and process their declarations;
6379 * for such documents, the rule that an entity must be
6380 * declared is a well-formedness constraint only if
6381 * standalone='yes'.
6382 */
6383 if (ent == NULL) {
6384 if ((ctxt->standalone == 1) ||
6385 ((ctxt->hasExternalSubset == 0) &&
6386 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006387 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006388 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006389 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006390 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006391 "Entity '%s' not defined\n", name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006392 if ((ctxt->inSubset == 0) &&
6393 (ctxt->sax != NULL) &&
6394 (ctxt->sax->reference != NULL)) {
Daniel Veillarda9557952006-10-12 12:53:15 +00006395 ctxt->sax->reference(ctxt->userData, name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006396 }
Owen Taylor3473f882001-02-23 17:55:21 +00006397 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006398 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006399 }
6400
6401 /*
6402 * [ WFC: Parsed Entity ]
6403 * An entity reference must not contain the name of an
6404 * unparsed entity
6405 */
6406 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006407 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006408 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006409 }
6410
6411 /*
6412 * [ WFC: No External Entity References ]
6413 * Attribute values cannot contain direct or indirect
6414 * entity references to external entities.
6415 */
6416 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6417 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006418 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6419 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006420 }
6421 /*
6422 * [ WFC: No < in Attribute Values ]
6423 * The replacement text of any entity referred to directly or
6424 * indirectly in an attribute value (other than "&lt;") must
6425 * not contain a <.
6426 */
6427 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6428 (ent != NULL) &&
6429 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6430 (ent->content != NULL) &&
6431 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006432 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006433 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006434 }
6435
6436 /*
6437 * Internal check, no parameter entities here ...
6438 */
6439 else {
6440 switch (ent->etype) {
6441 case XML_INTERNAL_PARAMETER_ENTITY:
6442 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006443 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6444 "Attempt to reference the parameter entity '%s'\n",
6445 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006446 break;
6447 default:
6448 break;
6449 }
6450 }
6451
6452 /*
6453 * [ WFC: No Recursion ]
6454 * A parsed entity must not contain a recursive reference
6455 * to itself, either directly or indirectly.
6456 * Done somewhere else
6457 */
6458
6459 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006460 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006461 }
Owen Taylor3473f882001-02-23 17:55:21 +00006462 }
6463 }
6464 return(ent);
6465}
6466
6467/**
6468 * xmlParseStringEntityRef:
6469 * @ctxt: an XML parser context
6470 * @str: a pointer to an index in the string
6471 *
6472 * parse ENTITY references declarations, but this version parses it from
6473 * a string value.
6474 *
6475 * [68] EntityRef ::= '&' Name ';'
6476 *
6477 * [ WFC: Entity Declared ]
6478 * In a document without any DTD, a document with only an internal DTD
6479 * subset which contains no parameter entity references, or a document
6480 * with "standalone='yes'", the Name given in the entity reference
6481 * must match that in an entity declaration, except that well-formed
6482 * documents need not declare any of the following entities: amp, lt,
6483 * gt, apos, quot. The declaration of a parameter entity must precede
6484 * any reference to it. Similarly, the declaration of a general entity
6485 * must precede any reference to it which appears in a default value in an
6486 * attribute-list declaration. Note that if entities are declared in the
6487 * external subset or in external parameter entities, a non-validating
6488 * processor is not obligated to read and process their declarations;
6489 * for such documents, the rule that an entity must be declared is a
6490 * well-formedness constraint only if standalone='yes'.
6491 *
6492 * [ WFC: Parsed Entity ]
6493 * An entity reference must not contain the name of an unparsed entity
6494 *
6495 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6496 * is updated to the current location in the string.
6497 */
6498xmlEntityPtr
6499xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6500 xmlChar *name;
6501 const xmlChar *ptr;
6502 xmlChar cur;
6503 xmlEntityPtr ent = NULL;
6504
6505 if ((str == NULL) || (*str == NULL))
6506 return(NULL);
6507 ptr = *str;
6508 cur = *ptr;
6509 if (cur == '&') {
6510 ptr++;
6511 cur = *ptr;
6512 name = xmlParseStringName(ctxt, &ptr);
6513 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006514 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6515 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006516 } else {
6517 if (*ptr == ';') {
6518 ptr++;
6519 /*
6520 * Ask first SAX for entity resolution, otherwise try the
6521 * predefined set.
6522 */
6523 if (ctxt->sax != NULL) {
6524 if (ctxt->sax->getEntity != NULL)
6525 ent = ctxt->sax->getEntity(ctxt->userData, name);
6526 if (ent == NULL)
6527 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006528 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006529 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006530 }
Owen Taylor3473f882001-02-23 17:55:21 +00006531 }
6532 /*
6533 * [ WFC: Entity Declared ]
6534 * In a document without any DTD, a document with only an
6535 * internal DTD subset which contains no parameter entity
6536 * references, or a document with "standalone='yes'", the
6537 * Name given in the entity reference must match that in an
6538 * entity declaration, except that well-formed documents
6539 * need not declare any of the following entities: amp, lt,
6540 * gt, apos, quot.
6541 * The declaration of a parameter entity must precede any
6542 * reference to it.
6543 * Similarly, the declaration of a general entity must
6544 * precede any reference to it which appears in a default
6545 * value in an attribute-list declaration. Note that if
6546 * entities are declared in the external subset or in
6547 * external parameter entities, a non-validating processor
6548 * is not obligated to read and process their declarations;
6549 * for such documents, the rule that an entity must be
6550 * declared is a well-formedness constraint only if
6551 * standalone='yes'.
6552 */
6553 if (ent == NULL) {
6554 if ((ctxt->standalone == 1) ||
6555 ((ctxt->hasExternalSubset == 0) &&
6556 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006557 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006558 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006559 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006560 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006561 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006562 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006563 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006564 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006565 }
6566
6567 /*
6568 * [ WFC: Parsed Entity ]
6569 * An entity reference must not contain the name of an
6570 * unparsed entity
6571 */
6572 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006573 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006574 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006575 }
6576
6577 /*
6578 * [ WFC: No External Entity References ]
6579 * Attribute values cannot contain direct or indirect
6580 * entity references to external entities.
6581 */
6582 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6583 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006584 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006585 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006586 }
6587 /*
6588 * [ WFC: No < in Attribute Values ]
6589 * The replacement text of any entity referred to directly or
6590 * indirectly in an attribute value (other than "&lt;") must
6591 * not contain a <.
6592 */
6593 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6594 (ent != NULL) &&
6595 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6596 (ent->content != NULL) &&
6597 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006598 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6599 "'<' in entity '%s' is not allowed in attributes values\n",
6600 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006601 }
6602
6603 /*
6604 * Internal check, no parameter entities here ...
6605 */
6606 else {
6607 switch (ent->etype) {
6608 case XML_INTERNAL_PARAMETER_ENTITY:
6609 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006610 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6611 "Attempt to reference the parameter entity '%s'\n",
6612 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006613 break;
6614 default:
6615 break;
6616 }
6617 }
6618
6619 /*
6620 * [ WFC: No Recursion ]
6621 * A parsed entity must not contain a recursive reference
6622 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006623 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006624 */
6625
6626 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006627 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006628 }
6629 xmlFree(name);
6630 }
6631 }
6632 *str = ptr;
6633 return(ent);
6634}
6635
6636/**
6637 * xmlParsePEReference:
6638 * @ctxt: an XML parser context
6639 *
6640 * parse PEReference declarations
6641 * The entity content is handled directly by pushing it's content as
6642 * a new input stream.
6643 *
6644 * [69] PEReference ::= '%' Name ';'
6645 *
6646 * [ WFC: No Recursion ]
6647 * A parsed entity must not contain a recursive
6648 * reference to itself, either directly or indirectly.
6649 *
6650 * [ WFC: Entity Declared ]
6651 * In a document without any DTD, a document with only an internal DTD
6652 * subset which contains no parameter entity references, or a document
6653 * with "standalone='yes'", ... ... The declaration of a parameter
6654 * entity must precede any reference to it...
6655 *
6656 * [ VC: Entity Declared ]
6657 * In a document with an external subset or external parameter entities
6658 * with "standalone='no'", ... ... The declaration of a parameter entity
6659 * must precede any reference to it...
6660 *
6661 * [ WFC: In DTD ]
6662 * Parameter-entity references may only appear in the DTD.
6663 * NOTE: misleading but this is handled.
6664 */
6665void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006666xmlParsePEReference(xmlParserCtxtPtr ctxt)
6667{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006668 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006669 xmlEntityPtr entity = NULL;
6670 xmlParserInputPtr input;
6671
6672 if (RAW == '%') {
6673 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006674 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006675 if (name == NULL) {
6676 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6677 "xmlParsePEReference: no name\n");
6678 } else {
6679 if (RAW == ';') {
6680 NEXT;
6681 if ((ctxt->sax != NULL) &&
6682 (ctxt->sax->getParameterEntity != NULL))
6683 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6684 name);
6685 if (entity == NULL) {
6686 /*
6687 * [ WFC: Entity Declared ]
6688 * In a document without any DTD, a document with only an
6689 * internal DTD subset which contains no parameter entity
6690 * references, or a document with "standalone='yes'", ...
6691 * ... The declaration of a parameter entity must precede
6692 * any reference to it...
6693 */
6694 if ((ctxt->standalone == 1) ||
6695 ((ctxt->hasExternalSubset == 0) &&
6696 (ctxt->hasPErefs == 0))) {
6697 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6698 "PEReference: %%%s; not found\n",
6699 name);
6700 } else {
6701 /*
6702 * [ VC: Entity Declared ]
6703 * In a document with an external subset or external
6704 * parameter entities with "standalone='no'", ...
6705 * ... The declaration of a parameter entity must
6706 * precede any reference to it...
6707 */
6708 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6709 "PEReference: %%%s; not found\n",
6710 name, NULL);
6711 ctxt->valid = 0;
6712 }
6713 } else {
6714 /*
6715 * Internal checking in case the entity quest barfed
6716 */
6717 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6718 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6719 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6720 "Internal: %%%s; is not a parameter entity\n",
6721 name, NULL);
6722 } else if (ctxt->input->free != deallocblankswrapper) {
6723 input =
6724 xmlNewBlanksWrapperInputStream(ctxt, entity);
6725 xmlPushInput(ctxt, input);
6726 } else {
6727 /*
6728 * TODO !!!
6729 * handle the extra spaces added before and after
6730 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6731 */
6732 input = xmlNewEntityInputStream(ctxt, entity);
6733 xmlPushInput(ctxt, input);
6734 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006735 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006736 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006737 xmlParseTextDecl(ctxt);
6738 if (ctxt->errNo ==
6739 XML_ERR_UNSUPPORTED_ENCODING) {
6740 /*
6741 * The XML REC instructs us to stop parsing
6742 * right here
6743 */
6744 ctxt->instate = XML_PARSER_EOF;
6745 return;
6746 }
6747 }
6748 }
6749 }
6750 ctxt->hasPErefs = 1;
6751 } else {
6752 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6753 }
6754 }
Owen Taylor3473f882001-02-23 17:55:21 +00006755 }
6756}
6757
6758/**
6759 * xmlParseStringPEReference:
6760 * @ctxt: an XML parser context
6761 * @str: a pointer to an index in the string
6762 *
6763 * parse PEReference declarations
6764 *
6765 * [69] PEReference ::= '%' Name ';'
6766 *
6767 * [ WFC: No Recursion ]
6768 * A parsed entity must not contain a recursive
6769 * reference to itself, either directly or indirectly.
6770 *
6771 * [ WFC: Entity Declared ]
6772 * In a document without any DTD, a document with only an internal DTD
6773 * subset which contains no parameter entity references, or a document
6774 * with "standalone='yes'", ... ... The declaration of a parameter
6775 * entity must precede any reference to it...
6776 *
6777 * [ VC: Entity Declared ]
6778 * In a document with an external subset or external parameter entities
6779 * with "standalone='no'", ... ... The declaration of a parameter entity
6780 * must precede any reference to it...
6781 *
6782 * [ WFC: In DTD ]
6783 * Parameter-entity references may only appear in the DTD.
6784 * NOTE: misleading but this is handled.
6785 *
6786 * Returns the string of the entity content.
6787 * str is updated to the current value of the index
6788 */
6789xmlEntityPtr
6790xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6791 const xmlChar *ptr;
6792 xmlChar cur;
6793 xmlChar *name;
6794 xmlEntityPtr entity = NULL;
6795
6796 if ((str == NULL) || (*str == NULL)) return(NULL);
6797 ptr = *str;
6798 cur = *ptr;
6799 if (cur == '%') {
6800 ptr++;
6801 cur = *ptr;
6802 name = xmlParseStringName(ctxt, &ptr);
6803 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006804 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6805 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006806 } else {
6807 cur = *ptr;
6808 if (cur == ';') {
6809 ptr++;
6810 cur = *ptr;
6811 if ((ctxt->sax != NULL) &&
6812 (ctxt->sax->getParameterEntity != NULL))
6813 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6814 name);
6815 if (entity == NULL) {
6816 /*
6817 * [ WFC: Entity Declared ]
6818 * In a document without any DTD, a document with only an
6819 * internal DTD subset which contains no parameter entity
6820 * references, or a document with "standalone='yes'", ...
6821 * ... The declaration of a parameter entity must precede
6822 * any reference to it...
6823 */
6824 if ((ctxt->standalone == 1) ||
6825 ((ctxt->hasExternalSubset == 0) &&
6826 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006827 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006828 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006829 } else {
6830 /*
6831 * [ VC: Entity Declared ]
6832 * In a document with an external subset or external
6833 * parameter entities with "standalone='no'", ...
6834 * ... The declaration of a parameter entity must
6835 * precede any reference to it...
6836 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006837 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6838 "PEReference: %%%s; not found\n",
6839 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006840 ctxt->valid = 0;
6841 }
6842 } else {
6843 /*
6844 * Internal checking in case the entity quest barfed
6845 */
6846 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6847 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006848 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6849 "%%%s; is not a parameter entity\n",
6850 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006851 }
6852 }
6853 ctxt->hasPErefs = 1;
6854 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006855 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006856 }
6857 xmlFree(name);
6858 }
6859 }
6860 *str = ptr;
6861 return(entity);
6862}
6863
6864/**
6865 * xmlParseDocTypeDecl:
6866 * @ctxt: an XML parser context
6867 *
6868 * parse a DOCTYPE declaration
6869 *
6870 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6871 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6872 *
6873 * [ VC: Root Element Type ]
6874 * The Name in the document type declaration must match the element
6875 * type of the root element.
6876 */
6877
6878void
6879xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006880 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006881 xmlChar *ExternalID = NULL;
6882 xmlChar *URI = NULL;
6883
6884 /*
6885 * We know that '<!DOCTYPE' has been detected.
6886 */
6887 SKIP(9);
6888
6889 SKIP_BLANKS;
6890
6891 /*
6892 * Parse the DOCTYPE name.
6893 */
6894 name = xmlParseName(ctxt);
6895 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006896 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6897 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006898 }
6899 ctxt->intSubName = name;
6900
6901 SKIP_BLANKS;
6902
6903 /*
6904 * Check for SystemID and ExternalID
6905 */
6906 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6907
6908 if ((URI != NULL) || (ExternalID != NULL)) {
6909 ctxt->hasExternalSubset = 1;
6910 }
6911 ctxt->extSubURI = URI;
6912 ctxt->extSubSystem = ExternalID;
6913
6914 SKIP_BLANKS;
6915
6916 /*
6917 * Create and update the internal subset.
6918 */
6919 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6920 (!ctxt->disableSAX))
6921 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6922
6923 /*
6924 * Is there any internal subset declarations ?
6925 * they are handled separately in xmlParseInternalSubset()
6926 */
6927 if (RAW == '[')
6928 return;
6929
6930 /*
6931 * We should be at the end of the DOCTYPE declaration.
6932 */
6933 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006934 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006935 }
6936 NEXT;
6937}
6938
6939/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006940 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006941 * @ctxt: an XML parser context
6942 *
6943 * parse the internal subset declaration
6944 *
6945 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6946 */
6947
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006948static void
Owen Taylor3473f882001-02-23 17:55:21 +00006949xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6950 /*
6951 * Is there any DTD definition ?
6952 */
6953 if (RAW == '[') {
6954 ctxt->instate = XML_PARSER_DTD;
6955 NEXT;
6956 /*
6957 * Parse the succession of Markup declarations and
6958 * PEReferences.
6959 * Subsequence (markupdecl | PEReference | S)*
6960 */
6961 while (RAW != ']') {
6962 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006963 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006964
6965 SKIP_BLANKS;
6966 xmlParseMarkupDecl(ctxt);
6967 xmlParsePEReference(ctxt);
6968
6969 /*
6970 * Pop-up of finished entities.
6971 */
6972 while ((RAW == 0) && (ctxt->inputNr > 1))
6973 xmlPopInput(ctxt);
6974
6975 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006976 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006977 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006978 break;
6979 }
6980 }
6981 if (RAW == ']') {
6982 NEXT;
6983 SKIP_BLANKS;
6984 }
6985 }
6986
6987 /*
6988 * We should be at the end of the DOCTYPE declaration.
6989 */
6990 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006991 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006992 }
6993 NEXT;
6994}
6995
Daniel Veillard81273902003-09-30 00:43:48 +00006996#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006997/**
6998 * xmlParseAttribute:
6999 * @ctxt: an XML parser context
7000 * @value: a xmlChar ** used to store the value of the attribute
7001 *
7002 * parse an attribute
7003 *
7004 * [41] Attribute ::= Name Eq AttValue
7005 *
7006 * [ WFC: No External Entity References ]
7007 * Attribute values cannot contain direct or indirect entity references
7008 * to external entities.
7009 *
7010 * [ WFC: No < in Attribute Values ]
7011 * The replacement text of any entity referred to directly or indirectly in
7012 * an attribute value (other than "&lt;") must not contain a <.
7013 *
7014 * [ VC: Attribute Value Type ]
7015 * The attribute must have been declared; the value must be of the type
7016 * declared for it.
7017 *
7018 * [25] Eq ::= S? '=' S?
7019 *
7020 * With namespace:
7021 *
7022 * [NS 11] Attribute ::= QName Eq AttValue
7023 *
7024 * Also the case QName == xmlns:??? is handled independently as a namespace
7025 * definition.
7026 *
7027 * Returns the attribute name, and the value in *value.
7028 */
7029
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007030const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007031xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007032 const xmlChar *name;
7033 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007034
7035 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007036 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007037 name = xmlParseName(ctxt);
7038 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007039 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007040 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007041 return(NULL);
7042 }
7043
7044 /*
7045 * read the value
7046 */
7047 SKIP_BLANKS;
7048 if (RAW == '=') {
7049 NEXT;
7050 SKIP_BLANKS;
7051 val = xmlParseAttValue(ctxt);
7052 ctxt->instate = XML_PARSER_CONTENT;
7053 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007054 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007055 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007056 return(NULL);
7057 }
7058
7059 /*
7060 * Check that xml:lang conforms to the specification
7061 * No more registered as an error, just generate a warning now
7062 * since this was deprecated in XML second edition
7063 */
7064 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7065 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007066 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7067 "Malformed value for xml:lang : %s\n",
7068 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007069 }
7070 }
7071
7072 /*
7073 * Check that xml:space conforms to the specification
7074 */
7075 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7076 if (xmlStrEqual(val, BAD_CAST "default"))
7077 *(ctxt->space) = 0;
7078 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7079 *(ctxt->space) = 1;
7080 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007081 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007082"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007083 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007084 }
7085 }
7086
7087 *value = val;
7088 return(name);
7089}
7090
7091/**
7092 * xmlParseStartTag:
7093 * @ctxt: an XML parser context
7094 *
7095 * parse a start of tag either for rule element or
7096 * EmptyElement. In both case we don't parse the tag closing chars.
7097 *
7098 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7099 *
7100 * [ WFC: Unique Att Spec ]
7101 * No attribute name may appear more than once in the same start-tag or
7102 * empty-element tag.
7103 *
7104 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7105 *
7106 * [ WFC: Unique Att Spec ]
7107 * No attribute name may appear more than once in the same start-tag or
7108 * empty-element tag.
7109 *
7110 * With namespace:
7111 *
7112 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7113 *
7114 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7115 *
7116 * Returns the element name parsed
7117 */
7118
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007119const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007120xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007121 const xmlChar *name;
7122 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007123 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007124 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007125 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007126 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007127 int i;
7128
7129 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007130 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007131
7132 name = xmlParseName(ctxt);
7133 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007134 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007135 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007136 return(NULL);
7137 }
7138
7139 /*
7140 * Now parse the attributes, it ends up with the ending
7141 *
7142 * (S Attribute)* S?
7143 */
7144 SKIP_BLANKS;
7145 GROW;
7146
Daniel Veillard21a0f912001-02-25 19:54:14 +00007147 while ((RAW != '>') &&
7148 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007149 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00007150 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007151 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007152
7153 attname = xmlParseAttribute(ctxt, &attvalue);
7154 if ((attname != NULL) && (attvalue != NULL)) {
7155 /*
7156 * [ WFC: Unique Att Spec ]
7157 * No attribute name may appear more than once in the same
7158 * start-tag or empty-element tag.
7159 */
7160 for (i = 0; i < nbatts;i += 2) {
7161 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007162 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00007163 xmlFree(attvalue);
7164 goto failed;
7165 }
7166 }
Owen Taylor3473f882001-02-23 17:55:21 +00007167 /*
7168 * Add the pair to atts
7169 */
7170 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007171 maxatts = 22; /* allow for 10 attrs by default */
7172 atts = (const xmlChar **)
7173 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00007174 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007175 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007176 if (attvalue != NULL)
7177 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007178 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007179 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007180 ctxt->atts = atts;
7181 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007182 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007183 const xmlChar **n;
7184
Owen Taylor3473f882001-02-23 17:55:21 +00007185 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007186 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007187 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007188 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007189 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007190 if (attvalue != NULL)
7191 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007192 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007193 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007194 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007195 ctxt->atts = atts;
7196 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007197 }
7198 atts[nbatts++] = attname;
7199 atts[nbatts++] = attvalue;
7200 atts[nbatts] = NULL;
7201 atts[nbatts + 1] = NULL;
7202 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007203 if (attvalue != NULL)
7204 xmlFree(attvalue);
7205 }
7206
7207failed:
7208
Daniel Veillard3772de32002-12-17 10:31:45 +00007209 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00007210 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7211 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007212 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007213 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7214 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007215 }
7216 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00007217 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7218 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007219 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7220 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007221 break;
7222 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007223 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00007224 GROW;
7225 }
7226
7227 /*
7228 * SAX: Start of Element !
7229 */
7230 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007231 (!ctxt->disableSAX)) {
7232 if (nbatts > 0)
7233 ctxt->sax->startElement(ctxt->userData, name, atts);
7234 else
7235 ctxt->sax->startElement(ctxt->userData, name, NULL);
7236 }
Owen Taylor3473f882001-02-23 17:55:21 +00007237
7238 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007239 /* Free only the content strings */
7240 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007241 if (atts[i] != NULL)
7242 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00007243 }
7244 return(name);
7245}
7246
7247/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007248 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007249 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007250 * @line: line of the start tag
7251 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007252 *
7253 * parse an end of tag
7254 *
7255 * [42] ETag ::= '</' Name S? '>'
7256 *
7257 * With namespace
7258 *
7259 * [NS 9] ETag ::= '</' QName S? '>'
7260 */
7261
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007262static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007263xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007264 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007265
7266 GROW;
7267 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007268 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007269 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007270 return;
7271 }
7272 SKIP(2);
7273
Daniel Veillard46de64e2002-05-29 08:21:33 +00007274 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007275
7276 /*
7277 * We should definitely be at the ending "S? '>'" part
7278 */
7279 GROW;
7280 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007281 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007282 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007283 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007284 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007285
7286 /*
7287 * [ WFC: Element Type Match ]
7288 * The Name in an element's end-tag must match the element type in the
7289 * start-tag.
7290 *
7291 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007292 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007293 if (name == NULL) name = BAD_CAST "unparseable";
7294 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007295 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007296 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007297 }
7298
7299 /*
7300 * SAX: End of Tag
7301 */
7302 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7303 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007304 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007305
Daniel Veillarde57ec792003-09-10 10:50:59 +00007306 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007307 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007308 return;
7309}
7310
7311/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007312 * xmlParseEndTag:
7313 * @ctxt: an XML parser context
7314 *
7315 * parse an end of tag
7316 *
7317 * [42] ETag ::= '</' Name S? '>'
7318 *
7319 * With namespace
7320 *
7321 * [NS 9] ETag ::= '</' QName S? '>'
7322 */
7323
7324void
7325xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007326 xmlParseEndTag1(ctxt, 0);
7327}
Daniel Veillard81273902003-09-30 00:43:48 +00007328#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007329
7330/************************************************************************
7331 * *
7332 * SAX 2 specific operations *
7333 * *
7334 ************************************************************************/
7335
7336static const xmlChar *
7337xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7338 int len = 0, l;
7339 int c;
7340 int count = 0;
7341
7342 /*
7343 * Handler for more complex cases
7344 */
7345 GROW;
7346 c = CUR_CHAR(l);
7347 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007348 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007349 return(NULL);
7350 }
7351
7352 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007353 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007354 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007355 (IS_COMBINING(c)) ||
7356 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007357 if (count++ > 100) {
7358 count = 0;
7359 GROW;
7360 }
7361 len += l;
7362 NEXTL(l);
7363 c = CUR_CHAR(l);
7364 }
7365 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7366}
7367
7368/*
7369 * xmlGetNamespace:
7370 * @ctxt: an XML parser context
7371 * @prefix: the prefix to lookup
7372 *
7373 * Lookup the namespace name for the @prefix (which ca be NULL)
7374 * The prefix must come from the @ctxt->dict dictionnary
7375 *
7376 * Returns the namespace name or NULL if not bound
7377 */
7378static const xmlChar *
7379xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7380 int i;
7381
Daniel Veillarde57ec792003-09-10 10:50:59 +00007382 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007383 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007384 if (ctxt->nsTab[i] == prefix) {
7385 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7386 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007387 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007388 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007389 return(NULL);
7390}
7391
7392/**
7393 * xmlParseNCName:
7394 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007395 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00007396 *
7397 * parse an XML name.
7398 *
7399 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7400 * CombiningChar | Extender
7401 *
7402 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7403 *
7404 * Returns the Name parsed or NULL
7405 */
7406
7407static const xmlChar *
7408xmlParseNCName(xmlParserCtxtPtr ctxt) {
7409 const xmlChar *in;
7410 const xmlChar *ret;
7411 int count = 0;
7412
7413 /*
7414 * Accelerator for simple ASCII names
7415 */
7416 in = ctxt->input->cur;
7417 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7418 ((*in >= 0x41) && (*in <= 0x5A)) ||
7419 (*in == '_')) {
7420 in++;
7421 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7422 ((*in >= 0x41) && (*in <= 0x5A)) ||
7423 ((*in >= 0x30) && (*in <= 0x39)) ||
7424 (*in == '_') || (*in == '-') ||
7425 (*in == '.'))
7426 in++;
7427 if ((*in > 0) && (*in < 0x80)) {
7428 count = in - ctxt->input->cur;
7429 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7430 ctxt->input->cur = in;
7431 ctxt->nbChars += count;
7432 ctxt->input->col += count;
7433 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007434 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007435 }
7436 return(ret);
7437 }
7438 }
7439 return(xmlParseNCNameComplex(ctxt));
7440}
7441
7442/**
7443 * xmlParseQName:
7444 * @ctxt: an XML parser context
7445 * @prefix: pointer to store the prefix part
7446 *
7447 * parse an XML Namespace QName
7448 *
7449 * [6] QName ::= (Prefix ':')? LocalPart
7450 * [7] Prefix ::= NCName
7451 * [8] LocalPart ::= NCName
7452 *
7453 * Returns the Name parsed or NULL
7454 */
7455
7456static const xmlChar *
7457xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7458 const xmlChar *l, *p;
7459
7460 GROW;
7461
7462 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007463 if (l == NULL) {
7464 if (CUR == ':') {
7465 l = xmlParseName(ctxt);
7466 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007467 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7468 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007469 *prefix = NULL;
7470 return(l);
7471 }
7472 }
7473 return(NULL);
7474 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007475 if (CUR == ':') {
7476 NEXT;
7477 p = l;
7478 l = xmlParseNCName(ctxt);
7479 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007480 xmlChar *tmp;
7481
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007482 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7483 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007484 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7485 p = xmlDictLookup(ctxt->dict, tmp, -1);
7486 if (tmp != NULL) xmlFree(tmp);
7487 *prefix = NULL;
7488 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007489 }
7490 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007491 xmlChar *tmp;
7492
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007493 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7494 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007495 NEXT;
7496 tmp = (xmlChar *) xmlParseName(ctxt);
7497 if (tmp != NULL) {
7498 tmp = xmlBuildQName(tmp, l, NULL, 0);
7499 l = xmlDictLookup(ctxt->dict, tmp, -1);
7500 if (tmp != NULL) xmlFree(tmp);
7501 *prefix = p;
7502 return(l);
7503 }
7504 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7505 l = xmlDictLookup(ctxt->dict, tmp, -1);
7506 if (tmp != NULL) xmlFree(tmp);
7507 *prefix = p;
7508 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007509 }
7510 *prefix = p;
7511 } else
7512 *prefix = NULL;
7513 return(l);
7514}
7515
7516/**
7517 * xmlParseQNameAndCompare:
7518 * @ctxt: an XML parser context
7519 * @name: the localname
7520 * @prefix: the prefix, if any.
7521 *
7522 * parse an XML name and compares for match
7523 * (specialized for endtag parsing)
7524 *
7525 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7526 * and the name for mismatch
7527 */
7528
7529static const xmlChar *
7530xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7531 xmlChar const *prefix) {
7532 const xmlChar *cmp = name;
7533 const xmlChar *in;
7534 const xmlChar *ret;
7535 const xmlChar *prefix2;
7536
7537 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7538
7539 GROW;
7540 in = ctxt->input->cur;
7541
7542 cmp = prefix;
7543 while (*in != 0 && *in == *cmp) {
7544 ++in;
7545 ++cmp;
7546 }
7547 if ((*cmp == 0) && (*in == ':')) {
7548 in++;
7549 cmp = name;
7550 while (*in != 0 && *in == *cmp) {
7551 ++in;
7552 ++cmp;
7553 }
William M. Brack76e95df2003-10-18 16:20:14 +00007554 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007555 /* success */
7556 ctxt->input->cur = in;
7557 return((const xmlChar*) 1);
7558 }
7559 }
7560 /*
7561 * all strings coms from the dictionary, equality can be done directly
7562 */
7563 ret = xmlParseQName (ctxt, &prefix2);
7564 if ((ret == name) && (prefix == prefix2))
7565 return((const xmlChar*) 1);
7566 return ret;
7567}
7568
7569/**
7570 * xmlParseAttValueInternal:
7571 * @ctxt: an XML parser context
7572 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007573 * @alloc: whether the attribute was reallocated as a new string
7574 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007575 *
7576 * parse a value for an attribute.
7577 * NOTE: if no normalization is needed, the routine will return pointers
7578 * directly from the data buffer.
7579 *
7580 * 3.3.3 Attribute-Value Normalization:
7581 * Before the value of an attribute is passed to the application or
7582 * checked for validity, the XML processor must normalize it as follows:
7583 * - a character reference is processed by appending the referenced
7584 * character to the attribute value
7585 * - an entity reference is processed by recursively processing the
7586 * replacement text of the entity
7587 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7588 * appending #x20 to the normalized value, except that only a single
7589 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7590 * parsed entity or the literal entity value of an internal parsed entity
7591 * - other characters are processed by appending them to the normalized value
7592 * If the declared value is not CDATA, then the XML processor must further
7593 * process the normalized attribute value by discarding any leading and
7594 * trailing space (#x20) characters, and by replacing sequences of space
7595 * (#x20) characters by a single space (#x20) character.
7596 * All attributes for which no declaration has been read should be treated
7597 * by a non-validating parser as if declared CDATA.
7598 *
7599 * Returns the AttValue parsed or NULL. The value has to be freed by the
7600 * caller if it was copied, this can be detected by val[*len] == 0.
7601 */
7602
7603static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007604xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7605 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007606{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007607 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007608 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007609 xmlChar *ret = NULL;
7610
7611 GROW;
7612 in = (xmlChar *) CUR_PTR;
7613 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007614 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007615 return (NULL);
7616 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007617 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007618
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007619 /*
7620 * try to handle in this routine the most common case where no
7621 * allocation of a new string is required and where content is
7622 * pure ASCII.
7623 */
7624 limit = *in++;
7625 end = ctxt->input->end;
7626 start = in;
7627 if (in >= end) {
7628 const xmlChar *oldbase = ctxt->input->base;
7629 GROW;
7630 if (oldbase != ctxt->input->base) {
7631 long delta = ctxt->input->base - oldbase;
7632 start = start + delta;
7633 in = in + delta;
7634 }
7635 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007636 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007637 if (normalize) {
7638 /*
7639 * Skip any leading spaces
7640 */
7641 while ((in < end) && (*in != limit) &&
7642 ((*in == 0x20) || (*in == 0x9) ||
7643 (*in == 0xA) || (*in == 0xD))) {
7644 in++;
7645 start = in;
7646 if (in >= end) {
7647 const xmlChar *oldbase = ctxt->input->base;
7648 GROW;
7649 if (oldbase != ctxt->input->base) {
7650 long delta = ctxt->input->base - oldbase;
7651 start = start + delta;
7652 in = in + delta;
7653 }
7654 end = ctxt->input->end;
7655 }
7656 }
7657 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7658 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7659 if ((*in++ == 0x20) && (*in == 0x20)) break;
7660 if (in >= end) {
7661 const xmlChar *oldbase = ctxt->input->base;
7662 GROW;
7663 if (oldbase != ctxt->input->base) {
7664 long delta = ctxt->input->base - oldbase;
7665 start = start + delta;
7666 in = in + delta;
7667 }
7668 end = ctxt->input->end;
7669 }
7670 }
7671 last = in;
7672 /*
7673 * skip the trailing blanks
7674 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007675 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007676 while ((in < end) && (*in != limit) &&
7677 ((*in == 0x20) || (*in == 0x9) ||
7678 (*in == 0xA) || (*in == 0xD))) {
7679 in++;
7680 if (in >= end) {
7681 const xmlChar *oldbase = ctxt->input->base;
7682 GROW;
7683 if (oldbase != ctxt->input->base) {
7684 long delta = ctxt->input->base - oldbase;
7685 start = start + delta;
7686 in = in + delta;
7687 last = last + delta;
7688 }
7689 end = ctxt->input->end;
7690 }
7691 }
7692 if (*in != limit) goto need_complex;
7693 } else {
7694 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7695 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7696 in++;
7697 if (in >= end) {
7698 const xmlChar *oldbase = ctxt->input->base;
7699 GROW;
7700 if (oldbase != ctxt->input->base) {
7701 long delta = ctxt->input->base - oldbase;
7702 start = start + delta;
7703 in = in + delta;
7704 }
7705 end = ctxt->input->end;
7706 }
7707 }
7708 last = in;
7709 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007710 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007711 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007712 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007713 *len = last - start;
7714 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007715 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007716 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007717 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007718 }
7719 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007720 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007721 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007722need_complex:
7723 if (alloc) *alloc = 1;
7724 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007725}
7726
7727/**
7728 * xmlParseAttribute2:
7729 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007730 * @pref: the element prefix
7731 * @elem: the element name
7732 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007733 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007734 * @len: an int * to save the length of the attribute
7735 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007736 *
7737 * parse an attribute in the new SAX2 framework.
7738 *
7739 * Returns the attribute name, and the value in *value, .
7740 */
7741
7742static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007743xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7744 const xmlChar *pref, const xmlChar *elem,
7745 const xmlChar **prefix, xmlChar **value,
7746 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007747 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00007748 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007749 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007750
7751 *value = NULL;
7752 GROW;
7753 name = xmlParseQName(ctxt, prefix);
7754 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007755 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7756 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007757 return(NULL);
7758 }
7759
7760 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007761 * get the type if needed
7762 */
7763 if (ctxt->attsSpecial != NULL) {
7764 int type;
7765
7766 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7767 pref, elem, *prefix, name);
7768 if (type != 0) normalize = 1;
7769 }
7770
7771 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007772 * read the value
7773 */
7774 SKIP_BLANKS;
7775 if (RAW == '=') {
7776 NEXT;
7777 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007778 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007779 ctxt->instate = XML_PARSER_CONTENT;
7780 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007781 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007782 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007783 return(NULL);
7784 }
7785
Daniel Veillardd8925572005-06-08 22:34:55 +00007786 if (*prefix == ctxt->str_xml) {
7787 /*
7788 * Check that xml:lang conforms to the specification
7789 * No more registered as an error, just generate a warning now
7790 * since this was deprecated in XML second edition
7791 */
7792 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
7793 internal_val = xmlStrndup(val, *len);
7794 if (!xmlCheckLanguageID(internal_val)) {
7795 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7796 "Malformed value for xml:lang : %s\n",
7797 internal_val, NULL);
7798 }
7799 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007800
Daniel Veillardd8925572005-06-08 22:34:55 +00007801 /*
7802 * Check that xml:space conforms to the specification
7803 */
7804 if (xmlStrEqual(name, BAD_CAST "space")) {
7805 internal_val = xmlStrndup(val, *len);
7806 if (xmlStrEqual(internal_val, BAD_CAST "default"))
7807 *(ctxt->space) = 0;
7808 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
7809 *(ctxt->space) = 1;
7810 else {
7811 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007812"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007813 internal_val, NULL);
7814 }
7815 }
7816 if (internal_val) {
7817 xmlFree(internal_val);
7818 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007819 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007820
7821 *value = val;
7822 return(name);
7823}
7824
7825/**
7826 * xmlParseStartTag2:
7827 * @ctxt: an XML parser context
7828 *
7829 * parse a start of tag either for rule element or
7830 * EmptyElement. In both case we don't parse the tag closing chars.
7831 * This routine is called when running SAX2 parsing
7832 *
7833 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7834 *
7835 * [ WFC: Unique Att Spec ]
7836 * No attribute name may appear more than once in the same start-tag or
7837 * empty-element tag.
7838 *
7839 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7840 *
7841 * [ WFC: Unique Att Spec ]
7842 * No attribute name may appear more than once in the same start-tag or
7843 * empty-element tag.
7844 *
7845 * With namespace:
7846 *
7847 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7848 *
7849 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7850 *
7851 * Returns the element name parsed
7852 */
7853
7854static const xmlChar *
7855xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007856 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007857 const xmlChar *localname;
7858 const xmlChar *prefix;
7859 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007860 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007861 const xmlChar *nsname;
7862 xmlChar *attvalue;
7863 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007864 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007865 int nratts, nbatts, nbdef;
7866 int i, j, nbNs, attval;
7867 const xmlChar *base;
7868 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00007869 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007870
7871 if (RAW != '<') return(NULL);
7872 NEXT1;
7873
7874 /*
7875 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7876 * point since the attribute values may be stored as pointers to
7877 * the buffer and calling SHRINK would destroy them !
7878 * The Shrinking is only possible once the full set of attribute
7879 * callbacks have been done.
7880 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007881reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007882 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007883 base = ctxt->input->base;
7884 cur = ctxt->input->cur - ctxt->input->base;
7885 nbatts = 0;
7886 nratts = 0;
7887 nbdef = 0;
7888 nbNs = 0;
7889 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00007890 /* Forget any namespaces added during an earlier parse of this element. */
7891 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007892
7893 localname = xmlParseQName(ctxt, &prefix);
7894 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007895 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7896 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007897 return(NULL);
7898 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007899 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007900
7901 /*
7902 * Now parse the attributes, it ends up with the ending
7903 *
7904 * (S Attribute)* S?
7905 */
7906 SKIP_BLANKS;
7907 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007908 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007909
7910 while ((RAW != '>') &&
7911 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007912 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007913 const xmlChar *q = CUR_PTR;
7914 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007915 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007916
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007917 attname = xmlParseAttribute2(ctxt, prefix, localname,
7918 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007919 if ((attname != NULL) && (attvalue != NULL)) {
7920 if (len < 0) len = xmlStrlen(attvalue);
7921 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007922 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7923 xmlURIPtr uri;
7924
7925 if (*URL != 0) {
7926 uri = xmlParseURI((const char *) URL);
7927 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007928 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7929 "xmlns: %s not a valid URI\n",
7930 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007931 } else {
7932 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007933 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7934 "xmlns: URI %s is not absolute\n",
7935 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007936 }
7937 xmlFreeURI(uri);
7938 }
7939 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007940 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007941 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007942 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007943 for (j = 1;j <= nbNs;j++)
7944 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7945 break;
7946 if (j <= nbNs)
7947 xmlErrAttributeDup(ctxt, NULL, attname);
7948 else
7949 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007950 if (alloc != 0) xmlFree(attvalue);
7951 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007952 continue;
7953 }
7954 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007955 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7956 xmlURIPtr uri;
7957
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007958 if (attname == ctxt->str_xml) {
7959 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007960 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7961 "xml namespace prefix mapped to wrong URI\n",
7962 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007963 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007964 /*
7965 * Do not keep a namespace definition node
7966 */
7967 if (alloc != 0) xmlFree(attvalue);
7968 SKIP_BLANKS;
7969 continue;
7970 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007971 uri = xmlParseURI((const char *) URL);
7972 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007973 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7974 "xmlns:%s: '%s' is not a valid URI\n",
7975 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007976 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007977 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007978 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7979 "xmlns:%s: URI %s is not absolute\n",
7980 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007981 }
7982 xmlFreeURI(uri);
7983 }
7984
Daniel Veillard0fb18932003-09-07 09:14:37 +00007985 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007986 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007987 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007988 for (j = 1;j <= nbNs;j++)
7989 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7990 break;
7991 if (j <= nbNs)
7992 xmlErrAttributeDup(ctxt, aprefix, attname);
7993 else
7994 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007995 if (alloc != 0) xmlFree(attvalue);
7996 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00007997 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007998 continue;
7999 }
8000
8001 /*
8002 * Add the pair to atts
8003 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008004 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8005 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008006 if (attvalue[len] == 0)
8007 xmlFree(attvalue);
8008 goto failed;
8009 }
8010 maxatts = ctxt->maxatts;
8011 atts = ctxt->atts;
8012 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008013 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008014 atts[nbatts++] = attname;
8015 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008016 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008017 atts[nbatts++] = attvalue;
8018 attvalue += len;
8019 atts[nbatts++] = attvalue;
8020 /*
8021 * tag if some deallocation is needed
8022 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008023 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008024 } else {
8025 if ((attvalue != NULL) && (attvalue[len] == 0))
8026 xmlFree(attvalue);
8027 }
8028
8029failed:
8030
8031 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008032 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008033 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8034 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008035 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008036 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8037 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008038 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008039 }
8040 SKIP_BLANKS;
8041 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8042 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008043 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008044 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008045 break;
8046 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008047 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008048 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008049 }
8050
Daniel Veillard0fb18932003-09-07 09:14:37 +00008051 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008052 * The attributes defaulting
8053 */
8054 if (ctxt->attsDefault != NULL) {
8055 xmlDefAttrsPtr defaults;
8056
8057 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8058 if (defaults != NULL) {
8059 for (i = 0;i < defaults->nbAttrs;i++) {
8060 attname = defaults->values[4 * i];
8061 aprefix = defaults->values[4 * i + 1];
8062
8063 /*
8064 * special work for namespaces defaulted defs
8065 */
8066 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8067 /*
8068 * check that it's not a defined namespace
8069 */
8070 for (j = 1;j <= nbNs;j++)
8071 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8072 break;
8073 if (j <= nbNs) continue;
8074
8075 nsname = xmlGetNamespace(ctxt, NULL);
8076 if (nsname != defaults->values[4 * i + 2]) {
8077 if (nsPush(ctxt, NULL,
8078 defaults->values[4 * i + 2]) > 0)
8079 nbNs++;
8080 }
8081 } else if (aprefix == ctxt->str_xmlns) {
8082 /*
8083 * check that it's not a defined namespace
8084 */
8085 for (j = 1;j <= nbNs;j++)
8086 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8087 break;
8088 if (j <= nbNs) continue;
8089
8090 nsname = xmlGetNamespace(ctxt, attname);
8091 if (nsname != defaults->values[2]) {
8092 if (nsPush(ctxt, attname,
8093 defaults->values[4 * i + 2]) > 0)
8094 nbNs++;
8095 }
8096 } else {
8097 /*
8098 * check that it's not a defined attribute
8099 */
8100 for (j = 0;j < nbatts;j+=5) {
8101 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8102 break;
8103 }
8104 if (j < nbatts) continue;
8105
8106 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8107 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008108 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008109 }
8110 maxatts = ctxt->maxatts;
8111 atts = ctxt->atts;
8112 }
8113 atts[nbatts++] = attname;
8114 atts[nbatts++] = aprefix;
8115 if (aprefix == NULL)
8116 atts[nbatts++] = NULL;
8117 else
8118 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8119 atts[nbatts++] = defaults->values[4 * i + 2];
8120 atts[nbatts++] = defaults->values[4 * i + 3];
8121 nbdef++;
8122 }
8123 }
8124 }
8125 }
8126
Daniel Veillarde70c8772003-11-25 07:21:18 +00008127 /*
8128 * The attributes checkings
8129 */
8130 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008131 /*
8132 * The default namespace does not apply to attribute names.
8133 */
8134 if (atts[i + 1] != NULL) {
8135 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8136 if (nsname == NULL) {
8137 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8138 "Namespace prefix %s for %s on %s is not defined\n",
8139 atts[i + 1], atts[i], localname);
8140 }
8141 atts[i + 2] = nsname;
8142 } else
8143 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008144 /*
8145 * [ WFC: Unique Att Spec ]
8146 * No attribute name may appear more than once in the same
8147 * start-tag or empty-element tag.
8148 * As extended by the Namespace in XML REC.
8149 */
8150 for (j = 0; j < i;j += 5) {
8151 if (atts[i] == atts[j]) {
8152 if (atts[i+1] == atts[j+1]) {
8153 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8154 break;
8155 }
8156 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8157 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8158 "Namespaced Attribute %s in '%s' redefined\n",
8159 atts[i], nsname, NULL);
8160 break;
8161 }
8162 }
8163 }
8164 }
8165
Daniel Veillarde57ec792003-09-10 10:50:59 +00008166 nsname = xmlGetNamespace(ctxt, prefix);
8167 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008168 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8169 "Namespace prefix %s on %s is not defined\n",
8170 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008171 }
8172 *pref = prefix;
8173 *URI = nsname;
8174
8175 /*
8176 * SAX: Start of Element !
8177 */
8178 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8179 (!ctxt->disableSAX)) {
8180 if (nbNs > 0)
8181 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8182 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8183 nbatts / 5, nbdef, atts);
8184 else
8185 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8186 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8187 }
8188
8189 /*
8190 * Free up attribute allocated strings if needed
8191 */
8192 if (attval != 0) {
8193 for (i = 3,j = 0; j < nratts;i += 5,j++)
8194 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8195 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008196 }
8197
8198 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008199
8200base_changed:
8201 /*
8202 * the attribute strings are valid iif the base didn't changed
8203 */
8204 if (attval != 0) {
8205 for (i = 3,j = 0; j < nratts;i += 5,j++)
8206 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8207 xmlFree((xmlChar *) atts[i]);
8208 }
8209 ctxt->input->cur = ctxt->input->base + cur;
8210 if (ctxt->wellFormed == 1) {
8211 goto reparse;
8212 }
8213 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008214}
8215
8216/**
8217 * xmlParseEndTag2:
8218 * @ctxt: an XML parser context
8219 * @line: line of the start tag
8220 * @nsNr: number of namespaces on the start tag
8221 *
8222 * parse an end of tag
8223 *
8224 * [42] ETag ::= '</' Name S? '>'
8225 *
8226 * With namespace
8227 *
8228 * [NS 9] ETag ::= '</' QName S? '>'
8229 */
8230
8231static void
8232xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008233 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008234 const xmlChar *name;
8235
8236 GROW;
8237 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008238 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008239 return;
8240 }
8241 SKIP(2);
8242
William M. Brack13dfa872004-09-18 04:52:08 +00008243 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008244 if (ctxt->input->cur[tlen] == '>') {
8245 ctxt->input->cur += tlen + 1;
8246 goto done;
8247 }
8248 ctxt->input->cur += tlen;
8249 name = (xmlChar*)1;
8250 } else {
8251 if (prefix == NULL)
8252 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8253 else
8254 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8255 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008256
8257 /*
8258 * We should definitely be at the ending "S? '>'" part
8259 */
8260 GROW;
8261 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008262 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008263 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008264 } else
8265 NEXT1;
8266
8267 /*
8268 * [ WFC: Element Type Match ]
8269 * The Name in an element's end-tag must match the element type in the
8270 * start-tag.
8271 *
8272 */
8273 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008274 if (name == NULL) name = BAD_CAST "unparseable";
8275 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008276 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008277 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008278 }
8279
8280 /*
8281 * SAX: End of Tag
8282 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008283done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008284 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8285 (!ctxt->disableSAX))
8286 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8287
Daniel Veillard0fb18932003-09-07 09:14:37 +00008288 spacePop(ctxt);
8289 if (nsNr != 0)
8290 nsPop(ctxt, nsNr);
8291 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008292}
8293
8294/**
Owen Taylor3473f882001-02-23 17:55:21 +00008295 * xmlParseCDSect:
8296 * @ctxt: an XML parser context
8297 *
8298 * Parse escaped pure raw content.
8299 *
8300 * [18] CDSect ::= CDStart CData CDEnd
8301 *
8302 * [19] CDStart ::= '<![CDATA['
8303 *
8304 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8305 *
8306 * [21] CDEnd ::= ']]>'
8307 */
8308void
8309xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8310 xmlChar *buf = NULL;
8311 int len = 0;
8312 int size = XML_PARSER_BUFFER_SIZE;
8313 int r, rl;
8314 int s, sl;
8315 int cur, l;
8316 int count = 0;
8317
Daniel Veillard8f597c32003-10-06 08:19:27 +00008318 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008319 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008320 SKIP(9);
8321 } else
8322 return;
8323
8324 ctxt->instate = XML_PARSER_CDATA_SECTION;
8325 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008326 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008327 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008328 ctxt->instate = XML_PARSER_CONTENT;
8329 return;
8330 }
8331 NEXTL(rl);
8332 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008333 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008334 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008335 ctxt->instate = XML_PARSER_CONTENT;
8336 return;
8337 }
8338 NEXTL(sl);
8339 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008340 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008341 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008342 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008343 return;
8344 }
William M. Brack871611b2003-10-18 04:53:14 +00008345 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008346 ((r != ']') || (s != ']') || (cur != '>'))) {
8347 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008348 xmlChar *tmp;
8349
Owen Taylor3473f882001-02-23 17:55:21 +00008350 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008351 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8352 if (tmp == NULL) {
8353 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008354 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008355 return;
8356 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008357 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008358 }
8359 COPY_BUF(rl,buf,len,r);
8360 r = s;
8361 rl = sl;
8362 s = cur;
8363 sl = l;
8364 count++;
8365 if (count > 50) {
8366 GROW;
8367 count = 0;
8368 }
8369 NEXTL(l);
8370 cur = CUR_CHAR(l);
8371 }
8372 buf[len] = 0;
8373 ctxt->instate = XML_PARSER_CONTENT;
8374 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008375 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008376 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008377 xmlFree(buf);
8378 return;
8379 }
8380 NEXTL(l);
8381
8382 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008383 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008384 */
8385 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8386 if (ctxt->sax->cdataBlock != NULL)
8387 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008388 else if (ctxt->sax->characters != NULL)
8389 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008390 }
8391 xmlFree(buf);
8392}
8393
8394/**
8395 * xmlParseContent:
8396 * @ctxt: an XML parser context
8397 *
8398 * Parse a content:
8399 *
8400 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8401 */
8402
8403void
8404xmlParseContent(xmlParserCtxtPtr ctxt) {
8405 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008406 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008407 ((RAW != '<') || (NXT(1) != '/')) &&
8408 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008409 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008410 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008411 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008412
8413 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008414 * First case : a Processing Instruction.
8415 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008416 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008417 xmlParsePI(ctxt);
8418 }
8419
8420 /*
8421 * Second case : a CDSection
8422 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008423 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008424 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008425 xmlParseCDSect(ctxt);
8426 }
8427
8428 /*
8429 * Third case : a comment
8430 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008431 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008432 (NXT(2) == '-') && (NXT(3) == '-')) {
8433 xmlParseComment(ctxt);
8434 ctxt->instate = XML_PARSER_CONTENT;
8435 }
8436
8437 /*
8438 * Fourth case : a sub-element.
8439 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008440 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008441 xmlParseElement(ctxt);
8442 }
8443
8444 /*
8445 * Fifth case : a reference. If if has not been resolved,
8446 * parsing returns it's Name, create the node
8447 */
8448
Daniel Veillard21a0f912001-02-25 19:54:14 +00008449 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008450 xmlParseReference(ctxt);
8451 }
8452
8453 /*
8454 * Last case, text. Note that References are handled directly.
8455 */
8456 else {
8457 xmlParseCharData(ctxt, 0);
8458 }
8459
8460 GROW;
8461 /*
8462 * Pop-up of finished entities.
8463 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008464 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008465 xmlPopInput(ctxt);
8466 SHRINK;
8467
Daniel Veillardfdc91562002-07-01 21:52:03 +00008468 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008469 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8470 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008471 ctxt->instate = XML_PARSER_EOF;
8472 break;
8473 }
8474 }
8475}
8476
8477/**
8478 * xmlParseElement:
8479 * @ctxt: an XML parser context
8480 *
8481 * parse an XML element, this is highly recursive
8482 *
8483 * [39] element ::= EmptyElemTag | STag content ETag
8484 *
8485 * [ WFC: Element Type Match ]
8486 * The Name in an element's end-tag must match the element type in the
8487 * start-tag.
8488 *
Owen Taylor3473f882001-02-23 17:55:21 +00008489 */
8490
8491void
8492xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008493 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008494 const xmlChar *prefix;
8495 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008496 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008497 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008498 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008499 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008500
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008501 if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) {
8502 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
8503 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
8504 xmlParserMaxDepth);
8505 ctxt->instate = XML_PARSER_EOF;
8506 return;
8507 }
8508
Owen Taylor3473f882001-02-23 17:55:21 +00008509 /* Capture start position */
8510 if (ctxt->record_info) {
8511 node_info.begin_pos = ctxt->input->consumed +
8512 (CUR_PTR - ctxt->input->base);
8513 node_info.begin_line = ctxt->input->line;
8514 }
8515
8516 if (ctxt->spaceNr == 0)
8517 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00008518 else if (*ctxt->space == -2)
8519 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00008520 else
8521 spacePush(ctxt, *ctxt->space);
8522
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008523 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008524#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008525 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008526#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008527 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00008528#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008529 else
8530 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008531#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008532 if (name == NULL) {
8533 spacePop(ctxt);
8534 return;
8535 }
8536 namePush(ctxt, name);
8537 ret = ctxt->node;
8538
Daniel Veillard4432df22003-09-28 18:58:27 +00008539#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008540 /*
8541 * [ VC: Root Element Type ]
8542 * The Name in the document type declaration must match the element
8543 * type of the root element.
8544 */
8545 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8546 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8547 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008548#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008549
8550 /*
8551 * Check for an Empty Element.
8552 */
8553 if ((RAW == '/') && (NXT(1) == '>')) {
8554 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008555 if (ctxt->sax2) {
8556 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8557 (!ctxt->disableSAX))
8558 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008559#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008560 } else {
8561 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8562 (!ctxt->disableSAX))
8563 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008564#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008565 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008566 namePop(ctxt);
8567 spacePop(ctxt);
8568 if (nsNr != ctxt->nsNr)
8569 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008570 if ( ret != NULL && ctxt->record_info ) {
8571 node_info.end_pos = ctxt->input->consumed +
8572 (CUR_PTR - ctxt->input->base);
8573 node_info.end_line = ctxt->input->line;
8574 node_info.node = ret;
8575 xmlParserAddNodeInfo(ctxt, &node_info);
8576 }
8577 return;
8578 }
8579 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008580 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008581 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008582 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8583 "Couldn't find end of Start Tag %s line %d\n",
8584 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008585
8586 /*
8587 * end of parsing of this node.
8588 */
8589 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008590 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008591 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008592 if (nsNr != ctxt->nsNr)
8593 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008594
8595 /*
8596 * Capture end position and add node
8597 */
8598 if ( ret != NULL && ctxt->record_info ) {
8599 node_info.end_pos = ctxt->input->consumed +
8600 (CUR_PTR - ctxt->input->base);
8601 node_info.end_line = ctxt->input->line;
8602 node_info.node = ret;
8603 xmlParserAddNodeInfo(ctxt, &node_info);
8604 }
8605 return;
8606 }
8607
8608 /*
8609 * Parse the content of the element:
8610 */
8611 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008612 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008613 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008614 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008615 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008616
8617 /*
8618 * end of parsing of this node.
8619 */
8620 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008621 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008622 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008623 if (nsNr != ctxt->nsNr)
8624 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008625 return;
8626 }
8627
8628 /*
8629 * parse the end of tag: '</' should be here.
8630 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008631 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008632 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008633 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008634 }
8635#ifdef LIBXML_SAX1_ENABLED
8636 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008637 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008638#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008639
8640 /*
8641 * Capture end position and add node
8642 */
8643 if ( ret != NULL && ctxt->record_info ) {
8644 node_info.end_pos = ctxt->input->consumed +
8645 (CUR_PTR - ctxt->input->base);
8646 node_info.end_line = ctxt->input->line;
8647 node_info.node = ret;
8648 xmlParserAddNodeInfo(ctxt, &node_info);
8649 }
8650}
8651
8652/**
8653 * xmlParseVersionNum:
8654 * @ctxt: an XML parser context
8655 *
8656 * parse the XML version value.
8657 *
8658 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8659 *
8660 * Returns the string giving the XML version number, or NULL
8661 */
8662xmlChar *
8663xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8664 xmlChar *buf = NULL;
8665 int len = 0;
8666 int size = 10;
8667 xmlChar cur;
8668
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008669 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008670 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008671 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008672 return(NULL);
8673 }
8674 cur = CUR;
8675 while (((cur >= 'a') && (cur <= 'z')) ||
8676 ((cur >= 'A') && (cur <= 'Z')) ||
8677 ((cur >= '0') && (cur <= '9')) ||
8678 (cur == '_') || (cur == '.') ||
8679 (cur == ':') || (cur == '-')) {
8680 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008681 xmlChar *tmp;
8682
Owen Taylor3473f882001-02-23 17:55:21 +00008683 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008684 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8685 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008686 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008687 return(NULL);
8688 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008689 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008690 }
8691 buf[len++] = cur;
8692 NEXT;
8693 cur=CUR;
8694 }
8695 buf[len] = 0;
8696 return(buf);
8697}
8698
8699/**
8700 * xmlParseVersionInfo:
8701 * @ctxt: an XML parser context
8702 *
8703 * parse the XML version.
8704 *
8705 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8706 *
8707 * [25] Eq ::= S? '=' S?
8708 *
8709 * Returns the version string, e.g. "1.0"
8710 */
8711
8712xmlChar *
8713xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8714 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008715
Daniel Veillarda07050d2003-10-19 14:46:32 +00008716 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008717 SKIP(7);
8718 SKIP_BLANKS;
8719 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008720 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008721 return(NULL);
8722 }
8723 NEXT;
8724 SKIP_BLANKS;
8725 if (RAW == '"') {
8726 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008727 version = xmlParseVersionNum(ctxt);
8728 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008729 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008730 } else
8731 NEXT;
8732 } else if (RAW == '\''){
8733 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008734 version = xmlParseVersionNum(ctxt);
8735 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008736 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008737 } else
8738 NEXT;
8739 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008740 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008741 }
8742 }
8743 return(version);
8744}
8745
8746/**
8747 * xmlParseEncName:
8748 * @ctxt: an XML parser context
8749 *
8750 * parse the XML encoding name
8751 *
8752 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8753 *
8754 * Returns the encoding name value or NULL
8755 */
8756xmlChar *
8757xmlParseEncName(xmlParserCtxtPtr ctxt) {
8758 xmlChar *buf = NULL;
8759 int len = 0;
8760 int size = 10;
8761 xmlChar cur;
8762
8763 cur = CUR;
8764 if (((cur >= 'a') && (cur <= 'z')) ||
8765 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008766 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008767 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008768 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008769 return(NULL);
8770 }
8771
8772 buf[len++] = cur;
8773 NEXT;
8774 cur = CUR;
8775 while (((cur >= 'a') && (cur <= 'z')) ||
8776 ((cur >= 'A') && (cur <= 'Z')) ||
8777 ((cur >= '0') && (cur <= '9')) ||
8778 (cur == '.') || (cur == '_') ||
8779 (cur == '-')) {
8780 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008781 xmlChar *tmp;
8782
Owen Taylor3473f882001-02-23 17:55:21 +00008783 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008784 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8785 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008786 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00008787 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008788 return(NULL);
8789 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008790 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008791 }
8792 buf[len++] = cur;
8793 NEXT;
8794 cur = CUR;
8795 if (cur == 0) {
8796 SHRINK;
8797 GROW;
8798 cur = CUR;
8799 }
8800 }
8801 buf[len] = 0;
8802 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008803 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008804 }
8805 return(buf);
8806}
8807
8808/**
8809 * xmlParseEncodingDecl:
8810 * @ctxt: an XML parser context
8811 *
8812 * parse the XML encoding declaration
8813 *
8814 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8815 *
8816 * this setups the conversion filters.
8817 *
8818 * Returns the encoding value or NULL
8819 */
8820
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008821const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008822xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8823 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008824
8825 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008826 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008827 SKIP(8);
8828 SKIP_BLANKS;
8829 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008830 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008831 return(NULL);
8832 }
8833 NEXT;
8834 SKIP_BLANKS;
8835 if (RAW == '"') {
8836 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008837 encoding = xmlParseEncName(ctxt);
8838 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008839 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008840 } else
8841 NEXT;
8842 } else if (RAW == '\''){
8843 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008844 encoding = xmlParseEncName(ctxt);
8845 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008846 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008847 } else
8848 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008849 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008850 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008851 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008852 /*
8853 * UTF-16 encoding stwich has already taken place at this stage,
8854 * more over the little-endian/big-endian selection is already done
8855 */
8856 if ((encoding != NULL) &&
8857 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8858 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008859 if (ctxt->encoding != NULL)
8860 xmlFree((xmlChar *) ctxt->encoding);
8861 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008862 }
8863 /*
8864 * UTF-8 encoding is handled natively
8865 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008866 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008867 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8868 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008869 if (ctxt->encoding != NULL)
8870 xmlFree((xmlChar *) ctxt->encoding);
8871 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008872 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008873 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008874 xmlCharEncodingHandlerPtr handler;
8875
8876 if (ctxt->input->encoding != NULL)
8877 xmlFree((xmlChar *) ctxt->input->encoding);
8878 ctxt->input->encoding = encoding;
8879
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008880 handler = xmlFindCharEncodingHandler((const char *) encoding);
8881 if (handler != NULL) {
8882 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008883 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008884 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008885 "Unsupported encoding %s\n", encoding);
8886 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008887 }
8888 }
8889 }
8890 return(encoding);
8891}
8892
8893/**
8894 * xmlParseSDDecl:
8895 * @ctxt: an XML parser context
8896 *
8897 * parse the XML standalone declaration
8898 *
8899 * [32] SDDecl ::= S 'standalone' Eq
8900 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8901 *
8902 * [ VC: Standalone Document Declaration ]
8903 * TODO The standalone document declaration must have the value "no"
8904 * if any external markup declarations contain declarations of:
8905 * - attributes with default values, if elements to which these
8906 * attributes apply appear in the document without specifications
8907 * of values for these attributes, or
8908 * - entities (other than amp, lt, gt, apos, quot), if references
8909 * to those entities appear in the document, or
8910 * - attributes with values subject to normalization, where the
8911 * attribute appears in the document with a value which will change
8912 * as a result of normalization, or
8913 * - element types with element content, if white space occurs directly
8914 * within any instance of those types.
8915 *
8916 * Returns 1 if standalone, 0 otherwise
8917 */
8918
8919int
8920xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8921 int standalone = -1;
8922
8923 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008924 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008925 SKIP(10);
8926 SKIP_BLANKS;
8927 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008928 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008929 return(standalone);
8930 }
8931 NEXT;
8932 SKIP_BLANKS;
8933 if (RAW == '\''){
8934 NEXT;
8935 if ((RAW == 'n') && (NXT(1) == 'o')) {
8936 standalone = 0;
8937 SKIP(2);
8938 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8939 (NXT(2) == 's')) {
8940 standalone = 1;
8941 SKIP(3);
8942 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008943 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008944 }
8945 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008946 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008947 } else
8948 NEXT;
8949 } else if (RAW == '"'){
8950 NEXT;
8951 if ((RAW == 'n') && (NXT(1) == 'o')) {
8952 standalone = 0;
8953 SKIP(2);
8954 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8955 (NXT(2) == 's')) {
8956 standalone = 1;
8957 SKIP(3);
8958 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008959 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008960 }
8961 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008962 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008963 } else
8964 NEXT;
8965 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008966 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008967 }
8968 }
8969 return(standalone);
8970}
8971
8972/**
8973 * xmlParseXMLDecl:
8974 * @ctxt: an XML parser context
8975 *
8976 * parse an XML declaration header
8977 *
8978 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8979 */
8980
8981void
8982xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8983 xmlChar *version;
8984
8985 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00008986 * This value for standalone indicates that the document has an
8987 * XML declaration but it does not have a standalone attribute.
8988 * It will be overwritten later if a standalone attribute is found.
8989 */
8990 ctxt->input->standalone = -2;
8991
8992 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008993 * We know that '<?xml' is here.
8994 */
8995 SKIP(5);
8996
William M. Brack76e95df2003-10-18 16:20:14 +00008997 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008998 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8999 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009000 }
9001 SKIP_BLANKS;
9002
9003 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009004 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009005 */
9006 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009007 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009008 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009009 } else {
9010 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9011 /*
9012 * TODO: Blueberry should be detected here
9013 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00009014 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9015 "Unsupported version '%s'\n",
9016 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009017 }
9018 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009019 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009020 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009021 }
Owen Taylor3473f882001-02-23 17:55:21 +00009022
9023 /*
9024 * We may have the encoding declaration
9025 */
William M. Brack76e95df2003-10-18 16:20:14 +00009026 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009027 if ((RAW == '?') && (NXT(1) == '>')) {
9028 SKIP(2);
9029 return;
9030 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009031 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009032 }
9033 xmlParseEncodingDecl(ctxt);
9034 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9035 /*
9036 * The XML REC instructs us to stop parsing right here
9037 */
9038 return;
9039 }
9040
9041 /*
9042 * We may have the standalone status.
9043 */
William M. Brack76e95df2003-10-18 16:20:14 +00009044 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009045 if ((RAW == '?') && (NXT(1) == '>')) {
9046 SKIP(2);
9047 return;
9048 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009049 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009050 }
9051 SKIP_BLANKS;
9052 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9053
9054 SKIP_BLANKS;
9055 if ((RAW == '?') && (NXT(1) == '>')) {
9056 SKIP(2);
9057 } else if (RAW == '>') {
9058 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009059 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009060 NEXT;
9061 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009062 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009063 MOVETO_ENDTAG(CUR_PTR);
9064 NEXT;
9065 }
9066}
9067
9068/**
9069 * xmlParseMisc:
9070 * @ctxt: an XML parser context
9071 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009072 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00009073 *
9074 * [27] Misc ::= Comment | PI | S
9075 */
9076
9077void
9078xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009079 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00009080 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00009081 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009082 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009083 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00009084 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009085 NEXT;
9086 } else
9087 xmlParseComment(ctxt);
9088 }
9089}
9090
9091/**
9092 * xmlParseDocument:
9093 * @ctxt: an XML parser context
9094 *
9095 * parse an XML document (and build a tree if using the standard SAX
9096 * interface).
9097 *
9098 * [1] document ::= prolog element Misc*
9099 *
9100 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9101 *
9102 * Returns 0, -1 in case of error. the parser context is augmented
9103 * as a result of the parsing.
9104 */
9105
9106int
9107xmlParseDocument(xmlParserCtxtPtr ctxt) {
9108 xmlChar start[4];
9109 xmlCharEncoding enc;
9110
9111 xmlInitParser();
9112
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009113 if ((ctxt == NULL) || (ctxt->input == NULL))
9114 return(-1);
9115
Owen Taylor3473f882001-02-23 17:55:21 +00009116 GROW;
9117
9118 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009119 * SAX: detecting the level.
9120 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009121 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009122
9123 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009124 * SAX: beginning of the document processing.
9125 */
9126 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9127 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9128
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009129 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9130 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00009131 /*
9132 * Get the 4 first bytes and decode the charset
9133 * if enc != XML_CHAR_ENCODING_NONE
9134 * plug some encoding conversion routines.
9135 */
9136 start[0] = RAW;
9137 start[1] = NXT(1);
9138 start[2] = NXT(2);
9139 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009140 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00009141 if (enc != XML_CHAR_ENCODING_NONE) {
9142 xmlSwitchEncoding(ctxt, enc);
9143 }
Owen Taylor3473f882001-02-23 17:55:21 +00009144 }
9145
9146
9147 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009148 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009149 }
9150
9151 /*
9152 * Check for the XMLDecl in the Prolog.
9153 */
9154 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009155 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009156
9157 /*
9158 * Note that we will switch encoding on the fly.
9159 */
9160 xmlParseXMLDecl(ctxt);
9161 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9162 /*
9163 * The XML REC instructs us to stop parsing right here
9164 */
9165 return(-1);
9166 }
9167 ctxt->standalone = ctxt->input->standalone;
9168 SKIP_BLANKS;
9169 } else {
9170 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9171 }
9172 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9173 ctxt->sax->startDocument(ctxt->userData);
9174
9175 /*
9176 * The Misc part of the Prolog
9177 */
9178 GROW;
9179 xmlParseMisc(ctxt);
9180
9181 /*
9182 * Then possibly doc type declaration(s) and more Misc
9183 * (doctypedecl Misc*)?
9184 */
9185 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009186 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009187
9188 ctxt->inSubset = 1;
9189 xmlParseDocTypeDecl(ctxt);
9190 if (RAW == '[') {
9191 ctxt->instate = XML_PARSER_DTD;
9192 xmlParseInternalSubset(ctxt);
9193 }
9194
9195 /*
9196 * Create and update the external subset.
9197 */
9198 ctxt->inSubset = 2;
9199 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9200 (!ctxt->disableSAX))
9201 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9202 ctxt->extSubSystem, ctxt->extSubURI);
9203 ctxt->inSubset = 0;
9204
9205
9206 ctxt->instate = XML_PARSER_PROLOG;
9207 xmlParseMisc(ctxt);
9208 }
9209
9210 /*
9211 * Time to start parsing the tree itself
9212 */
9213 GROW;
9214 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009215 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9216 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009217 } else {
9218 ctxt->instate = XML_PARSER_CONTENT;
9219 xmlParseElement(ctxt);
9220 ctxt->instate = XML_PARSER_EPILOG;
9221
9222
9223 /*
9224 * The Misc part at the end
9225 */
9226 xmlParseMisc(ctxt);
9227
Daniel Veillard561b7f82002-03-20 21:55:57 +00009228 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009229 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009230 }
9231 ctxt->instate = XML_PARSER_EOF;
9232 }
9233
9234 /*
9235 * SAX: end of the document processing.
9236 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009237 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009238 ctxt->sax->endDocument(ctxt->userData);
9239
Daniel Veillard5997aca2002-03-18 18:36:20 +00009240 /*
9241 * Remove locally kept entity definitions if the tree was not built
9242 */
9243 if ((ctxt->myDoc != NULL) &&
9244 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9245 xmlFreeDoc(ctxt->myDoc);
9246 ctxt->myDoc = NULL;
9247 }
9248
Daniel Veillardc7612992002-02-17 22:47:37 +00009249 if (! ctxt->wellFormed) {
9250 ctxt->valid = 0;
9251 return(-1);
9252 }
Owen Taylor3473f882001-02-23 17:55:21 +00009253 return(0);
9254}
9255
9256/**
9257 * xmlParseExtParsedEnt:
9258 * @ctxt: an XML parser context
9259 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009260 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00009261 * An external general parsed entity is well-formed if it matches the
9262 * production labeled extParsedEnt.
9263 *
9264 * [78] extParsedEnt ::= TextDecl? content
9265 *
9266 * Returns 0, -1 in case of error. the parser context is augmented
9267 * as a result of the parsing.
9268 */
9269
9270int
9271xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9272 xmlChar start[4];
9273 xmlCharEncoding enc;
9274
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009275 if ((ctxt == NULL) || (ctxt->input == NULL))
9276 return(-1);
9277
Owen Taylor3473f882001-02-23 17:55:21 +00009278 xmlDefaultSAXHandlerInit();
9279
Daniel Veillard309f81d2003-09-23 09:02:53 +00009280 xmlDetectSAX2(ctxt);
9281
Owen Taylor3473f882001-02-23 17:55:21 +00009282 GROW;
9283
9284 /*
9285 * SAX: beginning of the document processing.
9286 */
9287 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9288 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9289
9290 /*
9291 * Get the 4 first bytes and decode the charset
9292 * if enc != XML_CHAR_ENCODING_NONE
9293 * plug some encoding conversion routines.
9294 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009295 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9296 start[0] = RAW;
9297 start[1] = NXT(1);
9298 start[2] = NXT(2);
9299 start[3] = NXT(3);
9300 enc = xmlDetectCharEncoding(start, 4);
9301 if (enc != XML_CHAR_ENCODING_NONE) {
9302 xmlSwitchEncoding(ctxt, enc);
9303 }
Owen Taylor3473f882001-02-23 17:55:21 +00009304 }
9305
9306
9307 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009308 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009309 }
9310
9311 /*
9312 * Check for the XMLDecl in the Prolog.
9313 */
9314 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009315 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009316
9317 /*
9318 * Note that we will switch encoding on the fly.
9319 */
9320 xmlParseXMLDecl(ctxt);
9321 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9322 /*
9323 * The XML REC instructs us to stop parsing right here
9324 */
9325 return(-1);
9326 }
9327 SKIP_BLANKS;
9328 } else {
9329 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9330 }
9331 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9332 ctxt->sax->startDocument(ctxt->userData);
9333
9334 /*
9335 * Doing validity checking on chunk doesn't make sense
9336 */
9337 ctxt->instate = XML_PARSER_CONTENT;
9338 ctxt->validate = 0;
9339 ctxt->loadsubset = 0;
9340 ctxt->depth = 0;
9341
9342 xmlParseContent(ctxt);
9343
9344 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009345 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009346 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009347 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009348 }
9349
9350 /*
9351 * SAX: end of the document processing.
9352 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009353 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009354 ctxt->sax->endDocument(ctxt->userData);
9355
9356 if (! ctxt->wellFormed) return(-1);
9357 return(0);
9358}
9359
Daniel Veillard73b013f2003-09-30 12:36:01 +00009360#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009361/************************************************************************
9362 * *
9363 * Progressive parsing interfaces *
9364 * *
9365 ************************************************************************/
9366
9367/**
9368 * xmlParseLookupSequence:
9369 * @ctxt: an XML parser context
9370 * @first: the first char to lookup
9371 * @next: the next char to lookup or zero
9372 * @third: the next char to lookup or zero
9373 *
9374 * Try to find if a sequence (first, next, third) or just (first next) or
9375 * (first) is available in the input stream.
9376 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9377 * to avoid rescanning sequences of bytes, it DOES change the state of the
9378 * parser, do not use liberally.
9379 *
9380 * Returns the index to the current parsing point if the full sequence
9381 * is available, -1 otherwise.
9382 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009383static int
Owen Taylor3473f882001-02-23 17:55:21 +00009384xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9385 xmlChar next, xmlChar third) {
9386 int base, len;
9387 xmlParserInputPtr in;
9388 const xmlChar *buf;
9389
9390 in = ctxt->input;
9391 if (in == NULL) return(-1);
9392 base = in->cur - in->base;
9393 if (base < 0) return(-1);
9394 if (ctxt->checkIndex > base)
9395 base = ctxt->checkIndex;
9396 if (in->buf == NULL) {
9397 buf = in->base;
9398 len = in->length;
9399 } else {
9400 buf = in->buf->buffer->content;
9401 len = in->buf->buffer->use;
9402 }
9403 /* take into account the sequence length */
9404 if (third) len -= 2;
9405 else if (next) len --;
9406 for (;base < len;base++) {
9407 if (buf[base] == first) {
9408 if (third != 0) {
9409 if ((buf[base + 1] != next) ||
9410 (buf[base + 2] != third)) continue;
9411 } else if (next != 0) {
9412 if (buf[base + 1] != next) continue;
9413 }
9414 ctxt->checkIndex = 0;
9415#ifdef DEBUG_PUSH
9416 if (next == 0)
9417 xmlGenericError(xmlGenericErrorContext,
9418 "PP: lookup '%c' found at %d\n",
9419 first, base);
9420 else if (third == 0)
9421 xmlGenericError(xmlGenericErrorContext,
9422 "PP: lookup '%c%c' found at %d\n",
9423 first, next, base);
9424 else
9425 xmlGenericError(xmlGenericErrorContext,
9426 "PP: lookup '%c%c%c' found at %d\n",
9427 first, next, third, base);
9428#endif
9429 return(base - (in->cur - in->base));
9430 }
9431 }
9432 ctxt->checkIndex = base;
9433#ifdef DEBUG_PUSH
9434 if (next == 0)
9435 xmlGenericError(xmlGenericErrorContext,
9436 "PP: lookup '%c' failed\n", first);
9437 else if (third == 0)
9438 xmlGenericError(xmlGenericErrorContext,
9439 "PP: lookup '%c%c' failed\n", first, next);
9440 else
9441 xmlGenericError(xmlGenericErrorContext,
9442 "PP: lookup '%c%c%c' failed\n", first, next, third);
9443#endif
9444 return(-1);
9445}
9446
9447/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009448 * xmlParseGetLasts:
9449 * @ctxt: an XML parser context
9450 * @lastlt: pointer to store the last '<' from the input
9451 * @lastgt: pointer to store the last '>' from the input
9452 *
9453 * Lookup the last < and > in the current chunk
9454 */
9455static void
9456xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9457 const xmlChar **lastgt) {
9458 const xmlChar *tmp;
9459
9460 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9461 xmlGenericError(xmlGenericErrorContext,
9462 "Internal error: xmlParseGetLasts\n");
9463 return;
9464 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009465 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009466 tmp = ctxt->input->end;
9467 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00009468 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00009469 if (tmp < ctxt->input->base) {
9470 *lastlt = NULL;
9471 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00009472 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009473 *lastlt = tmp;
9474 tmp++;
9475 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9476 if (*tmp == '\'') {
9477 tmp++;
9478 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9479 if (tmp < ctxt->input->end) tmp++;
9480 } else if (*tmp == '"') {
9481 tmp++;
9482 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9483 if (tmp < ctxt->input->end) tmp++;
9484 } else
9485 tmp++;
9486 }
9487 if (tmp < ctxt->input->end)
9488 *lastgt = tmp;
9489 else {
9490 tmp = *lastlt;
9491 tmp--;
9492 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9493 if (tmp >= ctxt->input->base)
9494 *lastgt = tmp;
9495 else
9496 *lastgt = NULL;
9497 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009498 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009499 } else {
9500 *lastlt = NULL;
9501 *lastgt = NULL;
9502 }
9503}
9504/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009505 * xmlCheckCdataPush:
9506 * @cur: pointer to the bock of characters
9507 * @len: length of the block in bytes
9508 *
9509 * Check that the block of characters is okay as SCdata content [20]
9510 *
9511 * Returns the number of bytes to pass if okay, a negative index where an
9512 * UTF-8 error occured otherwise
9513 */
9514static int
9515xmlCheckCdataPush(const xmlChar *utf, int len) {
9516 int ix;
9517 unsigned char c;
9518 int codepoint;
9519
9520 if ((utf == NULL) || (len <= 0))
9521 return(0);
9522
9523 for (ix = 0; ix < len;) { /* string is 0-terminated */
9524 c = utf[ix];
9525 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9526 if (c >= 0x20)
9527 ix++;
9528 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9529 ix++;
9530 else
9531 return(-ix);
9532 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9533 if (ix + 2 > len) return(ix);
9534 if ((utf[ix+1] & 0xc0 ) != 0x80)
9535 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009536 codepoint = (utf[ix] & 0x1f) << 6;
9537 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009538 if (!xmlIsCharQ(codepoint))
9539 return(-ix);
9540 ix += 2;
9541 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9542 if (ix + 3 > len) return(ix);
9543 if (((utf[ix+1] & 0xc0) != 0x80) ||
9544 ((utf[ix+2] & 0xc0) != 0x80))
9545 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009546 codepoint = (utf[ix] & 0xf) << 12;
9547 codepoint |= (utf[ix+1] & 0x3f) << 6;
9548 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009549 if (!xmlIsCharQ(codepoint))
9550 return(-ix);
9551 ix += 3;
9552 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9553 if (ix + 4 > len) return(ix);
9554 if (((utf[ix+1] & 0xc0) != 0x80) ||
9555 ((utf[ix+2] & 0xc0) != 0x80) ||
9556 ((utf[ix+3] & 0xc0) != 0x80))
9557 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009558 codepoint = (utf[ix] & 0x7) << 18;
9559 codepoint |= (utf[ix+1] & 0x3f) << 12;
9560 codepoint |= (utf[ix+2] & 0x3f) << 6;
9561 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009562 if (!xmlIsCharQ(codepoint))
9563 return(-ix);
9564 ix += 4;
9565 } else /* unknown encoding */
9566 return(-ix);
9567 }
9568 return(ix);
9569}
9570
9571/**
Owen Taylor3473f882001-02-23 17:55:21 +00009572 * xmlParseTryOrFinish:
9573 * @ctxt: an XML parser context
9574 * @terminate: last chunk indicator
9575 *
9576 * Try to progress on parsing
9577 *
9578 * Returns zero if no parsing was possible
9579 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009580static int
Owen Taylor3473f882001-02-23 17:55:21 +00009581xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9582 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009583 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009584 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009585 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009586
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009587 if (ctxt->input == NULL)
9588 return(0);
9589
Owen Taylor3473f882001-02-23 17:55:21 +00009590#ifdef DEBUG_PUSH
9591 switch (ctxt->instate) {
9592 case XML_PARSER_EOF:
9593 xmlGenericError(xmlGenericErrorContext,
9594 "PP: try EOF\n"); break;
9595 case XML_PARSER_START:
9596 xmlGenericError(xmlGenericErrorContext,
9597 "PP: try START\n"); break;
9598 case XML_PARSER_MISC:
9599 xmlGenericError(xmlGenericErrorContext,
9600 "PP: try MISC\n");break;
9601 case XML_PARSER_COMMENT:
9602 xmlGenericError(xmlGenericErrorContext,
9603 "PP: try COMMENT\n");break;
9604 case XML_PARSER_PROLOG:
9605 xmlGenericError(xmlGenericErrorContext,
9606 "PP: try PROLOG\n");break;
9607 case XML_PARSER_START_TAG:
9608 xmlGenericError(xmlGenericErrorContext,
9609 "PP: try START_TAG\n");break;
9610 case XML_PARSER_CONTENT:
9611 xmlGenericError(xmlGenericErrorContext,
9612 "PP: try CONTENT\n");break;
9613 case XML_PARSER_CDATA_SECTION:
9614 xmlGenericError(xmlGenericErrorContext,
9615 "PP: try CDATA_SECTION\n");break;
9616 case XML_PARSER_END_TAG:
9617 xmlGenericError(xmlGenericErrorContext,
9618 "PP: try END_TAG\n");break;
9619 case XML_PARSER_ENTITY_DECL:
9620 xmlGenericError(xmlGenericErrorContext,
9621 "PP: try ENTITY_DECL\n");break;
9622 case XML_PARSER_ENTITY_VALUE:
9623 xmlGenericError(xmlGenericErrorContext,
9624 "PP: try ENTITY_VALUE\n");break;
9625 case XML_PARSER_ATTRIBUTE_VALUE:
9626 xmlGenericError(xmlGenericErrorContext,
9627 "PP: try ATTRIBUTE_VALUE\n");break;
9628 case XML_PARSER_DTD:
9629 xmlGenericError(xmlGenericErrorContext,
9630 "PP: try DTD\n");break;
9631 case XML_PARSER_EPILOG:
9632 xmlGenericError(xmlGenericErrorContext,
9633 "PP: try EPILOG\n");break;
9634 case XML_PARSER_PI:
9635 xmlGenericError(xmlGenericErrorContext,
9636 "PP: try PI\n");break;
9637 case XML_PARSER_IGNORE:
9638 xmlGenericError(xmlGenericErrorContext,
9639 "PP: try IGNORE\n");break;
9640 }
9641#endif
9642
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009643 if ((ctxt->input != NULL) &&
9644 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009645 xmlSHRINK(ctxt);
9646 ctxt->checkIndex = 0;
9647 }
9648 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009649
Daniel Veillarda880b122003-04-21 21:36:41 +00009650 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +00009651 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009652 return(0);
9653
9654
Owen Taylor3473f882001-02-23 17:55:21 +00009655 /*
9656 * Pop-up of finished entities.
9657 */
9658 while ((RAW == 0) && (ctxt->inputNr > 1))
9659 xmlPopInput(ctxt);
9660
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009661 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009662 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009663 avail = ctxt->input->length -
9664 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009665 else {
9666 /*
9667 * If we are operating on converted input, try to flush
9668 * remaining chars to avoid them stalling in the non-converted
9669 * buffer.
9670 */
9671 if ((ctxt->input->buf->raw != NULL) &&
9672 (ctxt->input->buf->raw->use > 0)) {
9673 int base = ctxt->input->base -
9674 ctxt->input->buf->buffer->content;
9675 int current = ctxt->input->cur - ctxt->input->base;
9676
9677 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9678 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9679 ctxt->input->cur = ctxt->input->base + current;
9680 ctxt->input->end =
9681 &ctxt->input->buf->buffer->content[
9682 ctxt->input->buf->buffer->use];
9683 }
9684 avail = ctxt->input->buf->buffer->use -
9685 (ctxt->input->cur - ctxt->input->base);
9686 }
Owen Taylor3473f882001-02-23 17:55:21 +00009687 if (avail < 1)
9688 goto done;
9689 switch (ctxt->instate) {
9690 case XML_PARSER_EOF:
9691 /*
9692 * Document parsing is done !
9693 */
9694 goto done;
9695 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009696 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9697 xmlChar start[4];
9698 xmlCharEncoding enc;
9699
9700 /*
9701 * Very first chars read from the document flow.
9702 */
9703 if (avail < 4)
9704 goto done;
9705
9706 /*
9707 * Get the 4 first bytes and decode the charset
9708 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +00009709 * plug some encoding conversion routines,
9710 * else xmlSwitchEncoding will set to (default)
9711 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009712 */
9713 start[0] = RAW;
9714 start[1] = NXT(1);
9715 start[2] = NXT(2);
9716 start[3] = NXT(3);
9717 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +00009718 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009719 break;
9720 }
Owen Taylor3473f882001-02-23 17:55:21 +00009721
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009722 if (avail < 2)
9723 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009724 cur = ctxt->input->cur[0];
9725 next = ctxt->input->cur[1];
9726 if (cur == 0) {
9727 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9728 ctxt->sax->setDocumentLocator(ctxt->userData,
9729 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009730 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009731 ctxt->instate = XML_PARSER_EOF;
9732#ifdef DEBUG_PUSH
9733 xmlGenericError(xmlGenericErrorContext,
9734 "PP: entering EOF\n");
9735#endif
9736 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9737 ctxt->sax->endDocument(ctxt->userData);
9738 goto done;
9739 }
9740 if ((cur == '<') && (next == '?')) {
9741 /* PI or XML decl */
9742 if (avail < 5) return(ret);
9743 if ((!terminate) &&
9744 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9745 return(ret);
9746 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9747 ctxt->sax->setDocumentLocator(ctxt->userData,
9748 &xmlDefaultSAXLocator);
9749 if ((ctxt->input->cur[2] == 'x') &&
9750 (ctxt->input->cur[3] == 'm') &&
9751 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009752 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009753 ret += 5;
9754#ifdef DEBUG_PUSH
9755 xmlGenericError(xmlGenericErrorContext,
9756 "PP: Parsing XML Decl\n");
9757#endif
9758 xmlParseXMLDecl(ctxt);
9759 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9760 /*
9761 * The XML REC instructs us to stop parsing right
9762 * here
9763 */
9764 ctxt->instate = XML_PARSER_EOF;
9765 return(0);
9766 }
9767 ctxt->standalone = ctxt->input->standalone;
9768 if ((ctxt->encoding == NULL) &&
9769 (ctxt->input->encoding != NULL))
9770 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9771 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9772 (!ctxt->disableSAX))
9773 ctxt->sax->startDocument(ctxt->userData);
9774 ctxt->instate = XML_PARSER_MISC;
9775#ifdef DEBUG_PUSH
9776 xmlGenericError(xmlGenericErrorContext,
9777 "PP: entering MISC\n");
9778#endif
9779 } else {
9780 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9781 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9782 (!ctxt->disableSAX))
9783 ctxt->sax->startDocument(ctxt->userData);
9784 ctxt->instate = XML_PARSER_MISC;
9785#ifdef DEBUG_PUSH
9786 xmlGenericError(xmlGenericErrorContext,
9787 "PP: entering MISC\n");
9788#endif
9789 }
9790 } else {
9791 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9792 ctxt->sax->setDocumentLocator(ctxt->userData,
9793 &xmlDefaultSAXLocator);
9794 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +00009795 if (ctxt->version == NULL) {
9796 xmlErrMemory(ctxt, NULL);
9797 break;
9798 }
Owen Taylor3473f882001-02-23 17:55:21 +00009799 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9800 (!ctxt->disableSAX))
9801 ctxt->sax->startDocument(ctxt->userData);
9802 ctxt->instate = XML_PARSER_MISC;
9803#ifdef DEBUG_PUSH
9804 xmlGenericError(xmlGenericErrorContext,
9805 "PP: entering MISC\n");
9806#endif
9807 }
9808 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009809 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009810 const xmlChar *name;
9811 const xmlChar *prefix;
9812 const xmlChar *URI;
9813 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009814
9815 if ((avail < 2) && (ctxt->inputNr == 1))
9816 goto done;
9817 cur = ctxt->input->cur[0];
9818 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009819 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009820 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009821 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9822 ctxt->sax->endDocument(ctxt->userData);
9823 goto done;
9824 }
9825 if (!terminate) {
9826 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009827 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +00009828 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009829 goto done;
9830 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9831 goto done;
9832 }
9833 }
9834 if (ctxt->spaceNr == 0)
9835 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009836 else if (*ctxt->space == -2)
9837 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +00009838 else
9839 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009840#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009841 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009842#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009843 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009844#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009845 else
9846 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009847#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009848 if (name == NULL) {
9849 spacePop(ctxt);
9850 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009851 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9852 ctxt->sax->endDocument(ctxt->userData);
9853 goto done;
9854 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009855#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009856 /*
9857 * [ VC: Root Element Type ]
9858 * The Name in the document type declaration must match
9859 * the element type of the root element.
9860 */
9861 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9862 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9863 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009864#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009865
9866 /*
9867 * Check for an Empty Element.
9868 */
9869 if ((RAW == '/') && (NXT(1) == '>')) {
9870 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009871
9872 if (ctxt->sax2) {
9873 if ((ctxt->sax != NULL) &&
9874 (ctxt->sax->endElementNs != NULL) &&
9875 (!ctxt->disableSAX))
9876 ctxt->sax->endElementNs(ctxt->userData, name,
9877 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +00009878 if (ctxt->nsNr - nsNr > 0)
9879 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009880#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009881 } else {
9882 if ((ctxt->sax != NULL) &&
9883 (ctxt->sax->endElement != NULL) &&
9884 (!ctxt->disableSAX))
9885 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009886#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009887 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009888 spacePop(ctxt);
9889 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009890 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009891 } else {
9892 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009893 }
9894 break;
9895 }
9896 if (RAW == '>') {
9897 NEXT;
9898 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009899 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009900 "Couldn't find end of Start Tag %s\n",
9901 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009902 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009903 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009904 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009905 if (ctxt->sax2)
9906 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009907#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009908 else
9909 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009910#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009911
Daniel Veillarda880b122003-04-21 21:36:41 +00009912 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009913 break;
9914 }
9915 case XML_PARSER_CONTENT: {
9916 const xmlChar *test;
9917 unsigned int cons;
9918 if ((avail < 2) && (ctxt->inputNr == 1))
9919 goto done;
9920 cur = ctxt->input->cur[0];
9921 next = ctxt->input->cur[1];
9922
9923 test = CUR_PTR;
9924 cons = ctxt->input->consumed;
9925 if ((cur == '<') && (next == '/')) {
9926 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009927 break;
9928 } else if ((cur == '<') && (next == '?')) {
9929 if ((!terminate) &&
9930 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9931 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009932 xmlParsePI(ctxt);
9933 } else if ((cur == '<') && (next != '!')) {
9934 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009935 break;
9936 } else if ((cur == '<') && (next == '!') &&
9937 (ctxt->input->cur[2] == '-') &&
9938 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +00009939 int term;
9940
9941 if (avail < 4)
9942 goto done;
9943 ctxt->input->cur += 4;
9944 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
9945 ctxt->input->cur -= 4;
9946 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +00009947 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009948 xmlParseComment(ctxt);
9949 ctxt->instate = XML_PARSER_CONTENT;
9950 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9951 (ctxt->input->cur[2] == '[') &&
9952 (ctxt->input->cur[3] == 'C') &&
9953 (ctxt->input->cur[4] == 'D') &&
9954 (ctxt->input->cur[5] == 'A') &&
9955 (ctxt->input->cur[6] == 'T') &&
9956 (ctxt->input->cur[7] == 'A') &&
9957 (ctxt->input->cur[8] == '[')) {
9958 SKIP(9);
9959 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009960 break;
9961 } else if ((cur == '<') && (next == '!') &&
9962 (avail < 9)) {
9963 goto done;
9964 } else if (cur == '&') {
9965 if ((!terminate) &&
9966 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9967 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009968 xmlParseReference(ctxt);
9969 } else {
9970 /* TODO Avoid the extra copy, handle directly !!! */
9971 /*
9972 * Goal of the following test is:
9973 * - minimize calls to the SAX 'character' callback
9974 * when they are mergeable
9975 * - handle a problem for isBlank when we only parse
9976 * a sequence of blank chars and the next one is
9977 * not available to check against '<' presence.
9978 * - tries to homogenize the differences in SAX
9979 * callbacks between the push and pull versions
9980 * of the parser.
9981 */
9982 if ((ctxt->inputNr == 1) &&
9983 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9984 if (!terminate) {
9985 if (ctxt->progressive) {
9986 if ((lastlt == NULL) ||
9987 (ctxt->input->cur > lastlt))
9988 goto done;
9989 } else if (xmlParseLookupSequence(ctxt,
9990 '<', 0, 0) < 0) {
9991 goto done;
9992 }
9993 }
9994 }
9995 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009996 xmlParseCharData(ctxt, 0);
9997 }
9998 /*
9999 * Pop-up of finished entities.
10000 */
10001 while ((RAW == 0) && (ctxt->inputNr > 1))
10002 xmlPopInput(ctxt);
10003 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010004 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10005 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000010006 ctxt->instate = XML_PARSER_EOF;
10007 break;
10008 }
10009 break;
10010 }
10011 case XML_PARSER_END_TAG:
10012 if (avail < 2)
10013 goto done;
10014 if (!terminate) {
10015 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010016 /* > can be found unescaped in attribute values */
10017 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010018 goto done;
10019 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10020 goto done;
10021 }
10022 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010023 if (ctxt->sax2) {
10024 xmlParseEndTag2(ctxt,
10025 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10026 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010027 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010028 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010029 }
10030#ifdef LIBXML_SAX1_ENABLED
10031 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000010032 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010033#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010034 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010035 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010036 } else {
10037 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010038 }
10039 break;
10040 case XML_PARSER_CDATA_SECTION: {
10041 /*
10042 * The Push mode needs to have the SAX callback for
10043 * cdataBlock merge back contiguous callbacks.
10044 */
10045 int base;
10046
10047 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10048 if (base < 0) {
10049 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010050 int tmp;
10051
10052 tmp = xmlCheckCdataPush(ctxt->input->cur,
10053 XML_PARSER_BIG_BUFFER_SIZE);
10054 if (tmp < 0) {
10055 tmp = -tmp;
10056 ctxt->input->cur += tmp;
10057 goto encoding_error;
10058 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010059 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10060 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010061 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010062 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010063 else if (ctxt->sax->characters != NULL)
10064 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010065 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010066 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010067 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010068 ctxt->checkIndex = 0;
10069 }
10070 goto done;
10071 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010072 int tmp;
10073
10074 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10075 if ((tmp < 0) || (tmp != base)) {
10076 tmp = -tmp;
10077 ctxt->input->cur += tmp;
10078 goto encoding_error;
10079 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010080 if ((ctxt->sax != NULL) && (base > 0) &&
10081 (!ctxt->disableSAX)) {
10082 if (ctxt->sax->cdataBlock != NULL)
10083 ctxt->sax->cdataBlock(ctxt->userData,
10084 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010085 else if (ctxt->sax->characters != NULL)
10086 ctxt->sax->characters(ctxt->userData,
10087 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000010088 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000010089 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000010090 ctxt->checkIndex = 0;
10091 ctxt->instate = XML_PARSER_CONTENT;
10092#ifdef DEBUG_PUSH
10093 xmlGenericError(xmlGenericErrorContext,
10094 "PP: entering CONTENT\n");
10095#endif
10096 }
10097 break;
10098 }
Owen Taylor3473f882001-02-23 17:55:21 +000010099 case XML_PARSER_MISC:
10100 SKIP_BLANKS;
10101 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010102 avail = ctxt->input->length -
10103 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010104 else
Daniel Veillarda880b122003-04-21 21:36:41 +000010105 avail = ctxt->input->buf->buffer->use -
10106 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010107 if (avail < 2)
10108 goto done;
10109 cur = ctxt->input->cur[0];
10110 next = ctxt->input->cur[1];
10111 if ((cur == '<') && (next == '?')) {
10112 if ((!terminate) &&
10113 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10114 goto done;
10115#ifdef DEBUG_PUSH
10116 xmlGenericError(xmlGenericErrorContext,
10117 "PP: Parsing PI\n");
10118#endif
10119 xmlParsePI(ctxt);
10120 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010121 (ctxt->input->cur[2] == '-') &&
10122 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010123 if ((!terminate) &&
10124 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10125 goto done;
10126#ifdef DEBUG_PUSH
10127 xmlGenericError(xmlGenericErrorContext,
10128 "PP: Parsing Comment\n");
10129#endif
10130 xmlParseComment(ctxt);
10131 ctxt->instate = XML_PARSER_MISC;
10132 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010133 (ctxt->input->cur[2] == 'D') &&
10134 (ctxt->input->cur[3] == 'O') &&
10135 (ctxt->input->cur[4] == 'C') &&
10136 (ctxt->input->cur[5] == 'T') &&
10137 (ctxt->input->cur[6] == 'Y') &&
10138 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010139 (ctxt->input->cur[8] == 'E')) {
10140 if ((!terminate) &&
10141 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10142 goto done;
10143#ifdef DEBUG_PUSH
10144 xmlGenericError(xmlGenericErrorContext,
10145 "PP: Parsing internal subset\n");
10146#endif
10147 ctxt->inSubset = 1;
10148 xmlParseDocTypeDecl(ctxt);
10149 if (RAW == '[') {
10150 ctxt->instate = XML_PARSER_DTD;
10151#ifdef DEBUG_PUSH
10152 xmlGenericError(xmlGenericErrorContext,
10153 "PP: entering DTD\n");
10154#endif
10155 } else {
10156 /*
10157 * Create and update the external subset.
10158 */
10159 ctxt->inSubset = 2;
10160 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10161 (ctxt->sax->externalSubset != NULL))
10162 ctxt->sax->externalSubset(ctxt->userData,
10163 ctxt->intSubName, ctxt->extSubSystem,
10164 ctxt->extSubURI);
10165 ctxt->inSubset = 0;
10166 ctxt->instate = XML_PARSER_PROLOG;
10167#ifdef DEBUG_PUSH
10168 xmlGenericError(xmlGenericErrorContext,
10169 "PP: entering PROLOG\n");
10170#endif
10171 }
10172 } else if ((cur == '<') && (next == '!') &&
10173 (avail < 9)) {
10174 goto done;
10175 } else {
10176 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010177 ctxt->progressive = 1;
10178 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010179#ifdef DEBUG_PUSH
10180 xmlGenericError(xmlGenericErrorContext,
10181 "PP: entering START_TAG\n");
10182#endif
10183 }
10184 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010185 case XML_PARSER_PROLOG:
10186 SKIP_BLANKS;
10187 if (ctxt->input->buf == NULL)
10188 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10189 else
10190 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10191 if (avail < 2)
10192 goto done;
10193 cur = ctxt->input->cur[0];
10194 next = ctxt->input->cur[1];
10195 if ((cur == '<') && (next == '?')) {
10196 if ((!terminate) &&
10197 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10198 goto done;
10199#ifdef DEBUG_PUSH
10200 xmlGenericError(xmlGenericErrorContext,
10201 "PP: Parsing PI\n");
10202#endif
10203 xmlParsePI(ctxt);
10204 } else if ((cur == '<') && (next == '!') &&
10205 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10206 if ((!terminate) &&
10207 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10208 goto done;
10209#ifdef DEBUG_PUSH
10210 xmlGenericError(xmlGenericErrorContext,
10211 "PP: Parsing Comment\n");
10212#endif
10213 xmlParseComment(ctxt);
10214 ctxt->instate = XML_PARSER_PROLOG;
10215 } else if ((cur == '<') && (next == '!') &&
10216 (avail < 4)) {
10217 goto done;
10218 } else {
10219 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010220 if (ctxt->progressive == 0)
10221 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000010222 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010223#ifdef DEBUG_PUSH
10224 xmlGenericError(xmlGenericErrorContext,
10225 "PP: entering START_TAG\n");
10226#endif
10227 }
10228 break;
10229 case XML_PARSER_EPILOG:
10230 SKIP_BLANKS;
10231 if (ctxt->input->buf == NULL)
10232 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10233 else
10234 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10235 if (avail < 2)
10236 goto done;
10237 cur = ctxt->input->cur[0];
10238 next = ctxt->input->cur[1];
10239 if ((cur == '<') && (next == '?')) {
10240 if ((!terminate) &&
10241 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10242 goto done;
10243#ifdef DEBUG_PUSH
10244 xmlGenericError(xmlGenericErrorContext,
10245 "PP: Parsing PI\n");
10246#endif
10247 xmlParsePI(ctxt);
10248 ctxt->instate = XML_PARSER_EPILOG;
10249 } else if ((cur == '<') && (next == '!') &&
10250 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10251 if ((!terminate) &&
10252 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10253 goto done;
10254#ifdef DEBUG_PUSH
10255 xmlGenericError(xmlGenericErrorContext,
10256 "PP: Parsing Comment\n");
10257#endif
10258 xmlParseComment(ctxt);
10259 ctxt->instate = XML_PARSER_EPILOG;
10260 } else if ((cur == '<') && (next == '!') &&
10261 (avail < 4)) {
10262 goto done;
10263 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010264 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010265 ctxt->instate = XML_PARSER_EOF;
10266#ifdef DEBUG_PUSH
10267 xmlGenericError(xmlGenericErrorContext,
10268 "PP: entering EOF\n");
10269#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010270 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010271 ctxt->sax->endDocument(ctxt->userData);
10272 goto done;
10273 }
10274 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010275 case XML_PARSER_DTD: {
10276 /*
10277 * Sorry but progressive parsing of the internal subset
10278 * is not expected to be supported. We first check that
10279 * the full content of the internal subset is available and
10280 * the parsing is launched only at that point.
10281 * Internal subset ends up with "']' S? '>'" in an unescaped
10282 * section and not in a ']]>' sequence which are conditional
10283 * sections (whoever argued to keep that crap in XML deserve
10284 * a place in hell !).
10285 */
10286 int base, i;
10287 xmlChar *buf;
10288 xmlChar quote = 0;
10289
10290 base = ctxt->input->cur - ctxt->input->base;
10291 if (base < 0) return(0);
10292 if (ctxt->checkIndex > base)
10293 base = ctxt->checkIndex;
10294 buf = ctxt->input->buf->buffer->content;
10295 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10296 base++) {
10297 if (quote != 0) {
10298 if (buf[base] == quote)
10299 quote = 0;
10300 continue;
10301 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010302 if ((quote == 0) && (buf[base] == '<')) {
10303 int found = 0;
10304 /* special handling of comments */
10305 if (((unsigned int) base + 4 <
10306 ctxt->input->buf->buffer->use) &&
10307 (buf[base + 1] == '!') &&
10308 (buf[base + 2] == '-') &&
10309 (buf[base + 3] == '-')) {
10310 for (;(unsigned int) base + 3 <
10311 ctxt->input->buf->buffer->use; base++) {
10312 if ((buf[base] == '-') &&
10313 (buf[base + 1] == '-') &&
10314 (buf[base + 2] == '>')) {
10315 found = 1;
10316 base += 2;
10317 break;
10318 }
10319 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010320 if (!found) {
10321#if 0
10322 fprintf(stderr, "unfinished comment\n");
10323#endif
10324 break; /* for */
10325 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010326 continue;
10327 }
10328 }
Owen Taylor3473f882001-02-23 17:55:21 +000010329 if (buf[base] == '"') {
10330 quote = '"';
10331 continue;
10332 }
10333 if (buf[base] == '\'') {
10334 quote = '\'';
10335 continue;
10336 }
10337 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010338#if 0
10339 fprintf(stderr, "%c%c%c%c: ", buf[base],
10340 buf[base + 1], buf[base + 2], buf[base + 3]);
10341#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010342 if ((unsigned int) base +1 >=
10343 ctxt->input->buf->buffer->use)
10344 break;
10345 if (buf[base + 1] == ']') {
10346 /* conditional crap, skip both ']' ! */
10347 base++;
10348 continue;
10349 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010350 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010351 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10352 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010353 if (buf[base + i] == '>') {
10354#if 0
10355 fprintf(stderr, "found\n");
10356#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010357 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010358 }
10359 if (!IS_BLANK_CH(buf[base + i])) {
10360#if 0
10361 fprintf(stderr, "not found\n");
10362#endif
10363 goto not_end_of_int_subset;
10364 }
Owen Taylor3473f882001-02-23 17:55:21 +000010365 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010366#if 0
10367 fprintf(stderr, "end of stream\n");
10368#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010369 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010370
Owen Taylor3473f882001-02-23 17:55:21 +000010371 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010372not_end_of_int_subset:
10373 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000010374 }
10375 /*
10376 * We didn't find the end of the Internal subset
10377 */
Owen Taylor3473f882001-02-23 17:55:21 +000010378#ifdef DEBUG_PUSH
10379 if (next == 0)
10380 xmlGenericError(xmlGenericErrorContext,
10381 "PP: lookup of int subset end filed\n");
10382#endif
10383 goto done;
10384
10385found_end_int_subset:
10386 xmlParseInternalSubset(ctxt);
10387 ctxt->inSubset = 2;
10388 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10389 (ctxt->sax->externalSubset != NULL))
10390 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10391 ctxt->extSubSystem, ctxt->extSubURI);
10392 ctxt->inSubset = 0;
10393 ctxt->instate = XML_PARSER_PROLOG;
10394 ctxt->checkIndex = 0;
10395#ifdef DEBUG_PUSH
10396 xmlGenericError(xmlGenericErrorContext,
10397 "PP: entering PROLOG\n");
10398#endif
10399 break;
10400 }
10401 case XML_PARSER_COMMENT:
10402 xmlGenericError(xmlGenericErrorContext,
10403 "PP: internal error, state == COMMENT\n");
10404 ctxt->instate = XML_PARSER_CONTENT;
10405#ifdef DEBUG_PUSH
10406 xmlGenericError(xmlGenericErrorContext,
10407 "PP: entering CONTENT\n");
10408#endif
10409 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010410 case XML_PARSER_IGNORE:
10411 xmlGenericError(xmlGenericErrorContext,
10412 "PP: internal error, state == IGNORE");
10413 ctxt->instate = XML_PARSER_DTD;
10414#ifdef DEBUG_PUSH
10415 xmlGenericError(xmlGenericErrorContext,
10416 "PP: entering DTD\n");
10417#endif
10418 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010419 case XML_PARSER_PI:
10420 xmlGenericError(xmlGenericErrorContext,
10421 "PP: internal error, state == PI\n");
10422 ctxt->instate = XML_PARSER_CONTENT;
10423#ifdef DEBUG_PUSH
10424 xmlGenericError(xmlGenericErrorContext,
10425 "PP: entering CONTENT\n");
10426#endif
10427 break;
10428 case XML_PARSER_ENTITY_DECL:
10429 xmlGenericError(xmlGenericErrorContext,
10430 "PP: internal error, state == ENTITY_DECL\n");
10431 ctxt->instate = XML_PARSER_DTD;
10432#ifdef DEBUG_PUSH
10433 xmlGenericError(xmlGenericErrorContext,
10434 "PP: entering DTD\n");
10435#endif
10436 break;
10437 case XML_PARSER_ENTITY_VALUE:
10438 xmlGenericError(xmlGenericErrorContext,
10439 "PP: internal error, state == ENTITY_VALUE\n");
10440 ctxt->instate = XML_PARSER_CONTENT;
10441#ifdef DEBUG_PUSH
10442 xmlGenericError(xmlGenericErrorContext,
10443 "PP: entering DTD\n");
10444#endif
10445 break;
10446 case XML_PARSER_ATTRIBUTE_VALUE:
10447 xmlGenericError(xmlGenericErrorContext,
10448 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10449 ctxt->instate = XML_PARSER_START_TAG;
10450#ifdef DEBUG_PUSH
10451 xmlGenericError(xmlGenericErrorContext,
10452 "PP: entering START_TAG\n");
10453#endif
10454 break;
10455 case XML_PARSER_SYSTEM_LITERAL:
10456 xmlGenericError(xmlGenericErrorContext,
10457 "PP: internal error, state == SYSTEM_LITERAL\n");
10458 ctxt->instate = XML_PARSER_START_TAG;
10459#ifdef DEBUG_PUSH
10460 xmlGenericError(xmlGenericErrorContext,
10461 "PP: entering START_TAG\n");
10462#endif
10463 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010464 case XML_PARSER_PUBLIC_LITERAL:
10465 xmlGenericError(xmlGenericErrorContext,
10466 "PP: internal error, state == PUBLIC_LITERAL\n");
10467 ctxt->instate = XML_PARSER_START_TAG;
10468#ifdef DEBUG_PUSH
10469 xmlGenericError(xmlGenericErrorContext,
10470 "PP: entering START_TAG\n");
10471#endif
10472 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010473 }
10474 }
10475done:
10476#ifdef DEBUG_PUSH
10477 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10478#endif
10479 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010480encoding_error:
10481 {
10482 char buffer[150];
10483
10484 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10485 ctxt->input->cur[0], ctxt->input->cur[1],
10486 ctxt->input->cur[2], ctxt->input->cur[3]);
10487 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10488 "Input is not proper UTF-8, indicate encoding !\n%s",
10489 BAD_CAST buffer, NULL);
10490 }
10491 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000010492}
10493
10494/**
Owen Taylor3473f882001-02-23 17:55:21 +000010495 * xmlParseChunk:
10496 * @ctxt: an XML parser context
10497 * @chunk: an char array
10498 * @size: the size in byte of the chunk
10499 * @terminate: last chunk indicator
10500 *
10501 * Parse a Chunk of memory
10502 *
10503 * Returns zero if no error, the xmlParserErrors otherwise.
10504 */
10505int
10506xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10507 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000010508 int end_in_lf = 0;
10509
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010510 if (ctxt == NULL)
10511 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000010512 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010513 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010514 if (ctxt->instate == XML_PARSER_START)
10515 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000010516 if ((size > 0) && (chunk != NULL) && (!terminate) &&
10517 (chunk[size - 1] == '\r')) {
10518 end_in_lf = 1;
10519 size--;
10520 }
Owen Taylor3473f882001-02-23 17:55:21 +000010521 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10522 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10523 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10524 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000010525 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000010526
William M. Bracka3215c72004-07-31 16:24:01 +000010527 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10528 if (res < 0) {
10529 ctxt->errNo = XML_PARSER_EOF;
10530 ctxt->disableSAX = 1;
10531 return (XML_PARSER_EOF);
10532 }
Owen Taylor3473f882001-02-23 17:55:21 +000010533 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10534 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010535 ctxt->input->end =
10536 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010537#ifdef DEBUG_PUSH
10538 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10539#endif
10540
Owen Taylor3473f882001-02-23 17:55:21 +000010541 } else if (ctxt->instate != XML_PARSER_EOF) {
10542 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10543 xmlParserInputBufferPtr in = ctxt->input->buf;
10544 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10545 (in->raw != NULL)) {
10546 int nbchars;
10547
10548 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10549 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010550 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010551 xmlGenericError(xmlGenericErrorContext,
10552 "xmlParseChunk: encoder error\n");
10553 return(XML_ERR_INVALID_ENCODING);
10554 }
10555 }
10556 }
10557 }
10558 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000010559 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
10560 (ctxt->input->buf != NULL)) {
10561 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
10562 }
Daniel Veillard14412512005-01-21 23:53:26 +000010563 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010564 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010565 if (terminate) {
10566 /*
10567 * Check for termination
10568 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010569 int avail = 0;
10570
10571 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010572 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010573 avail = ctxt->input->length -
10574 (ctxt->input->cur - ctxt->input->base);
10575 else
10576 avail = ctxt->input->buf->buffer->use -
10577 (ctxt->input->cur - ctxt->input->base);
10578 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010579
Owen Taylor3473f882001-02-23 17:55:21 +000010580 if ((ctxt->instate != XML_PARSER_EOF) &&
10581 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010582 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010583 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010584 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010585 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010586 }
Owen Taylor3473f882001-02-23 17:55:21 +000010587 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010588 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010589 ctxt->sax->endDocument(ctxt->userData);
10590 }
10591 ctxt->instate = XML_PARSER_EOF;
10592 }
10593 return((xmlParserErrors) ctxt->errNo);
10594}
10595
10596/************************************************************************
10597 * *
10598 * I/O front end functions to the parser *
10599 * *
10600 ************************************************************************/
10601
10602/**
Owen Taylor3473f882001-02-23 17:55:21 +000010603 * xmlCreatePushParserCtxt:
10604 * @sax: a SAX handler
10605 * @user_data: The user data returned on SAX callbacks
10606 * @chunk: a pointer to an array of chars
10607 * @size: number of chars in the array
10608 * @filename: an optional file name or URI
10609 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010610 * Create a parser context for using the XML parser in push mode.
10611 * If @buffer and @size are non-NULL, the data is used to detect
10612 * the encoding. The remaining characters will be parsed so they
10613 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010614 * To allow content encoding detection, @size should be >= 4
10615 * The value of @filename is used for fetching external entities
10616 * and error/warning reports.
10617 *
10618 * Returns the new parser context or NULL
10619 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010620
Owen Taylor3473f882001-02-23 17:55:21 +000010621xmlParserCtxtPtr
10622xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10623 const char *chunk, int size, const char *filename) {
10624 xmlParserCtxtPtr ctxt;
10625 xmlParserInputPtr inputStream;
10626 xmlParserInputBufferPtr buf;
10627 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10628
10629 /*
10630 * plug some encoding conversion routines
10631 */
10632 if ((chunk != NULL) && (size >= 4))
10633 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10634
10635 buf = xmlAllocParserInputBuffer(enc);
10636 if (buf == NULL) return(NULL);
10637
10638 ctxt = xmlNewParserCtxt();
10639 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010640 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010641 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010642 return(NULL);
10643 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010644 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010645 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10646 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010647 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010648 xmlFreeParserInputBuffer(buf);
10649 xmlFreeParserCtxt(ctxt);
10650 return(NULL);
10651 }
Owen Taylor3473f882001-02-23 17:55:21 +000010652 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010653#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010654 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010655#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010656 xmlFree(ctxt->sax);
10657 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10658 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010659 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010660 xmlFreeParserInputBuffer(buf);
10661 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010662 return(NULL);
10663 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010664 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10665 if (sax->initialized == XML_SAX2_MAGIC)
10666 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10667 else
10668 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010669 if (user_data != NULL)
10670 ctxt->userData = user_data;
10671 }
10672 if (filename == NULL) {
10673 ctxt->directory = NULL;
10674 } else {
10675 ctxt->directory = xmlParserGetDirectory(filename);
10676 }
10677
10678 inputStream = xmlNewInputStream(ctxt);
10679 if (inputStream == NULL) {
10680 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010681 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010682 return(NULL);
10683 }
10684
10685 if (filename == NULL)
10686 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000010687 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000010688 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010689 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000010690 if (inputStream->filename == NULL) {
10691 xmlFreeParserCtxt(ctxt);
10692 xmlFreeParserInputBuffer(buf);
10693 return(NULL);
10694 }
10695 }
Owen Taylor3473f882001-02-23 17:55:21 +000010696 inputStream->buf = buf;
10697 inputStream->base = inputStream->buf->buffer->content;
10698 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010699 inputStream->end =
10700 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010701
10702 inputPush(ctxt, inputStream);
10703
William M. Brack3a1cd212005-02-11 14:35:54 +000010704 /*
10705 * If the caller didn't provide an initial 'chunk' for determining
10706 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
10707 * that it can be automatically determined later
10708 */
10709 if ((size == 0) || (chunk == NULL)) {
10710 ctxt->charset = XML_CHAR_ENCODING_NONE;
10711 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010712 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10713 int cur = ctxt->input->cur - ctxt->input->base;
10714
Owen Taylor3473f882001-02-23 17:55:21 +000010715 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010716
10717 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10718 ctxt->input->cur = ctxt->input->base + cur;
10719 ctxt->input->end =
10720 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010721#ifdef DEBUG_PUSH
10722 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10723#endif
10724 }
10725
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010726 if (enc != XML_CHAR_ENCODING_NONE) {
10727 xmlSwitchEncoding(ctxt, enc);
10728 }
10729
Owen Taylor3473f882001-02-23 17:55:21 +000010730 return(ctxt);
10731}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010732#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010733
10734/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000010735 * xmlStopParser:
10736 * @ctxt: an XML parser context
10737 *
10738 * Blocks further parser processing
10739 */
10740void
10741xmlStopParser(xmlParserCtxtPtr ctxt) {
10742 if (ctxt == NULL)
10743 return;
10744 ctxt->instate = XML_PARSER_EOF;
10745 ctxt->disableSAX = 1;
10746 if (ctxt->input != NULL) {
10747 ctxt->input->cur = BAD_CAST"";
10748 ctxt->input->base = ctxt->input->cur;
10749 }
10750}
10751
10752/**
Owen Taylor3473f882001-02-23 17:55:21 +000010753 * xmlCreateIOParserCtxt:
10754 * @sax: a SAX handler
10755 * @user_data: The user data returned on SAX callbacks
10756 * @ioread: an I/O read function
10757 * @ioclose: an I/O close function
10758 * @ioctx: an I/O handler
10759 * @enc: the charset encoding if known
10760 *
10761 * Create a parser context for using the XML parser with an existing
10762 * I/O stream
10763 *
10764 * Returns the new parser context or NULL
10765 */
10766xmlParserCtxtPtr
10767xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10768 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10769 void *ioctx, xmlCharEncoding enc) {
10770 xmlParserCtxtPtr ctxt;
10771 xmlParserInputPtr inputStream;
10772 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000010773
10774 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010775
10776 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10777 if (buf == NULL) return(NULL);
10778
10779 ctxt = xmlNewParserCtxt();
10780 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010781 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010782 return(NULL);
10783 }
10784 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010785#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010786 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010787#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010788 xmlFree(ctxt->sax);
10789 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10790 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010791 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010792 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010793 return(NULL);
10794 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010795 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10796 if (sax->initialized == XML_SAX2_MAGIC)
10797 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10798 else
10799 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010800 if (user_data != NULL)
10801 ctxt->userData = user_data;
10802 }
10803
10804 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10805 if (inputStream == NULL) {
10806 xmlFreeParserCtxt(ctxt);
10807 return(NULL);
10808 }
10809 inputPush(ctxt, inputStream);
10810
10811 return(ctxt);
10812}
10813
Daniel Veillard4432df22003-09-28 18:58:27 +000010814#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010815/************************************************************************
10816 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010817 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010818 * *
10819 ************************************************************************/
10820
10821/**
10822 * xmlIOParseDTD:
10823 * @sax: the SAX handler block or NULL
10824 * @input: an Input Buffer
10825 * @enc: the charset encoding if known
10826 *
10827 * Load and parse a DTD
10828 *
10829 * Returns the resulting xmlDtdPtr or NULL in case of error.
10830 * @input will be freed at parsing end.
10831 */
10832
10833xmlDtdPtr
10834xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10835 xmlCharEncoding enc) {
10836 xmlDtdPtr ret = NULL;
10837 xmlParserCtxtPtr ctxt;
10838 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010839 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010840
10841 if (input == NULL)
10842 return(NULL);
10843
10844 ctxt = xmlNewParserCtxt();
10845 if (ctxt == NULL) {
10846 return(NULL);
10847 }
10848
10849 /*
10850 * Set-up the SAX context
10851 */
10852 if (sax != NULL) {
10853 if (ctxt->sax != NULL)
10854 xmlFree(ctxt->sax);
10855 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010856 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010857 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010858 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010859
10860 /*
10861 * generate a parser input from the I/O handler
10862 */
10863
Daniel Veillard43caefb2003-12-07 19:32:22 +000010864 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010865 if (pinput == NULL) {
10866 if (sax != NULL) ctxt->sax = NULL;
10867 xmlFreeParserCtxt(ctxt);
10868 return(NULL);
10869 }
10870
10871 /*
10872 * plug some encoding conversion routines here.
10873 */
10874 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010875 if (enc != XML_CHAR_ENCODING_NONE) {
10876 xmlSwitchEncoding(ctxt, enc);
10877 }
Owen Taylor3473f882001-02-23 17:55:21 +000010878
10879 pinput->filename = NULL;
10880 pinput->line = 1;
10881 pinput->col = 1;
10882 pinput->base = ctxt->input->cur;
10883 pinput->cur = ctxt->input->cur;
10884 pinput->free = NULL;
10885
10886 /*
10887 * let's parse that entity knowing it's an external subset.
10888 */
10889 ctxt->inSubset = 2;
10890 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10891 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10892 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010893
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010894 if ((enc == XML_CHAR_ENCODING_NONE) &&
10895 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010896 /*
10897 * Get the 4 first bytes and decode the charset
10898 * if enc != XML_CHAR_ENCODING_NONE
10899 * plug some encoding conversion routines.
10900 */
10901 start[0] = RAW;
10902 start[1] = NXT(1);
10903 start[2] = NXT(2);
10904 start[3] = NXT(3);
10905 enc = xmlDetectCharEncoding(start, 4);
10906 if (enc != XML_CHAR_ENCODING_NONE) {
10907 xmlSwitchEncoding(ctxt, enc);
10908 }
10909 }
10910
Owen Taylor3473f882001-02-23 17:55:21 +000010911 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10912
10913 if (ctxt->myDoc != NULL) {
10914 if (ctxt->wellFormed) {
10915 ret = ctxt->myDoc->extSubset;
10916 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010917 if (ret != NULL) {
10918 xmlNodePtr tmp;
10919
10920 ret->doc = NULL;
10921 tmp = ret->children;
10922 while (tmp != NULL) {
10923 tmp->doc = NULL;
10924 tmp = tmp->next;
10925 }
10926 }
Owen Taylor3473f882001-02-23 17:55:21 +000010927 } else {
10928 ret = NULL;
10929 }
10930 xmlFreeDoc(ctxt->myDoc);
10931 ctxt->myDoc = NULL;
10932 }
10933 if (sax != NULL) ctxt->sax = NULL;
10934 xmlFreeParserCtxt(ctxt);
10935
10936 return(ret);
10937}
10938
10939/**
10940 * xmlSAXParseDTD:
10941 * @sax: the SAX handler block
10942 * @ExternalID: a NAME* containing the External ID of the DTD
10943 * @SystemID: a NAME* containing the URL to the DTD
10944 *
10945 * Load and parse an external subset.
10946 *
10947 * Returns the resulting xmlDtdPtr or NULL in case of error.
10948 */
10949
10950xmlDtdPtr
10951xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10952 const xmlChar *SystemID) {
10953 xmlDtdPtr ret = NULL;
10954 xmlParserCtxtPtr ctxt;
10955 xmlParserInputPtr input = NULL;
10956 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010957 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000010958
10959 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10960
10961 ctxt = xmlNewParserCtxt();
10962 if (ctxt == NULL) {
10963 return(NULL);
10964 }
10965
10966 /*
10967 * Set-up the SAX context
10968 */
10969 if (sax != NULL) {
10970 if (ctxt->sax != NULL)
10971 xmlFree(ctxt->sax);
10972 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010973 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010974 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010975
10976 /*
10977 * Canonicalise the system ID
10978 */
10979 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000010980 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010981 xmlFreeParserCtxt(ctxt);
10982 return(NULL);
10983 }
Owen Taylor3473f882001-02-23 17:55:21 +000010984
10985 /*
10986 * Ask the Entity resolver to load the damn thing
10987 */
10988
10989 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000010990 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
10991 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010992 if (input == NULL) {
10993 if (sax != NULL) ctxt->sax = NULL;
10994 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000010995 if (systemIdCanonic != NULL)
10996 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010997 return(NULL);
10998 }
10999
11000 /*
11001 * plug some encoding conversion routines here.
11002 */
11003 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011004 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11005 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11006 xmlSwitchEncoding(ctxt, enc);
11007 }
Owen Taylor3473f882001-02-23 17:55:21 +000011008
11009 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011010 input->filename = (char *) systemIdCanonic;
11011 else
11012 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011013 input->line = 1;
11014 input->col = 1;
11015 input->base = ctxt->input->cur;
11016 input->cur = ctxt->input->cur;
11017 input->free = NULL;
11018
11019 /*
11020 * let's parse that entity knowing it's an external subset.
11021 */
11022 ctxt->inSubset = 2;
11023 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11024 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11025 ExternalID, SystemID);
11026 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11027
11028 if (ctxt->myDoc != NULL) {
11029 if (ctxt->wellFormed) {
11030 ret = ctxt->myDoc->extSubset;
11031 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000011032 if (ret != NULL) {
11033 xmlNodePtr tmp;
11034
11035 ret->doc = NULL;
11036 tmp = ret->children;
11037 while (tmp != NULL) {
11038 tmp->doc = NULL;
11039 tmp = tmp->next;
11040 }
11041 }
Owen Taylor3473f882001-02-23 17:55:21 +000011042 } else {
11043 ret = NULL;
11044 }
11045 xmlFreeDoc(ctxt->myDoc);
11046 ctxt->myDoc = NULL;
11047 }
11048 if (sax != NULL) ctxt->sax = NULL;
11049 xmlFreeParserCtxt(ctxt);
11050
11051 return(ret);
11052}
11053
Daniel Veillard4432df22003-09-28 18:58:27 +000011054
Owen Taylor3473f882001-02-23 17:55:21 +000011055/**
11056 * xmlParseDTD:
11057 * @ExternalID: a NAME* containing the External ID of the DTD
11058 * @SystemID: a NAME* containing the URL to the DTD
11059 *
11060 * Load and parse an external subset.
11061 *
11062 * Returns the resulting xmlDtdPtr or NULL in case of error.
11063 */
11064
11065xmlDtdPtr
11066xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11067 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11068}
Daniel Veillard4432df22003-09-28 18:58:27 +000011069#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011070
11071/************************************************************************
11072 * *
11073 * Front ends when parsing an Entity *
11074 * *
11075 ************************************************************************/
11076
11077/**
Owen Taylor3473f882001-02-23 17:55:21 +000011078 * xmlParseCtxtExternalEntity:
11079 * @ctx: the existing parsing context
11080 * @URL: the URL for the entity to load
11081 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011082 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011083 *
11084 * Parse an external general entity within an existing parsing context
11085 * An external general parsed entity is well-formed if it matches the
11086 * production labeled extParsedEnt.
11087 *
11088 * [78] extParsedEnt ::= TextDecl? content
11089 *
11090 * Returns 0 if the entity is well formed, -1 in case of args problem and
11091 * the parser error code otherwise
11092 */
11093
11094int
11095xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011096 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000011097 xmlParserCtxtPtr ctxt;
11098 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011099 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011100 xmlSAXHandlerPtr oldsax = NULL;
11101 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011102 xmlChar start[4];
11103 xmlCharEncoding enc;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011104 xmlParserInputPtr inputStream;
11105 char *directory = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011106
Daniel Veillardce682bc2004-11-05 17:22:25 +000011107 if (ctx == NULL) return(-1);
11108
Owen Taylor3473f882001-02-23 17:55:21 +000011109 if (ctx->depth > 40) {
11110 return(XML_ERR_ENTITY_LOOP);
11111 }
11112
Daniel Veillardcda96922001-08-21 10:56:31 +000011113 if (lst != NULL)
11114 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011115 if ((URL == NULL) && (ID == NULL))
11116 return(-1);
11117 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11118 return(-1);
11119
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011120 ctxt = xmlNewParserCtxt();
11121 if (ctxt == NULL) {
11122 return(-1);
11123 }
11124
Owen Taylor3473f882001-02-23 17:55:21 +000011125 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000011126 ctxt->_private = ctx->_private;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011127
11128 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11129 if (inputStream == NULL) {
11130 xmlFreeParserCtxt(ctxt);
11131 return(-1);
11132 }
11133
11134 inputPush(ctxt, inputStream);
11135
11136 if ((ctxt->directory == NULL) && (directory == NULL))
11137 directory = xmlParserGetDirectory((char *)URL);
11138 if ((ctxt->directory == NULL) && (directory != NULL))
11139 ctxt->directory = directory;
11140
Owen Taylor3473f882001-02-23 17:55:21 +000011141 oldsax = ctxt->sax;
11142 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011143 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011144 newDoc = xmlNewDoc(BAD_CAST "1.0");
11145 if (newDoc == NULL) {
11146 xmlFreeParserCtxt(ctxt);
11147 return(-1);
11148 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011149 if (ctx->myDoc->dict) {
11150 newDoc->dict = ctx->myDoc->dict;
11151 xmlDictReference(newDoc->dict);
11152 }
Owen Taylor3473f882001-02-23 17:55:21 +000011153 if (ctx->myDoc != NULL) {
11154 newDoc->intSubset = ctx->myDoc->intSubset;
11155 newDoc->extSubset = ctx->myDoc->extSubset;
11156 }
11157 if (ctx->myDoc->URL != NULL) {
11158 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11159 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011160 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11161 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011162 ctxt->sax = oldsax;
11163 xmlFreeParserCtxt(ctxt);
11164 newDoc->intSubset = NULL;
11165 newDoc->extSubset = NULL;
11166 xmlFreeDoc(newDoc);
11167 return(-1);
11168 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011169 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011170 nodePush(ctxt, newDoc->children);
11171 if (ctx->myDoc == NULL) {
11172 ctxt->myDoc = newDoc;
11173 } else {
11174 ctxt->myDoc = ctx->myDoc;
11175 newDoc->children->doc = ctx->myDoc;
11176 }
11177
Daniel Veillard87a764e2001-06-20 17:41:10 +000011178 /*
11179 * Get the 4 first bytes and decode the charset
11180 * if enc != XML_CHAR_ENCODING_NONE
11181 * plug some encoding conversion routines.
11182 */
11183 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011184 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11185 start[0] = RAW;
11186 start[1] = NXT(1);
11187 start[2] = NXT(2);
11188 start[3] = NXT(3);
11189 enc = xmlDetectCharEncoding(start, 4);
11190 if (enc != XML_CHAR_ENCODING_NONE) {
11191 xmlSwitchEncoding(ctxt, enc);
11192 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011193 }
11194
Owen Taylor3473f882001-02-23 17:55:21 +000011195 /*
11196 * Parse a possible text declaration first
11197 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011198 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011199 xmlParseTextDecl(ctxt);
11200 }
11201
11202 /*
11203 * Doing validity checking on chunk doesn't make sense
11204 */
11205 ctxt->instate = XML_PARSER_CONTENT;
11206 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011207 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011208 ctxt->loadsubset = ctx->loadsubset;
11209 ctxt->depth = ctx->depth + 1;
11210 ctxt->replaceEntities = ctx->replaceEntities;
11211 if (ctxt->validate) {
11212 ctxt->vctxt.error = ctx->vctxt.error;
11213 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000011214 } else {
11215 ctxt->vctxt.error = NULL;
11216 ctxt->vctxt.warning = NULL;
11217 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000011218 ctxt->vctxt.nodeTab = NULL;
11219 ctxt->vctxt.nodeNr = 0;
11220 ctxt->vctxt.nodeMax = 0;
11221 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011222 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11223 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011224 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11225 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11226 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011227 ctxt->dictNames = ctx->dictNames;
11228 ctxt->attsDefault = ctx->attsDefault;
11229 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000011230 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000011231
11232 xmlParseContent(ctxt);
11233
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011234 ctx->validate = ctxt->validate;
11235 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011236 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011237 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011238 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011239 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011240 }
11241 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011242 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011243 }
11244
11245 if (!ctxt->wellFormed) {
11246 if (ctxt->errNo == 0)
11247 ret = 1;
11248 else
11249 ret = ctxt->errNo;
11250 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000011251 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011252 xmlNodePtr cur;
11253
11254 /*
11255 * Return the newly created nodeset after unlinking it from
11256 * they pseudo parent.
11257 */
11258 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011259 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011260 while (cur != NULL) {
11261 cur->parent = NULL;
11262 cur = cur->next;
11263 }
11264 newDoc->children->children = NULL;
11265 }
11266 ret = 0;
11267 }
11268 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011269 ctxt->dict = NULL;
11270 ctxt->attsDefault = NULL;
11271 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011272 xmlFreeParserCtxt(ctxt);
11273 newDoc->intSubset = NULL;
11274 newDoc->extSubset = NULL;
11275 xmlFreeDoc(newDoc);
11276
11277 return(ret);
11278}
11279
11280/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011281 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000011282 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011283 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000011284 * @sax: the SAX handler bloc (possibly NULL)
11285 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11286 * @depth: Used for loop detection, use 0
11287 * @URL: the URL for the entity to load
11288 * @ID: the System ID for the entity to load
11289 * @list: the return value for the set of parsed nodes
11290 *
Daniel Veillard257d9102001-05-08 10:41:44 +000011291 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000011292 *
11293 * Returns 0 if the entity is well formed, -1 in case of args problem and
11294 * the parser error code otherwise
11295 */
11296
Daniel Veillard7d515752003-09-26 19:12:37 +000011297static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011298xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11299 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000011300 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011301 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000011302 xmlParserCtxtPtr ctxt;
11303 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011304 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011305 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000011306 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011307 xmlChar start[4];
11308 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000011309
11310 if (depth > 40) {
11311 return(XML_ERR_ENTITY_LOOP);
11312 }
11313
11314
11315
11316 if (list != NULL)
11317 *list = NULL;
11318 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000011319 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000011320 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000011321 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011322
11323
11324 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000011325 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000011326 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011327 if (oldctxt != NULL) {
11328 ctxt->_private = oldctxt->_private;
11329 ctxt->loadsubset = oldctxt->loadsubset;
11330 ctxt->validate = oldctxt->validate;
11331 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011332 ctxt->record_info = oldctxt->record_info;
11333 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11334 ctxt->node_seq.length = oldctxt->node_seq.length;
11335 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011336 } else {
11337 /*
11338 * Doing validity checking on chunk without context
11339 * doesn't make sense
11340 */
11341 ctxt->_private = NULL;
11342 ctxt->validate = 0;
11343 ctxt->external = 2;
11344 ctxt->loadsubset = 0;
11345 }
Owen Taylor3473f882001-02-23 17:55:21 +000011346 if (sax != NULL) {
11347 oldsax = ctxt->sax;
11348 ctxt->sax = sax;
11349 if (user_data != NULL)
11350 ctxt->userData = user_data;
11351 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011352 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011353 newDoc = xmlNewDoc(BAD_CAST "1.0");
11354 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011355 ctxt->node_seq.maximum = 0;
11356 ctxt->node_seq.length = 0;
11357 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011358 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000011359 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011360 }
Daniel Veillard30e76072006-03-09 14:13:55 +000011361 newDoc->intSubset = doc->intSubset;
11362 newDoc->extSubset = doc->extSubset;
11363 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000011364 xmlDictReference(newDoc->dict);
11365
Owen Taylor3473f882001-02-23 17:55:21 +000011366 if (doc->URL != NULL) {
11367 newDoc->URL = xmlStrdup(doc->URL);
11368 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011369 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11370 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011371 if (sax != NULL)
11372 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011373 ctxt->node_seq.maximum = 0;
11374 ctxt->node_seq.length = 0;
11375 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011376 xmlFreeParserCtxt(ctxt);
11377 newDoc->intSubset = NULL;
11378 newDoc->extSubset = NULL;
11379 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000011380 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011381 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011382 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011383 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000011384 ctxt->myDoc = doc;
11385 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000011386
Daniel Veillard87a764e2001-06-20 17:41:10 +000011387 /*
11388 * Get the 4 first bytes and decode the charset
11389 * if enc != XML_CHAR_ENCODING_NONE
11390 * plug some encoding conversion routines.
11391 */
11392 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011393 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11394 start[0] = RAW;
11395 start[1] = NXT(1);
11396 start[2] = NXT(2);
11397 start[3] = NXT(3);
11398 enc = xmlDetectCharEncoding(start, 4);
11399 if (enc != XML_CHAR_ENCODING_NONE) {
11400 xmlSwitchEncoding(ctxt, enc);
11401 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011402 }
11403
Owen Taylor3473f882001-02-23 17:55:21 +000011404 /*
11405 * Parse a possible text declaration first
11406 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011407 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011408 xmlParseTextDecl(ctxt);
11409 }
11410
Owen Taylor3473f882001-02-23 17:55:21 +000011411 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011412 ctxt->depth = depth;
11413
11414 xmlParseContent(ctxt);
11415
Daniel Veillard561b7f82002-03-20 21:55:57 +000011416 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011417 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000011418 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011419 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011420 }
11421 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011422 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011423 }
11424
11425 if (!ctxt->wellFormed) {
11426 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011427 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000011428 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011429 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000011430 } else {
11431 if (list != NULL) {
11432 xmlNodePtr cur;
11433
11434 /*
11435 * Return the newly created nodeset after unlinking it from
11436 * they pseudo parent.
11437 */
11438 cur = newDoc->children->children;
11439 *list = cur;
11440 while (cur != NULL) {
11441 cur->parent = NULL;
11442 cur = cur->next;
11443 }
11444 newDoc->children->children = NULL;
11445 }
Daniel Veillard7d515752003-09-26 19:12:37 +000011446 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000011447 }
11448 if (sax != NULL)
11449 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000011450 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11451 oldctxt->node_seq.length = ctxt->node_seq.length;
11452 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011453 ctxt->node_seq.maximum = 0;
11454 ctxt->node_seq.length = 0;
11455 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011456 xmlFreeParserCtxt(ctxt);
11457 newDoc->intSubset = NULL;
11458 newDoc->extSubset = NULL;
11459 xmlFreeDoc(newDoc);
11460
11461 return(ret);
11462}
11463
Daniel Veillard81273902003-09-30 00:43:48 +000011464#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011465/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011466 * xmlParseExternalEntity:
11467 * @doc: the document the chunk pertains to
11468 * @sax: the SAX handler bloc (possibly NULL)
11469 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11470 * @depth: Used for loop detection, use 0
11471 * @URL: the URL for the entity to load
11472 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011473 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000011474 *
11475 * Parse an external general entity
11476 * An external general parsed entity is well-formed if it matches the
11477 * production labeled extParsedEnt.
11478 *
11479 * [78] extParsedEnt ::= TextDecl? content
11480 *
11481 * Returns 0 if the entity is well formed, -1 in case of args problem and
11482 * the parser error code otherwise
11483 */
11484
11485int
11486xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000011487 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011488 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011489 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000011490}
11491
11492/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000011493 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000011494 * @doc: the document the chunk pertains to
11495 * @sax: the SAX handler bloc (possibly NULL)
11496 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11497 * @depth: Used for loop detection, use 0
11498 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000011499 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011500 *
11501 * Parse a well-balanced chunk of an XML document
11502 * called by the parser
11503 * The allowed sequence for the Well Balanced Chunk is the one defined by
11504 * the content production in the XML grammar:
11505 *
11506 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11507 *
11508 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11509 * the parser error code otherwise
11510 */
11511
11512int
11513xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000011514 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011515 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11516 depth, string, lst, 0 );
11517}
Daniel Veillard81273902003-09-30 00:43:48 +000011518#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000011519
11520/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000011521 * xmlParseBalancedChunkMemoryInternal:
11522 * @oldctxt: the existing parsing context
11523 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11524 * @user_data: the user data field for the parser context
11525 * @lst: the return value for the set of parsed nodes
11526 *
11527 *
11528 * Parse a well-balanced chunk of an XML document
11529 * called by the parser
11530 * The allowed sequence for the Well Balanced Chunk is the one defined by
11531 * the content production in the XML grammar:
11532 *
11533 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11534 *
Daniel Veillard7d515752003-09-26 19:12:37 +000011535 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11536 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000011537 *
11538 * In case recover is set to 1, the nodelist will not be empty even if
11539 * the parsed chunk is not well balanced.
11540 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011541static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011542xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11543 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11544 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011545 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011546 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011547 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011548 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011549 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011550 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011551 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011552
11553 if (oldctxt->depth > 40) {
11554 return(XML_ERR_ENTITY_LOOP);
11555 }
11556
11557
11558 if (lst != NULL)
11559 *lst = NULL;
11560 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011561 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011562
11563 size = xmlStrlen(string);
11564
11565 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011566 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011567 if (user_data != NULL)
11568 ctxt->userData = user_data;
11569 else
11570 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011571 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11572 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011573 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11574 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11575 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011576
11577 oldsax = ctxt->sax;
11578 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011579 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011580 ctxt->replaceEntities = oldctxt->replaceEntities;
11581 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011582
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011583 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011584 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011585 newDoc = xmlNewDoc(BAD_CAST "1.0");
11586 if (newDoc == NULL) {
11587 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011588 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011589 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011590 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011591 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011592 newDoc->dict = ctxt->dict;
11593 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011594 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011595 } else {
11596 ctxt->myDoc = oldctxt->myDoc;
11597 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011598 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011599 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011600 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11601 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011602 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011603 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011604 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011605 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011606 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011607 }
William M. Brack7b9154b2003-09-27 19:23:50 +000011608 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011609 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011610 ctxt->myDoc->children = NULL;
11611 ctxt->myDoc->last = NULL;
11612 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011613 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011614 ctxt->instate = XML_PARSER_CONTENT;
11615 ctxt->depth = oldctxt->depth + 1;
11616
Daniel Veillard328f48c2002-11-15 15:24:34 +000011617 ctxt->validate = 0;
11618 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011619 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11620 /*
11621 * ID/IDREF registration will be done in xmlValidateElement below
11622 */
11623 ctxt->loadsubset |= XML_SKIP_IDS;
11624 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011625 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011626 ctxt->attsDefault = oldctxt->attsDefault;
11627 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011628
Daniel Veillard68e9e742002-11-16 15:35:11 +000011629 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011630 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011631 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011632 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011633 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011634 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011635 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011636 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011637 }
11638
11639 if (!ctxt->wellFormed) {
11640 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011641 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011642 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011643 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011644 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011645 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011646 }
11647
William M. Brack7b9154b2003-09-27 19:23:50 +000011648 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011649 xmlNodePtr cur;
11650
11651 /*
11652 * Return the newly created nodeset after unlinking it from
11653 * they pseudo parent.
11654 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011655 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011656 *lst = cur;
11657 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011658#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000011659 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
11660 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
11661 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000011662 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11663 oldctxt->myDoc, cur);
11664 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011665#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011666 cur->parent = NULL;
11667 cur = cur->next;
11668 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011669 ctxt->myDoc->children->children = NULL;
11670 }
11671 if (ctxt->myDoc != NULL) {
11672 xmlFreeNode(ctxt->myDoc->children);
11673 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011674 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011675 }
11676
11677 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011678 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011679 ctxt->attsDefault = NULL;
11680 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011681 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011682 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011683 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011684 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000011685
11686 return(ret);
11687}
11688
Daniel Veillard29b17482004-08-16 00:39:03 +000011689/**
11690 * xmlParseInNodeContext:
11691 * @node: the context node
11692 * @data: the input string
11693 * @datalen: the input string length in bytes
11694 * @options: a combination of xmlParserOption
11695 * @lst: the return value for the set of parsed nodes
11696 *
11697 * Parse a well-balanced chunk of an XML document
11698 * within the context (DTD, namespaces, etc ...) of the given node.
11699 *
11700 * The allowed sequence for the data is a Well Balanced Chunk defined by
11701 * the content production in the XML grammar:
11702 *
11703 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11704 *
11705 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11706 * error code otherwise
11707 */
11708xmlParserErrors
11709xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11710 int options, xmlNodePtr *lst) {
11711#ifdef SAX2
11712 xmlParserCtxtPtr ctxt;
11713 xmlDocPtr doc = NULL;
11714 xmlNodePtr fake, cur;
11715 int nsnr = 0;
11716
11717 xmlParserErrors ret = XML_ERR_OK;
11718
11719 /*
11720 * check all input parameters, grab the document
11721 */
11722 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11723 return(XML_ERR_INTERNAL_ERROR);
11724 switch (node->type) {
11725 case XML_ELEMENT_NODE:
11726 case XML_ATTRIBUTE_NODE:
11727 case XML_TEXT_NODE:
11728 case XML_CDATA_SECTION_NODE:
11729 case XML_ENTITY_REF_NODE:
11730 case XML_PI_NODE:
11731 case XML_COMMENT_NODE:
11732 case XML_DOCUMENT_NODE:
11733 case XML_HTML_DOCUMENT_NODE:
11734 break;
11735 default:
11736 return(XML_ERR_INTERNAL_ERROR);
11737
11738 }
11739 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11740 (node->type != XML_DOCUMENT_NODE) &&
11741 (node->type != XML_HTML_DOCUMENT_NODE))
11742 node = node->parent;
11743 if (node == NULL)
11744 return(XML_ERR_INTERNAL_ERROR);
11745 if (node->type == XML_ELEMENT_NODE)
11746 doc = node->doc;
11747 else
11748 doc = (xmlDocPtr) node;
11749 if (doc == NULL)
11750 return(XML_ERR_INTERNAL_ERROR);
11751
11752 /*
11753 * allocate a context and set-up everything not related to the
11754 * node position in the tree
11755 */
11756 if (doc->type == XML_DOCUMENT_NODE)
11757 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11758#ifdef LIBXML_HTML_ENABLED
11759 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11760 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11761#endif
11762 else
11763 return(XML_ERR_INTERNAL_ERROR);
11764
11765 if (ctxt == NULL)
11766 return(XML_ERR_NO_MEMORY);
11767 fake = xmlNewComment(NULL);
11768 if (fake == NULL) {
11769 xmlFreeParserCtxt(ctxt);
11770 return(XML_ERR_NO_MEMORY);
11771 }
11772 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000011773
11774 /*
11775 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11776 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11777 * we must wait until the last moment to free the original one.
11778 */
Daniel Veillard29b17482004-08-16 00:39:03 +000011779 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000011780 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000011781 xmlDictFree(ctxt->dict);
11782 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000011783 } else
11784 options |= XML_PARSE_NODICT;
11785
11786 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000011787 xmlDetectSAX2(ctxt);
11788 ctxt->myDoc = doc;
11789
11790 if (node->type == XML_ELEMENT_NODE) {
11791 nodePush(ctxt, node);
11792 /*
11793 * initialize the SAX2 namespaces stack
11794 */
11795 cur = node;
11796 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11797 xmlNsPtr ns = cur->nsDef;
11798 const xmlChar *iprefix, *ihref;
11799
11800 while (ns != NULL) {
11801 if (ctxt->dict) {
11802 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11803 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11804 } else {
11805 iprefix = ns->prefix;
11806 ihref = ns->href;
11807 }
11808
11809 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11810 nsPush(ctxt, iprefix, ihref);
11811 nsnr++;
11812 }
11813 ns = ns->next;
11814 }
11815 cur = cur->parent;
11816 }
11817 ctxt->instate = XML_PARSER_CONTENT;
11818 }
11819
11820 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11821 /*
11822 * ID/IDREF registration will be done in xmlValidateElement below
11823 */
11824 ctxt->loadsubset |= XML_SKIP_IDS;
11825 }
11826
Daniel Veillard499cc922006-01-18 17:22:35 +000011827#ifdef LIBXML_HTML_ENABLED
11828 if (doc->type == XML_HTML_DOCUMENT_NODE)
11829 __htmlParseContent(ctxt);
11830 else
11831#endif
11832 xmlParseContent(ctxt);
11833
Daniel Veillard29b17482004-08-16 00:39:03 +000011834 nsPop(ctxt, nsnr);
11835 if ((RAW == '<') && (NXT(1) == '/')) {
11836 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11837 } else if (RAW != 0) {
11838 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11839 }
11840 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11841 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11842 ctxt->wellFormed = 0;
11843 }
11844
11845 if (!ctxt->wellFormed) {
11846 if (ctxt->errNo == 0)
11847 ret = XML_ERR_INTERNAL_ERROR;
11848 else
11849 ret = (xmlParserErrors)ctxt->errNo;
11850 } else {
11851 ret = XML_ERR_OK;
11852 }
11853
11854 /*
11855 * Return the newly created nodeset after unlinking it from
11856 * the pseudo sibling.
11857 */
11858
11859 cur = fake->next;
11860 fake->next = NULL;
11861 node->last = fake;
11862
11863 if (cur != NULL) {
11864 cur->prev = NULL;
11865 }
11866
11867 *lst = cur;
11868
11869 while (cur != NULL) {
11870 cur->parent = NULL;
11871 cur = cur->next;
11872 }
11873
11874 xmlUnlinkNode(fake);
11875 xmlFreeNode(fake);
11876
11877
11878 if (ret != XML_ERR_OK) {
11879 xmlFreeNodeList(*lst);
11880 *lst = NULL;
11881 }
William M. Brackc3f81342004-10-03 01:22:44 +000011882
William M. Brackb7b54de2004-10-06 16:38:01 +000011883 if (doc->dict != NULL)
11884 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000011885 xmlFreeParserCtxt(ctxt);
11886
11887 return(ret);
11888#else /* !SAX2 */
11889 return(XML_ERR_INTERNAL_ERROR);
11890#endif
11891}
11892
Daniel Veillard81273902003-09-30 00:43:48 +000011893#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011894/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011895 * xmlParseBalancedChunkMemoryRecover:
11896 * @doc: the document the chunk pertains to
11897 * @sax: the SAX handler bloc (possibly NULL)
11898 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11899 * @depth: Used for loop detection, use 0
11900 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11901 * @lst: the return value for the set of parsed nodes
11902 * @recover: return nodes even if the data is broken (use 0)
11903 *
11904 *
11905 * Parse a well-balanced chunk of an XML document
11906 * called by the parser
11907 * The allowed sequence for the Well Balanced Chunk is the one defined by
11908 * the content production in the XML grammar:
11909 *
11910 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11911 *
11912 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11913 * the parser error code otherwise
11914 *
11915 * In case recover is set to 1, the nodelist will not be empty even if
11916 * the parsed chunk is not well balanced.
11917 */
11918int
11919xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11920 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11921 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011922 xmlParserCtxtPtr ctxt;
11923 xmlDocPtr newDoc;
11924 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011925 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011926 int size;
11927 int ret = 0;
11928
11929 if (depth > 40) {
11930 return(XML_ERR_ENTITY_LOOP);
11931 }
11932
11933
Daniel Veillardcda96922001-08-21 10:56:31 +000011934 if (lst != NULL)
11935 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011936 if (string == NULL)
11937 return(-1);
11938
11939 size = xmlStrlen(string);
11940
11941 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11942 if (ctxt == NULL) return(-1);
11943 ctxt->userData = ctxt;
11944 if (sax != NULL) {
11945 oldsax = ctxt->sax;
11946 ctxt->sax = sax;
11947 if (user_data != NULL)
11948 ctxt->userData = user_data;
11949 }
11950 newDoc = xmlNewDoc(BAD_CAST "1.0");
11951 if (newDoc == NULL) {
11952 xmlFreeParserCtxt(ctxt);
11953 return(-1);
11954 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011955 if ((doc != NULL) && (doc->dict != NULL)) {
11956 xmlDictFree(ctxt->dict);
11957 ctxt->dict = doc->dict;
11958 xmlDictReference(ctxt->dict);
11959 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11960 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11961 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11962 ctxt->dictNames = 1;
11963 } else {
11964 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
11965 }
Owen Taylor3473f882001-02-23 17:55:21 +000011966 if (doc != NULL) {
11967 newDoc->intSubset = doc->intSubset;
11968 newDoc->extSubset = doc->extSubset;
11969 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011970 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11971 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011972 if (sax != NULL)
11973 ctxt->sax = oldsax;
11974 xmlFreeParserCtxt(ctxt);
11975 newDoc->intSubset = NULL;
11976 newDoc->extSubset = NULL;
11977 xmlFreeDoc(newDoc);
11978 return(-1);
11979 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011980 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11981 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011982 if (doc == NULL) {
11983 ctxt->myDoc = newDoc;
11984 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011985 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011986 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000011987 /* Ensure that doc has XML spec namespace */
11988 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
11989 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000011990 }
11991 ctxt->instate = XML_PARSER_CONTENT;
11992 ctxt->depth = depth;
11993
11994 /*
11995 * Doing validity checking on chunk doesn't make sense
11996 */
11997 ctxt->validate = 0;
11998 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011999 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012000
Daniel Veillardb39bc392002-10-26 19:29:51 +000012001 if ( doc != NULL ){
12002 content = doc->children;
12003 doc->children = NULL;
12004 xmlParseContent(ctxt);
12005 doc->children = content;
12006 }
12007 else {
12008 xmlParseContent(ctxt);
12009 }
Owen Taylor3473f882001-02-23 17:55:21 +000012010 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012011 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012012 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012013 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012014 }
12015 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012016 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012017 }
12018
12019 if (!ctxt->wellFormed) {
12020 if (ctxt->errNo == 0)
12021 ret = 1;
12022 else
12023 ret = ctxt->errNo;
12024 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012025 ret = 0;
12026 }
12027
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012028 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
12029 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012030
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012031 /*
12032 * Return the newly created nodeset after unlinking it from
12033 * they pseudo parent.
12034 */
12035 cur = newDoc->children->children;
12036 *lst = cur;
12037 while (cur != NULL) {
12038 xmlSetTreeDoc(cur, doc);
12039 cur->parent = NULL;
12040 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000012041 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012042 newDoc->children->children = NULL;
12043 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000012044
Owen Taylor3473f882001-02-23 17:55:21 +000012045 if (sax != NULL)
12046 ctxt->sax = oldsax;
12047 xmlFreeParserCtxt(ctxt);
12048 newDoc->intSubset = NULL;
12049 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000012050 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012051 xmlFreeDoc(newDoc);
12052
12053 return(ret);
12054}
12055
12056/**
12057 * xmlSAXParseEntity:
12058 * @sax: the SAX handler block
12059 * @filename: the filename
12060 *
12061 * parse an XML external entity out of context and build a tree.
12062 * It use the given SAX function block to handle the parsing callback.
12063 * If sax is NULL, fallback to the default DOM tree building routines.
12064 *
12065 * [78] extParsedEnt ::= TextDecl? content
12066 *
12067 * This correspond to a "Well Balanced" chunk
12068 *
12069 * Returns the resulting document tree
12070 */
12071
12072xmlDocPtr
12073xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12074 xmlDocPtr ret;
12075 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012076
12077 ctxt = xmlCreateFileParserCtxt(filename);
12078 if (ctxt == NULL) {
12079 return(NULL);
12080 }
12081 if (sax != NULL) {
12082 if (ctxt->sax != NULL)
12083 xmlFree(ctxt->sax);
12084 ctxt->sax = sax;
12085 ctxt->userData = NULL;
12086 }
12087
Owen Taylor3473f882001-02-23 17:55:21 +000012088 xmlParseExtParsedEnt(ctxt);
12089
12090 if (ctxt->wellFormed)
12091 ret = ctxt->myDoc;
12092 else {
12093 ret = NULL;
12094 xmlFreeDoc(ctxt->myDoc);
12095 ctxt->myDoc = NULL;
12096 }
12097 if (sax != NULL)
12098 ctxt->sax = NULL;
12099 xmlFreeParserCtxt(ctxt);
12100
12101 return(ret);
12102}
12103
12104/**
12105 * xmlParseEntity:
12106 * @filename: the filename
12107 *
12108 * parse an XML external entity out of context and build a tree.
12109 *
12110 * [78] extParsedEnt ::= TextDecl? content
12111 *
12112 * This correspond to a "Well Balanced" chunk
12113 *
12114 * Returns the resulting document tree
12115 */
12116
12117xmlDocPtr
12118xmlParseEntity(const char *filename) {
12119 return(xmlSAXParseEntity(NULL, filename));
12120}
Daniel Veillard81273902003-09-30 00:43:48 +000012121#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012122
12123/**
12124 * xmlCreateEntityParserCtxt:
12125 * @URL: the entity URL
12126 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000012127 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000012128 *
12129 * Create a parser context for an external entity
12130 * Automatic support for ZLIB/Compress compressed document is provided
12131 * by default if found at compile-time.
12132 *
12133 * Returns the new parser context or NULL
12134 */
12135xmlParserCtxtPtr
12136xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12137 const xmlChar *base) {
12138 xmlParserCtxtPtr ctxt;
12139 xmlParserInputPtr inputStream;
12140 char *directory = NULL;
12141 xmlChar *uri;
12142
12143 ctxt = xmlNewParserCtxt();
12144 if (ctxt == NULL) {
12145 return(NULL);
12146 }
12147
12148 uri = xmlBuildURI(URL, base);
12149
12150 if (uri == NULL) {
12151 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12152 if (inputStream == NULL) {
12153 xmlFreeParserCtxt(ctxt);
12154 return(NULL);
12155 }
12156
12157 inputPush(ctxt, inputStream);
12158
12159 if ((ctxt->directory == NULL) && (directory == NULL))
12160 directory = xmlParserGetDirectory((char *)URL);
12161 if ((ctxt->directory == NULL) && (directory != NULL))
12162 ctxt->directory = directory;
12163 } else {
12164 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12165 if (inputStream == NULL) {
12166 xmlFree(uri);
12167 xmlFreeParserCtxt(ctxt);
12168 return(NULL);
12169 }
12170
12171 inputPush(ctxt, inputStream);
12172
12173 if ((ctxt->directory == NULL) && (directory == NULL))
12174 directory = xmlParserGetDirectory((char *)uri);
12175 if ((ctxt->directory == NULL) && (directory != NULL))
12176 ctxt->directory = directory;
12177 xmlFree(uri);
12178 }
Owen Taylor3473f882001-02-23 17:55:21 +000012179 return(ctxt);
12180}
12181
12182/************************************************************************
12183 * *
12184 * Front ends when parsing from a file *
12185 * *
12186 ************************************************************************/
12187
12188/**
Daniel Veillard61b93382003-11-03 14:28:31 +000012189 * xmlCreateURLParserCtxt:
12190 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012191 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000012192 *
Daniel Veillard61b93382003-11-03 14:28:31 +000012193 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000012194 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000012195 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000012196 *
12197 * Returns the new parser context or NULL
12198 */
12199xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000012200xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000012201{
12202 xmlParserCtxtPtr ctxt;
12203 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000012204 char *directory = NULL;
12205
Owen Taylor3473f882001-02-23 17:55:21 +000012206 ctxt = xmlNewParserCtxt();
12207 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012208 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000012209 return(NULL);
12210 }
12211
Daniel Veillarddf292f72005-01-16 19:00:15 +000012212 if (options)
12213 xmlCtxtUseOptions(ctxt, options);
12214 ctxt->linenumbers = 1;
Igor Zlatkovicce076162003-02-23 13:39:39 +000012215
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000012216 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012217 if (inputStream == NULL) {
12218 xmlFreeParserCtxt(ctxt);
12219 return(NULL);
12220 }
12221
Owen Taylor3473f882001-02-23 17:55:21 +000012222 inputPush(ctxt, inputStream);
12223 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012224 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012225 if ((ctxt->directory == NULL) && (directory != NULL))
12226 ctxt->directory = directory;
12227
12228 return(ctxt);
12229}
12230
Daniel Veillard61b93382003-11-03 14:28:31 +000012231/**
12232 * xmlCreateFileParserCtxt:
12233 * @filename: the filename
12234 *
12235 * Create a parser context for a file content.
12236 * Automatic support for ZLIB/Compress compressed document is provided
12237 * by default if found at compile-time.
12238 *
12239 * Returns the new parser context or NULL
12240 */
12241xmlParserCtxtPtr
12242xmlCreateFileParserCtxt(const char *filename)
12243{
12244 return(xmlCreateURLParserCtxt(filename, 0));
12245}
12246
Daniel Veillard81273902003-09-30 00:43:48 +000012247#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012248/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012249 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000012250 * @sax: the SAX handler block
12251 * @filename: the filename
12252 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12253 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000012254 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000012255 *
12256 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12257 * compressed document is provided by default if found at compile-time.
12258 * It use the given SAX function block to handle the parsing callback.
12259 * If sax is NULL, fallback to the default DOM tree building routines.
12260 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000012261 * User data (void *) is stored within the parser context in the
12262 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000012263 *
Owen Taylor3473f882001-02-23 17:55:21 +000012264 * Returns the resulting document tree
12265 */
12266
12267xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000012268xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12269 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000012270 xmlDocPtr ret;
12271 xmlParserCtxtPtr ctxt;
12272 char *directory = NULL;
12273
Daniel Veillard635ef722001-10-29 11:48:19 +000012274 xmlInitParser();
12275
Owen Taylor3473f882001-02-23 17:55:21 +000012276 ctxt = xmlCreateFileParserCtxt(filename);
12277 if (ctxt == NULL) {
12278 return(NULL);
12279 }
12280 if (sax != NULL) {
12281 if (ctxt->sax != NULL)
12282 xmlFree(ctxt->sax);
12283 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012284 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012285 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000012286 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000012287 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000012288 }
Owen Taylor3473f882001-02-23 17:55:21 +000012289
12290 if ((ctxt->directory == NULL) && (directory == NULL))
12291 directory = xmlParserGetDirectory(filename);
12292 if ((ctxt->directory == NULL) && (directory != NULL))
12293 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
12294
Daniel Veillarddad3f682002-11-17 16:47:27 +000012295 ctxt->recovery = recovery;
12296
Owen Taylor3473f882001-02-23 17:55:21 +000012297 xmlParseDocument(ctxt);
12298
William M. Brackc07329e2003-09-08 01:57:30 +000012299 if ((ctxt->wellFormed) || recovery) {
12300 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000012301 if (ret != NULL) {
12302 if (ctxt->input->buf->compressed > 0)
12303 ret->compression = 9;
12304 else
12305 ret->compression = ctxt->input->buf->compressed;
12306 }
William M. Brackc07329e2003-09-08 01:57:30 +000012307 }
Owen Taylor3473f882001-02-23 17:55:21 +000012308 else {
12309 ret = NULL;
12310 xmlFreeDoc(ctxt->myDoc);
12311 ctxt->myDoc = NULL;
12312 }
12313 if (sax != NULL)
12314 ctxt->sax = NULL;
12315 xmlFreeParserCtxt(ctxt);
12316
12317 return(ret);
12318}
12319
12320/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012321 * xmlSAXParseFile:
12322 * @sax: the SAX handler block
12323 * @filename: the filename
12324 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12325 * documents
12326 *
12327 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12328 * compressed document is provided by default if found at compile-time.
12329 * It use the given SAX function block to handle the parsing callback.
12330 * If sax is NULL, fallback to the default DOM tree building routines.
12331 *
12332 * Returns the resulting document tree
12333 */
12334
12335xmlDocPtr
12336xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12337 int recovery) {
12338 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12339}
12340
12341/**
Owen Taylor3473f882001-02-23 17:55:21 +000012342 * xmlRecoverDoc:
12343 * @cur: a pointer to an array of xmlChar
12344 *
12345 * parse an XML in-memory document and build a tree.
12346 * In the case the document is not Well Formed, a tree is built anyway
12347 *
12348 * Returns the resulting document tree
12349 */
12350
12351xmlDocPtr
12352xmlRecoverDoc(xmlChar *cur) {
12353 return(xmlSAXParseDoc(NULL, cur, 1));
12354}
12355
12356/**
12357 * xmlParseFile:
12358 * @filename: the filename
12359 *
12360 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12361 * compressed document is provided by default if found at compile-time.
12362 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000012363 * Returns the resulting document tree if the file was wellformed,
12364 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000012365 */
12366
12367xmlDocPtr
12368xmlParseFile(const char *filename) {
12369 return(xmlSAXParseFile(NULL, filename, 0));
12370}
12371
12372/**
12373 * xmlRecoverFile:
12374 * @filename: the filename
12375 *
12376 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12377 * compressed document is provided by default if found at compile-time.
12378 * In the case the document is not Well Formed, a tree is built anyway
12379 *
12380 * Returns the resulting document tree
12381 */
12382
12383xmlDocPtr
12384xmlRecoverFile(const char *filename) {
12385 return(xmlSAXParseFile(NULL, filename, 1));
12386}
12387
12388
12389/**
12390 * xmlSetupParserForBuffer:
12391 * @ctxt: an XML parser context
12392 * @buffer: a xmlChar * buffer
12393 * @filename: a file name
12394 *
12395 * Setup the parser context to parse a new buffer; Clears any prior
12396 * contents from the parser context. The buffer parameter must not be
12397 * NULL, but the filename parameter can be
12398 */
12399void
12400xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12401 const char* filename)
12402{
12403 xmlParserInputPtr input;
12404
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012405 if ((ctxt == NULL) || (buffer == NULL))
12406 return;
12407
Owen Taylor3473f882001-02-23 17:55:21 +000012408 input = xmlNewInputStream(ctxt);
12409 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012410 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012411 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012412 return;
12413 }
12414
12415 xmlClearParserCtxt(ctxt);
12416 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000012417 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012418 input->base = buffer;
12419 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012420 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000012421 inputPush(ctxt, input);
12422}
12423
12424/**
12425 * xmlSAXUserParseFile:
12426 * @sax: a SAX handler
12427 * @user_data: The user data returned on SAX callbacks
12428 * @filename: a file name
12429 *
12430 * parse an XML file and call the given SAX handler routines.
12431 * Automatic support for ZLIB/Compress compressed document is provided
12432 *
12433 * Returns 0 in case of success or a error number otherwise
12434 */
12435int
12436xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12437 const char *filename) {
12438 int ret = 0;
12439 xmlParserCtxtPtr ctxt;
12440
12441 ctxt = xmlCreateFileParserCtxt(filename);
12442 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000012443#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012444 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012445#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012446 xmlFree(ctxt->sax);
12447 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012448 xmlDetectSAX2(ctxt);
12449
Owen Taylor3473f882001-02-23 17:55:21 +000012450 if (user_data != NULL)
12451 ctxt->userData = user_data;
12452
12453 xmlParseDocument(ctxt);
12454
12455 if (ctxt->wellFormed)
12456 ret = 0;
12457 else {
12458 if (ctxt->errNo != 0)
12459 ret = ctxt->errNo;
12460 else
12461 ret = -1;
12462 }
12463 if (sax != NULL)
12464 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012465 if (ctxt->myDoc != NULL) {
12466 xmlFreeDoc(ctxt->myDoc);
12467 ctxt->myDoc = NULL;
12468 }
Owen Taylor3473f882001-02-23 17:55:21 +000012469 xmlFreeParserCtxt(ctxt);
12470
12471 return ret;
12472}
Daniel Veillard81273902003-09-30 00:43:48 +000012473#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012474
12475/************************************************************************
12476 * *
12477 * Front ends when parsing from memory *
12478 * *
12479 ************************************************************************/
12480
12481/**
12482 * xmlCreateMemoryParserCtxt:
12483 * @buffer: a pointer to a char array
12484 * @size: the size of the array
12485 *
12486 * Create a parser context for an XML in-memory document.
12487 *
12488 * Returns the new parser context or NULL
12489 */
12490xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012491xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012492 xmlParserCtxtPtr ctxt;
12493 xmlParserInputPtr input;
12494 xmlParserInputBufferPtr buf;
12495
12496 if (buffer == NULL)
12497 return(NULL);
12498 if (size <= 0)
12499 return(NULL);
12500
12501 ctxt = xmlNewParserCtxt();
12502 if (ctxt == NULL)
12503 return(NULL);
12504
Daniel Veillard53350552003-09-18 13:35:51 +000012505 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000012506 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012507 if (buf == NULL) {
12508 xmlFreeParserCtxt(ctxt);
12509 return(NULL);
12510 }
Owen Taylor3473f882001-02-23 17:55:21 +000012511
12512 input = xmlNewInputStream(ctxt);
12513 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012514 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012515 xmlFreeParserCtxt(ctxt);
12516 return(NULL);
12517 }
12518
12519 input->filename = NULL;
12520 input->buf = buf;
12521 input->base = input->buf->buffer->content;
12522 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012523 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000012524
12525 inputPush(ctxt, input);
12526 return(ctxt);
12527}
12528
Daniel Veillard81273902003-09-30 00:43:48 +000012529#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012530/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012531 * xmlSAXParseMemoryWithData:
12532 * @sax: the SAX handler block
12533 * @buffer: an pointer to a char array
12534 * @size: the size of the array
12535 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12536 * documents
12537 * @data: the userdata
12538 *
12539 * parse an XML in-memory block and use the given SAX function block
12540 * to handle the parsing callback. If sax is NULL, fallback to the default
12541 * DOM tree building routines.
12542 *
12543 * User data (void *) is stored within the parser context in the
12544 * context's _private member, so it is available nearly everywhere in libxml
12545 *
12546 * Returns the resulting document tree
12547 */
12548
12549xmlDocPtr
12550xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12551 int size, int recovery, void *data) {
12552 xmlDocPtr ret;
12553 xmlParserCtxtPtr ctxt;
12554
12555 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12556 if (ctxt == NULL) return(NULL);
12557 if (sax != NULL) {
12558 if (ctxt->sax != NULL)
12559 xmlFree(ctxt->sax);
12560 ctxt->sax = sax;
12561 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012562 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012563 if (data!=NULL) {
12564 ctxt->_private=data;
12565 }
12566
Daniel Veillardadba5f12003-04-04 16:09:01 +000012567 ctxt->recovery = recovery;
12568
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012569 xmlParseDocument(ctxt);
12570
12571 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12572 else {
12573 ret = NULL;
12574 xmlFreeDoc(ctxt->myDoc);
12575 ctxt->myDoc = NULL;
12576 }
12577 if (sax != NULL)
12578 ctxt->sax = NULL;
12579 xmlFreeParserCtxt(ctxt);
12580
12581 return(ret);
12582}
12583
12584/**
Owen Taylor3473f882001-02-23 17:55:21 +000012585 * xmlSAXParseMemory:
12586 * @sax: the SAX handler block
12587 * @buffer: an pointer to a char array
12588 * @size: the size of the array
12589 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12590 * documents
12591 *
12592 * parse an XML in-memory block and use the given SAX function block
12593 * to handle the parsing callback. If sax is NULL, fallback to the default
12594 * DOM tree building routines.
12595 *
12596 * Returns the resulting document tree
12597 */
12598xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000012599xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12600 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012601 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012602}
12603
12604/**
12605 * xmlParseMemory:
12606 * @buffer: an pointer to a char array
12607 * @size: the size of the array
12608 *
12609 * parse an XML in-memory block and build a tree.
12610 *
12611 * Returns the resulting document tree
12612 */
12613
Daniel Veillard50822cb2001-07-26 20:05:51 +000012614xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012615 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12616}
12617
12618/**
12619 * xmlRecoverMemory:
12620 * @buffer: an pointer to a char array
12621 * @size: the size of the array
12622 *
12623 * parse an XML in-memory block and build a tree.
12624 * In the case the document is not Well Formed, a tree is built anyway
12625 *
12626 * Returns the resulting document tree
12627 */
12628
Daniel Veillard50822cb2001-07-26 20:05:51 +000012629xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012630 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12631}
12632
12633/**
12634 * xmlSAXUserParseMemory:
12635 * @sax: a SAX handler
12636 * @user_data: The user data returned on SAX callbacks
12637 * @buffer: an in-memory XML document input
12638 * @size: the length of the XML document in bytes
12639 *
12640 * A better SAX parsing routine.
12641 * parse an XML in-memory buffer and call the given SAX handler routines.
12642 *
12643 * Returns 0 in case of success or a error number otherwise
12644 */
12645int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012646 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012647 int ret = 0;
12648 xmlParserCtxtPtr ctxt;
12649 xmlSAXHandlerPtr oldsax = NULL;
12650
Daniel Veillard9e923512002-08-14 08:48:52 +000012651 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000012652 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12653 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000012654 oldsax = ctxt->sax;
12655 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012656 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000012657 if (user_data != NULL)
12658 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000012659
12660 xmlParseDocument(ctxt);
12661
12662 if (ctxt->wellFormed)
12663 ret = 0;
12664 else {
12665 if (ctxt->errNo != 0)
12666 ret = ctxt->errNo;
12667 else
12668 ret = -1;
12669 }
Daniel Veillard9e923512002-08-14 08:48:52 +000012670 ctxt->sax = oldsax;
Daniel Veillard34099b42004-11-04 17:34:35 +000012671 if (ctxt->myDoc != NULL) {
12672 xmlFreeDoc(ctxt->myDoc);
12673 ctxt->myDoc = NULL;
12674 }
Owen Taylor3473f882001-02-23 17:55:21 +000012675 xmlFreeParserCtxt(ctxt);
12676
12677 return ret;
12678}
Daniel Veillard81273902003-09-30 00:43:48 +000012679#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012680
12681/**
12682 * xmlCreateDocParserCtxt:
12683 * @cur: a pointer to an array of xmlChar
12684 *
12685 * Creates a parser context for an XML in-memory document.
12686 *
12687 * Returns the new parser context or NULL
12688 */
12689xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012690xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012691 int len;
12692
12693 if (cur == NULL)
12694 return(NULL);
12695 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012696 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000012697}
12698
Daniel Veillard81273902003-09-30 00:43:48 +000012699#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012700/**
12701 * xmlSAXParseDoc:
12702 * @sax: the SAX handler block
12703 * @cur: a pointer to an array of xmlChar
12704 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12705 * documents
12706 *
12707 * parse an XML in-memory document and build a tree.
12708 * It use the given SAX function block to handle the parsing callback.
12709 * If sax is NULL, fallback to the default DOM tree building routines.
12710 *
12711 * Returns the resulting document tree
12712 */
12713
12714xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012715xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000012716 xmlDocPtr ret;
12717 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000012718 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012719
Daniel Veillard38936062004-11-04 17:45:11 +000012720 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012721
12722
12723 ctxt = xmlCreateDocParserCtxt(cur);
12724 if (ctxt == NULL) return(NULL);
12725 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000012726 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012727 ctxt->sax = sax;
12728 ctxt->userData = NULL;
12729 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012730 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012731
12732 xmlParseDocument(ctxt);
12733 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12734 else {
12735 ret = NULL;
12736 xmlFreeDoc(ctxt->myDoc);
12737 ctxt->myDoc = NULL;
12738 }
Daniel Veillard34099b42004-11-04 17:34:35 +000012739 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000012740 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000012741 xmlFreeParserCtxt(ctxt);
12742
12743 return(ret);
12744}
12745
12746/**
12747 * xmlParseDoc:
12748 * @cur: a pointer to an array of xmlChar
12749 *
12750 * parse an XML in-memory document and build a tree.
12751 *
12752 * Returns the resulting document tree
12753 */
12754
12755xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012756xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012757 return(xmlSAXParseDoc(NULL, cur, 0));
12758}
Daniel Veillard81273902003-09-30 00:43:48 +000012759#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012760
Daniel Veillard81273902003-09-30 00:43:48 +000012761#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000012762/************************************************************************
12763 * *
12764 * Specific function to keep track of entities references *
12765 * and used by the XSLT debugger *
12766 * *
12767 ************************************************************************/
12768
/*
 * Callback installed through xmlSetEntityReferenceFunc() and invoked by
 * xmlAddEntityReference(); NULL means no callback is registered.
 */
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12770
12771/**
12772 * xmlAddEntityReference:
12773 * @ent : A valid entity
12774 * @firstNode : A valid first node for children of entity
12775 * @lastNode : A valid last node of children entity
12776 *
12777 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12778 */
12779static void
12780xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12781 xmlNodePtr lastNode)
12782{
12783 if (xmlEntityRefFunc != NULL) {
12784 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12785 }
12786}
12787
12788
12789/**
12790 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000012791 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000012792 *
12793 * Set the function to call call back when a xml reference has been made
12794 */
12795void
12796xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12797{
12798 xmlEntityRefFunc = func;
12799}
Daniel Veillard81273902003-09-30 00:43:48 +000012800#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012801
12802/************************************************************************
12803 * *
12804 * Miscellaneous *
12805 * *
12806 ************************************************************************/
12807
12808#ifdef LIBXML_XPATH_ENABLED
12809#include <libxml/xpath.h>
12810#endif
12811
Daniel Veillardffa3c742005-07-21 13:24:09 +000012812extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012813static int xmlParserInitialized = 0;
12814
12815/**
12816 * xmlInitParser:
12817 *
12818 * Initialization function for the XML parser.
12819 * This is not reentrant. Call once before processing in case of
12820 * use in multithreaded programs.
12821 */
12822
12823void
12824xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012825 if (xmlParserInitialized != 0)
12826 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012827
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012828 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12829 (xmlGenericError == NULL))
12830 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012831 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000012832 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000012833 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000012834 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000012835 xmlDefaultSAXHandlerInit();
12836 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012837#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012838 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012839#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012840#ifdef LIBXML_HTML_ENABLED
12841 htmlInitAutoClose();
12842 htmlDefaultSAXHandlerInit();
12843#endif
12844#ifdef LIBXML_XPATH_ENABLED
12845 xmlXPathInit();
12846#endif
12847 xmlParserInitialized = 1;
12848}
12849
12850/**
12851 * xmlCleanupParser:
12852 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012853 * Cleanup function for the XML library. It tries to reclaim all
12854 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000012855 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012856 * function should not prevent reusing the library but one should
12857 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000012858 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012859 */
12860
12861void
12862xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012863 if (!xmlParserInitialized)
12864 return;
12865
Owen Taylor3473f882001-02-23 17:55:21 +000012866 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012867#ifdef LIBXML_CATALOG_ENABLED
12868 xmlCatalogCleanup();
12869#endif
Daniel Veillard14412512005-01-21 23:53:26 +000012870 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000012871 xmlCleanupInputCallbacks();
12872#ifdef LIBXML_OUTPUT_ENABLED
12873 xmlCleanupOutputCallbacks();
12874#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012875#ifdef LIBXML_SCHEMAS_ENABLED
12876 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000012877 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012878#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012879 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012880 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000012881 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000012882 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000012883 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012884}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012885
12886/************************************************************************
12887 * *
12888 * New set (2.6.0) of simpler and more flexible APIs *
12889 * *
12890 ************************************************************************/
12891
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the macro
 * is used; a NULL @str is ignored.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and stays safe inside an unbraced if/else. Callers
 * supply the terminating semicolon.
 */
#define DICT_FREE(str)                                                  \
    do {                                                                \
        if ((str) && ((!dict) ||                                        \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));                                     \
    } while (0)
12903
12904/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012905 * xmlCtxtReset:
12906 * @ctxt: an XML parser context
12907 *
12908 * Reset a parser context
12909 */
12910void
12911xmlCtxtReset(xmlParserCtxtPtr ctxt)
12912{
12913 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012914 xmlDictPtr dict;
12915
12916 if (ctxt == NULL)
12917 return;
12918
12919 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012920
12921 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12922 xmlFreeInputStream(input);
12923 }
12924 ctxt->inputNr = 0;
12925 ctxt->input = NULL;
12926
12927 ctxt->spaceNr = 0;
12928 ctxt->spaceTab[0] = -1;
12929 ctxt->space = &ctxt->spaceTab[0];
12930
12931
12932 ctxt->nodeNr = 0;
12933 ctxt->node = NULL;
12934
12935 ctxt->nameNr = 0;
12936 ctxt->name = NULL;
12937
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012938 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012939 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012940 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012941 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012942 DICT_FREE(ctxt->directory);
12943 ctxt->directory = NULL;
12944 DICT_FREE(ctxt->extSubURI);
12945 ctxt->extSubURI = NULL;
12946 DICT_FREE(ctxt->extSubSystem);
12947 ctxt->extSubSystem = NULL;
12948 if (ctxt->myDoc != NULL)
12949 xmlFreeDoc(ctxt->myDoc);
12950 ctxt->myDoc = NULL;
12951
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012952 ctxt->standalone = -1;
12953 ctxt->hasExternalSubset = 0;
12954 ctxt->hasPErefs = 0;
12955 ctxt->html = 0;
12956 ctxt->external = 0;
12957 ctxt->instate = XML_PARSER_START;
12958 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012959
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012960 ctxt->wellFormed = 1;
12961 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000012962 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012963 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012964#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012965 ctxt->vctxt.userData = ctxt;
12966 ctxt->vctxt.error = xmlParserValidityError;
12967 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012968#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012969 ctxt->record_info = 0;
12970 ctxt->nbChars = 0;
12971 ctxt->checkIndex = 0;
12972 ctxt->inSubset = 0;
12973 ctxt->errNo = XML_ERR_OK;
12974 ctxt->depth = 0;
12975 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12976 ctxt->catalogs = NULL;
12977 xmlInitNodeInfoSeq(&ctxt->node_seq);
12978
12979 if (ctxt->attsDefault != NULL) {
12980 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12981 ctxt->attsDefault = NULL;
12982 }
12983 if (ctxt->attsSpecial != NULL) {
12984 xmlHashFree(ctxt->attsSpecial, NULL);
12985 ctxt->attsSpecial = NULL;
12986 }
12987
Daniel Veillard4432df22003-09-28 18:58:27 +000012988#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012989 if (ctxt->catalogs != NULL)
12990 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012991#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000012992 if (ctxt->lastError.code != XML_ERR_OK)
12993 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012994}
12995
12996/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012997 * xmlCtxtResetPush:
12998 * @ctxt: an XML parser context
12999 * @chunk: a pointer to an array of chars
13000 * @size: number of chars in the array
13001 * @filename: an optional file name or URI
13002 * @encoding: the document encoding, or NULL
13003 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013004 * Reset a push parser context
13005 *
13006 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013007 */
13008int
13009xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13010 int size, const char *filename, const char *encoding)
13011{
13012 xmlParserInputPtr inputStream;
13013 xmlParserInputBufferPtr buf;
13014 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
13015
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013016 if (ctxt == NULL)
13017 return(1);
13018
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013019 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
13020 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
13021
13022 buf = xmlAllocParserInputBuffer(enc);
13023 if (buf == NULL)
13024 return(1);
13025
13026 if (ctxt == NULL) {
13027 xmlFreeParserInputBuffer(buf);
13028 return(1);
13029 }
13030
13031 xmlCtxtReset(ctxt);
13032
13033 if (ctxt->pushTab == NULL) {
13034 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
13035 sizeof(xmlChar *));
13036 if (ctxt->pushTab == NULL) {
13037 xmlErrMemory(ctxt, NULL);
13038 xmlFreeParserInputBuffer(buf);
13039 return(1);
13040 }
13041 }
13042
13043 if (filename == NULL) {
13044 ctxt->directory = NULL;
13045 } else {
13046 ctxt->directory = xmlParserGetDirectory(filename);
13047 }
13048
13049 inputStream = xmlNewInputStream(ctxt);
13050 if (inputStream == NULL) {
13051 xmlFreeParserInputBuffer(buf);
13052 return(1);
13053 }
13054
13055 if (filename == NULL)
13056 inputStream->filename = NULL;
13057 else
13058 inputStream->filename = (char *)
13059 xmlCanonicPath((const xmlChar *) filename);
13060 inputStream->buf = buf;
13061 inputStream->base = inputStream->buf->buffer->content;
13062 inputStream->cur = inputStream->buf->buffer->content;
13063 inputStream->end =
13064 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
13065
13066 inputPush(ctxt, inputStream);
13067
13068 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
13069 (ctxt->input->buf != NULL)) {
13070 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
13071 int cur = ctxt->input->cur - ctxt->input->base;
13072
13073 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
13074
13075 ctxt->input->base = ctxt->input->buf->buffer->content + base;
13076 ctxt->input->cur = ctxt->input->base + cur;
13077 ctxt->input->end =
13078 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
13079 use];
13080#ifdef DEBUG_PUSH
13081 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
13082#endif
13083 }
13084
13085 if (encoding != NULL) {
13086 xmlCharEncodingHandlerPtr hdlr;
13087
13088 hdlr = xmlFindCharEncodingHandler(encoding);
13089 if (hdlr != NULL) {
13090 xmlSwitchToEncoding(ctxt, hdlr);
13091 } else {
13092 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
13093 "Unsupported encoding %s\n", BAD_CAST encoding);
13094 }
13095 } else if (enc != XML_CHAR_ENCODING_NONE) {
13096 xmlSwitchEncoding(ctxt, enc);
13097 }
13098
13099 return(0);
13100}
13101
13102/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013103 * xmlCtxtUseOptions:
13104 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013105 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013106 *
13107 * Applies the options to the parser context
13108 *
13109 * Returns 0 in case of success, the set of unknown or unimplemented options
13110 * in case of error.
13111 */
13112int
13113xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13114{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013115 if (ctxt == NULL)
13116 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013117 if (options & XML_PARSE_RECOVER) {
13118 ctxt->recovery = 1;
13119 options -= XML_PARSE_RECOVER;
13120 } else
13121 ctxt->recovery = 0;
13122 if (options & XML_PARSE_DTDLOAD) {
13123 ctxt->loadsubset = XML_DETECT_IDS;
13124 options -= XML_PARSE_DTDLOAD;
13125 } else
13126 ctxt->loadsubset = 0;
13127 if (options & XML_PARSE_DTDATTR) {
13128 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13129 options -= XML_PARSE_DTDATTR;
13130 }
13131 if (options & XML_PARSE_NOENT) {
13132 ctxt->replaceEntities = 1;
13133 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13134 options -= XML_PARSE_NOENT;
13135 } else
13136 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013137 if (options & XML_PARSE_PEDANTIC) {
13138 ctxt->pedantic = 1;
13139 options -= XML_PARSE_PEDANTIC;
13140 } else
13141 ctxt->pedantic = 0;
13142 if (options & XML_PARSE_NOBLANKS) {
13143 ctxt->keepBlanks = 0;
13144 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13145 options -= XML_PARSE_NOBLANKS;
13146 } else
13147 ctxt->keepBlanks = 1;
13148 if (options & XML_PARSE_DTDVALID) {
13149 ctxt->validate = 1;
13150 if (options & XML_PARSE_NOWARNING)
13151 ctxt->vctxt.warning = NULL;
13152 if (options & XML_PARSE_NOERROR)
13153 ctxt->vctxt.error = NULL;
13154 options -= XML_PARSE_DTDVALID;
13155 } else
13156 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000013157 if (options & XML_PARSE_NOWARNING) {
13158 ctxt->sax->warning = NULL;
13159 options -= XML_PARSE_NOWARNING;
13160 }
13161 if (options & XML_PARSE_NOERROR) {
13162 ctxt->sax->error = NULL;
13163 ctxt->sax->fatalError = NULL;
13164 options -= XML_PARSE_NOERROR;
13165 }
Daniel Veillard81273902003-09-30 00:43:48 +000013166#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013167 if (options & XML_PARSE_SAX1) {
13168 ctxt->sax->startElement = xmlSAX2StartElement;
13169 ctxt->sax->endElement = xmlSAX2EndElement;
13170 ctxt->sax->startElementNs = NULL;
13171 ctxt->sax->endElementNs = NULL;
13172 ctxt->sax->initialized = 1;
13173 options -= XML_PARSE_SAX1;
13174 }
Daniel Veillard81273902003-09-30 00:43:48 +000013175#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013176 if (options & XML_PARSE_NODICT) {
13177 ctxt->dictNames = 0;
13178 options -= XML_PARSE_NODICT;
13179 } else {
13180 ctxt->dictNames = 1;
13181 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000013182 if (options & XML_PARSE_NOCDATA) {
13183 ctxt->sax->cdataBlock = NULL;
13184 options -= XML_PARSE_NOCDATA;
13185 }
13186 if (options & XML_PARSE_NSCLEAN) {
13187 ctxt->options |= XML_PARSE_NSCLEAN;
13188 options -= XML_PARSE_NSCLEAN;
13189 }
Daniel Veillard61b93382003-11-03 14:28:31 +000013190 if (options & XML_PARSE_NONET) {
13191 ctxt->options |= XML_PARSE_NONET;
13192 options -= XML_PARSE_NONET;
13193 }
Daniel Veillard8874b942005-08-25 13:19:21 +000013194 if (options & XML_PARSE_COMPACT) {
13195 ctxt->options |= XML_PARSE_COMPACT;
13196 options -= XML_PARSE_COMPACT;
13197 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000013198 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013199 return (options);
13200}
13201
13202/**
13203 * xmlDoRead:
13204 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000013205 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013206 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013207 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013208 * @reuse: keep the context for reuse
13209 *
13210 * Common front-end for the xmlRead functions
13211 *
13212 * Returns the resulting document tree or NULL
13213 */
13214static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013215xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13216 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013217{
13218 xmlDocPtr ret;
13219
13220 xmlCtxtUseOptions(ctxt, options);
13221 if (encoding != NULL) {
13222 xmlCharEncodingHandlerPtr hdlr;
13223
13224 hdlr = xmlFindCharEncodingHandler(encoding);
13225 if (hdlr != NULL)
13226 xmlSwitchToEncoding(ctxt, hdlr);
13227 }
Daniel Veillard60942de2003-09-25 21:05:58 +000013228 if ((URL != NULL) && (ctxt->input != NULL) &&
13229 (ctxt->input->filename == NULL))
13230 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013231 xmlParseDocument(ctxt);
13232 if ((ctxt->wellFormed) || ctxt->recovery)
13233 ret = ctxt->myDoc;
13234 else {
13235 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013236 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013237 xmlFreeDoc(ctxt->myDoc);
13238 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013239 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013240 ctxt->myDoc = NULL;
13241 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013242 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013243 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013244
13245 return (ret);
13246}
13247
13248/**
13249 * xmlReadDoc:
13250 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013251 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013252 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013253 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013254 *
13255 * parse an XML in-memory document and build a tree.
13256 *
13257 * Returns the resulting document tree
13258 */
13259xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013260xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013261{
13262 xmlParserCtxtPtr ctxt;
13263
13264 if (cur == NULL)
13265 return (NULL);
13266
13267 ctxt = xmlCreateDocParserCtxt(cur);
13268 if (ctxt == NULL)
13269 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013270 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013271}
13272
13273/**
13274 * xmlReadFile:
13275 * @filename: a file or URL
13276 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013277 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013278 *
13279 * parse an XML file from the filesystem or the network.
13280 *
13281 * Returns the resulting document tree
13282 */
13283xmlDocPtr
13284xmlReadFile(const char *filename, const char *encoding, int options)
13285{
13286 xmlParserCtxtPtr ctxt;
13287
Daniel Veillard61b93382003-11-03 14:28:31 +000013288 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013289 if (ctxt == NULL)
13290 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013291 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013292}
13293
13294/**
13295 * xmlReadMemory:
13296 * @buffer: a pointer to a char array
13297 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013298 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013299 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013300 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013301 *
13302 * parse an XML in-memory document and build a tree.
13303 *
13304 * Returns the resulting document tree
13305 */
13306xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013307xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013308{
13309 xmlParserCtxtPtr ctxt;
13310
13311 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13312 if (ctxt == NULL)
13313 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013314 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013315}
13316
13317/**
13318 * xmlReadFd:
13319 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013320 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013321 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013322 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013323 *
13324 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013325 * NOTE that the file descriptor will not be closed when the
13326 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013327 *
13328 * Returns the resulting document tree
13329 */
13330xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013331xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013332{
13333 xmlParserCtxtPtr ctxt;
13334 xmlParserInputBufferPtr input;
13335 xmlParserInputPtr stream;
13336
13337 if (fd < 0)
13338 return (NULL);
13339
13340 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13341 if (input == NULL)
13342 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013343 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013344 ctxt = xmlNewParserCtxt();
13345 if (ctxt == NULL) {
13346 xmlFreeParserInputBuffer(input);
13347 return (NULL);
13348 }
13349 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13350 if (stream == NULL) {
13351 xmlFreeParserInputBuffer(input);
13352 xmlFreeParserCtxt(ctxt);
13353 return (NULL);
13354 }
13355 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013356 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013357}
13358
13359/**
13360 * xmlReadIO:
13361 * @ioread: an I/O read function
13362 * @ioclose: an I/O close function
13363 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013364 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013365 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013366 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013367 *
13368 * parse an XML document from I/O functions and source and build a tree.
13369 *
13370 * Returns the resulting document tree
13371 */
13372xmlDocPtr
13373xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000013374 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013375{
13376 xmlParserCtxtPtr ctxt;
13377 xmlParserInputBufferPtr input;
13378 xmlParserInputPtr stream;
13379
13380 if (ioread == NULL)
13381 return (NULL);
13382
13383 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13384 XML_CHAR_ENCODING_NONE);
13385 if (input == NULL)
13386 return (NULL);
13387 ctxt = xmlNewParserCtxt();
13388 if (ctxt == NULL) {
13389 xmlFreeParserInputBuffer(input);
13390 return (NULL);
13391 }
13392 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13393 if (stream == NULL) {
13394 xmlFreeParserInputBuffer(input);
13395 xmlFreeParserCtxt(ctxt);
13396 return (NULL);
13397 }
13398 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013399 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013400}
13401
13402/**
13403 * xmlCtxtReadDoc:
13404 * @ctxt: an XML parser context
13405 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013406 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013407 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013408 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013409 *
13410 * parse an XML in-memory document and build a tree.
13411 * This reuses the existing @ctxt parser context
13412 *
13413 * Returns the resulting document tree
13414 */
13415xmlDocPtr
13416xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000013417 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013418{
13419 xmlParserInputPtr stream;
13420
13421 if (cur == NULL)
13422 return (NULL);
13423 if (ctxt == NULL)
13424 return (NULL);
13425
13426 xmlCtxtReset(ctxt);
13427
13428 stream = xmlNewStringInputStream(ctxt, cur);
13429 if (stream == NULL) {
13430 return (NULL);
13431 }
13432 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013433 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013434}
13435
13436/**
13437 * xmlCtxtReadFile:
13438 * @ctxt: an XML parser context
13439 * @filename: a file or URL
13440 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013441 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013442 *
13443 * parse an XML file from the filesystem or the network.
13444 * This reuses the existing @ctxt parser context
13445 *
13446 * Returns the resulting document tree
13447 */
13448xmlDocPtr
13449xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13450 const char *encoding, int options)
13451{
13452 xmlParserInputPtr stream;
13453
13454 if (filename == NULL)
13455 return (NULL);
13456 if (ctxt == NULL)
13457 return (NULL);
13458
13459 xmlCtxtReset(ctxt);
13460
Daniel Veillard29614c72004-11-26 10:47:26 +000013461 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013462 if (stream == NULL) {
13463 return (NULL);
13464 }
13465 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013466 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013467}
13468
13469/**
13470 * xmlCtxtReadMemory:
13471 * @ctxt: an XML parser context
13472 * @buffer: a pointer to a char array
13473 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013474 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013475 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013476 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013477 *
13478 * parse an XML in-memory document and build a tree.
13479 * This reuses the existing @ctxt parser context
13480 *
13481 * Returns the resulting document tree
13482 */
13483xmlDocPtr
13484xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000013485 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013486{
13487 xmlParserInputBufferPtr input;
13488 xmlParserInputPtr stream;
13489
13490 if (ctxt == NULL)
13491 return (NULL);
13492 if (buffer == NULL)
13493 return (NULL);
13494
13495 xmlCtxtReset(ctxt);
13496
13497 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13498 if (input == NULL) {
13499 return(NULL);
13500 }
13501
13502 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13503 if (stream == NULL) {
13504 xmlFreeParserInputBuffer(input);
13505 return(NULL);
13506 }
13507
13508 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013509 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013510}
13511
13512/**
13513 * xmlCtxtReadFd:
13514 * @ctxt: an XML parser context
13515 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013516 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013517 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013518 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013519 *
13520 * parse an XML from a file descriptor and build a tree.
13521 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013522 * NOTE that the file descriptor will not be closed when the
13523 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013524 *
13525 * Returns the resulting document tree
13526 */
13527xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013528xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13529 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013530{
13531 xmlParserInputBufferPtr input;
13532 xmlParserInputPtr stream;
13533
13534 if (fd < 0)
13535 return (NULL);
13536 if (ctxt == NULL)
13537 return (NULL);
13538
13539 xmlCtxtReset(ctxt);
13540
13541
13542 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13543 if (input == NULL)
13544 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013545 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013546 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13547 if (stream == NULL) {
13548 xmlFreeParserInputBuffer(input);
13549 return (NULL);
13550 }
13551 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013552 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013553}
13554
13555/**
13556 * xmlCtxtReadIO:
13557 * @ctxt: an XML parser context
13558 * @ioread: an I/O read function
13559 * @ioclose: an I/O close function
13560 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013561 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013562 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013563 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013564 *
13565 * parse an XML document from I/O functions and source and build a tree.
13566 * This reuses the existing @ctxt parser context
13567 *
13568 * Returns the resulting document tree
13569 */
13570xmlDocPtr
13571xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13572 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000013573 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013574 const char *encoding, int options)
13575{
13576 xmlParserInputBufferPtr input;
13577 xmlParserInputPtr stream;
13578
13579 if (ioread == NULL)
13580 return (NULL);
13581 if (ctxt == NULL)
13582 return (NULL);
13583
13584 xmlCtxtReset(ctxt);
13585
13586 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13587 XML_CHAR_ENCODING_NONE);
13588 if (input == NULL)
13589 return (NULL);
13590 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13591 if (stream == NULL) {
13592 xmlFreeParserInputBuffer(input);
13593 return (NULL);
13594 }
13595 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013596 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013597}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000013598
13599#define bottom_parser
13600#include "elfgcchack.h"