blob: 5dc6d01d1d4454f1d4d2f2c62a381706a63f891f [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their corresponding
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
/*
 * XML_DIR_SEP: directory separator character for the build target:
 * a backslash when WIN32 is defined and __CYGWIN__ is not, a forward
 * slash in every other configuration.
 */
#if defined(WIN32) && !defined (__CYGWIN__)
#define XML_DIR_SEP '\\'
#else
#define XML_DIR_SEP '/'
#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to be
 * processed. This is not a limitation of the parser itself but a safety
 * boundary feature. The variable has external linkage (it is not
 * declared static) and is initialized to 1024.
 */
unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000091
/* SAX2: defined to 1; its consumers are not visible in this part of the
 * file (NOTE(review): presumably selects the SAX2 code paths - confirm). */
#define SAX2 1

/* Buffer size constants, in units decided by their users (not visible
 * here; presumably element counts for temporary parsing buffers). */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Marker string, cast to an xmlChar string through BAD_CAST. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
98
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The array is terminated by a NULL entry so it can be walked without
 * a separate length.
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
/*
 * Forward declarations of routines referenced before their definitions
 * (the definitions themselves are not visible in this part of the file).
 */

/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
/* Not declared static, so this one has external linkage. */
xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);

/* File-local helper; returns an xmlParserErrors code. */
static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
                              xmlSAXHandlerPtr sax,
                              void *user_data, int depth, const xmlChar *URL,
                              const xmlChar *ID, xmlNodePtr *list);

/* Only declared when the legacy API is compiled in. */
#ifdef LIBXML_LEGACY_ENABLED
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
#endif /* LIBXML_LEGACY_ENABLED */

/* File-local helper; returns an xmlParserErrors code. */
static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
              const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
129/************************************************************************
130 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000131 * Some factorized error routines *
132 * *
133 ************************************************************************/
134
135/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000136 * xmlErrAttributeDup:
137 * @ctxt: an XML parser context
138 * @prefix: the attribute prefix
139 * @localname: the attribute localname
140 *
141 * Handle a redefinition of attribute error
142 */
143static void
144xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
145 const xmlChar * localname)
146{
Daniel Veillard157fee02003-10-31 10:36:03 +0000147 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
148 (ctxt->instate == XML_PARSER_EOF))
149 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000150 if (ctxt != NULL)
151 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000152 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000153 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000154 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
155 (const char *) localname, NULL, NULL, 0, 0,
156 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000157 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000158 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000159 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
160 (const char *) prefix, (const char *) localname,
161 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
162 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000163 if (ctxt != NULL) {
164 ctxt->wellFormed = 0;
165 if (ctxt->recovery == 0)
166 ctxt->disableSAX = 1;
167 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000168}
169
170/**
171 * xmlFatalErr:
172 * @ctxt: an XML parser context
173 * @error: the error number
174 * @extra: extra information string
175 *
176 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
177 */
178static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000179xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000180{
181 const char *errmsg;
182
Daniel Veillard157fee02003-10-31 10:36:03 +0000183 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
184 (ctxt->instate == XML_PARSER_EOF))
185 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000186 switch (error) {
187 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid hexadecimal value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid decimal value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "CharRef: invalid value\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "internal error";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference at end of document\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference in prolog\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference in epilog\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference: no name\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "PEReference: expecting ';'\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "Detected an entity reference loop\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "EntityValue: \" or ' expected\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "PEReferences forbidden in internal subset\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "EntityValue: \" or ' expected\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "AttValue: \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "Unescaped '<' not allowed in attributes values\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "SystemLiteral \" or ' expected\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "Unfinished System or Public ID \" or ' expected\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "Sequence ']]>' not allowed in content\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "PUBLIC, the Public Identifier is missing\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "Comment must not contain '--' (double-hyphen)\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "xmlParsePI : no target name\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "Invalid PI name\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "NOTATION: Name expected here\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "'>' required to close NOTATION declaration\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "Entity value required\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "Fragment not allowed";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "'(' required to start ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "NmToken expected in ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "')' required to finish ATTLIST enumeration\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "ContentDecl : Name or '(' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
288 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000289 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000290 errmsg =
291 "PEReference: forbidden within markup decl in internal subset\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "expected '>'\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "XML conditional section '[' expected\n";
298 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000299 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000300 errmsg = "Content error in the external subset\n";
301 break;
302 case XML_ERR_CONDSEC_INVALID_KEYWORD:
303 errmsg =
304 "conditional section INCLUDE or IGNORE keyword expected\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "XML conditional section not closed\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "Text declaration '<?xml' required\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "parsing XML declaration: '?>' expected\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "external parsed entities cannot be standalone\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "EntityRef: expecting ';'\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "DOCTYPE improperly terminated\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "EndTag: '</' not found\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "expected '='\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "String not closed expecting \" or '\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "String not started expecting ' or \"\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Invalid XML encoding name\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "standalone accepts only 'yes' or 'no'\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "Document is empty\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "Extra content at the end of the document\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "chunk is not well balanced\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "extra content at the end of well balanced chunk\n";
353 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000354 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 errmsg = "Malformed declaration expecting version\n";
356 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000358 case:
359 errmsg = "\n";
360 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000362 default:
363 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000365 if (ctxt != NULL)
366 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000367 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000368 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
369 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000370 if (ctxt != NULL) {
371 ctxt->wellFormed = 0;
372 if (ctxt->recovery == 0)
373 ctxt->disableSAX = 1;
374 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000375}
376
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000377/**
378 * xmlFatalErrMsg:
379 * @ctxt: an XML parser context
380 * @error: the error number
381 * @msg: the error message
382 *
383 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
384 */
385static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000386xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
387 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000388{
Daniel Veillard157fee02003-10-31 10:36:03 +0000389 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
390 (ctxt->instate == XML_PARSER_EOF))
391 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000392 if (ctxt != NULL)
393 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000394 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000395 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000396 if (ctxt != NULL) {
397 ctxt->wellFormed = 0;
398 if (ctxt->recovery == 0)
399 ctxt->disableSAX = 1;
400 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000401}
402
403/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000404 * xmlWarningMsg:
405 * @ctxt: an XML parser context
406 * @error: the error number
407 * @msg: the error message
408 * @str1: extra data
409 * @str2: extra data
410 *
411 * Handle a warning.
412 */
413static void
414xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
415 const char *msg, const xmlChar *str1, const xmlChar *str2)
416{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000417 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000418
Daniel Veillard157fee02003-10-31 10:36:03 +0000419 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
420 (ctxt->instate == XML_PARSER_EOF))
421 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000422 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
423 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000424 schannel = ctxt->sax->serror;
425 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000426 (ctxt->sax) ? ctxt->sax->warning : NULL,
427 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000428 ctxt, NULL, XML_FROM_PARSER, error,
429 XML_ERR_WARNING, NULL, 0,
430 (const char *) str1, (const char *) str2, NULL, 0, 0,
431 msg, (const char *) str1, (const char *) str2);
432}
433
434/**
435 * xmlValidityError:
436 * @ctxt: an XML parser context
437 * @error: the error number
438 * @msg: the error message
439 * @str1: extra data
440 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000441 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000442 */
443static void
444xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
445 const char *msg, const xmlChar *str1)
446{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000447 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000448
449 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
450 (ctxt->instate == XML_PARSER_EOF))
451 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000452 if (ctxt != NULL) {
453 ctxt->errNo = error;
454 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
455 schannel = ctxt->sax->serror;
456 }
Daniel Veillardc790bf42003-10-11 10:50:10 +0000457 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000458 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000459 ctxt, NULL, XML_FROM_DTD, error,
460 XML_ERR_ERROR, NULL, 0, (const char *) str1,
461 NULL, NULL, 0, 0,
462 msg, (const char *) str1);
Daniel Veillard30e76072006-03-09 14:13:55 +0000463 if (ctxt != NULL) {
464 ctxt->valid = 0;
465 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000466}
467
468/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000469 * xmlFatalErrMsgInt:
470 * @ctxt: an XML parser context
471 * @error: the error number
472 * @msg: the error message
473 * @val: an integer value
474 *
475 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
476 */
477static void
478xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000479 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000480{
Daniel Veillard157fee02003-10-31 10:36:03 +0000481 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
482 (ctxt->instate == XML_PARSER_EOF))
483 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000484 if (ctxt != NULL)
485 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000486 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000487 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
488 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000489 if (ctxt != NULL) {
490 ctxt->wellFormed = 0;
491 if (ctxt->recovery == 0)
492 ctxt->disableSAX = 1;
493 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000494}
495
496/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000497 * xmlFatalErrMsgStrIntStr:
498 * @ctxt: an XML parser context
499 * @error: the error number
500 * @msg: the error message
501 * @str1: an string info
502 * @val: an integer value
503 * @str2: an string info
504 *
505 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
506 */
507static void
508xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
509 const char *msg, const xmlChar *str1, int val,
510 const xmlChar *str2)
511{
Daniel Veillard157fee02003-10-31 10:36:03 +0000512 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
513 (ctxt->instate == XML_PARSER_EOF))
514 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000515 if (ctxt != NULL)
516 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000517 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000518 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
519 NULL, 0, (const char *) str1, (const char *) str2,
520 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000521 if (ctxt != NULL) {
522 ctxt->wellFormed = 0;
523 if (ctxt->recovery == 0)
524 ctxt->disableSAX = 1;
525 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000526}
527
528/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000529 * xmlFatalErrMsgStr:
530 * @ctxt: an XML parser context
531 * @error: the error number
532 * @msg: the error message
533 * @val: a string value
534 *
535 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
536 */
537static void
538xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000539 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000540{
Daniel Veillard157fee02003-10-31 10:36:03 +0000541 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
542 (ctxt->instate == XML_PARSER_EOF))
543 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000544 if (ctxt != NULL)
545 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000546 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000547 XML_FROM_PARSER, error, XML_ERR_FATAL,
548 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
549 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000550 if (ctxt != NULL) {
551 ctxt->wellFormed = 0;
552 if (ctxt->recovery == 0)
553 ctxt->disableSAX = 1;
554 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000555}
556
557/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000558 * xmlErrMsgStr:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the error message
562 * @val: a string value
563 *
564 * Handle a non fatal parser error
565 */
566static void
567xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
568 const char *msg, const xmlChar * val)
569{
Daniel Veillard157fee02003-10-31 10:36:03 +0000570 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
571 (ctxt->instate == XML_PARSER_EOF))
572 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000573 if (ctxt != NULL)
574 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000576 XML_FROM_PARSER, error, XML_ERR_ERROR,
577 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
578 val);
579}
580
581/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000582 * xmlNsErr:
583 * @ctxt: an XML parser context
584 * @error: the error number
585 * @msg: the message
586 * @info1: extra information string
587 * @info2: extra information string
588 *
589 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
590 */
591static void
592xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
593 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000594 const xmlChar * info1, const xmlChar * info2,
595 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000596{
Daniel Veillard157fee02003-10-31 10:36:03 +0000597 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598 (ctxt->instate == XML_PARSER_EOF))
599 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000600 if (ctxt != NULL)
601 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000602 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000603 XML_ERR_ERROR, NULL, 0, (const char *) info1,
604 (const char *) info2, (const char *) info3, 0, 0, msg,
605 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000606 if (ctxt != NULL)
607 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000608}
609
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000610/************************************************************************
611 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000612 * Library wide options *
613 * *
614 ************************************************************************/
615
/**
 * xmlHasFeature:
 * @feature: the feature to be examined
 *
 * Examines if the library has been compiled with a given feature.
 *
 * Returns a non-zero value if the feature exists; zero (0) if the
 * feature does not exist or an unknown feature is requested.
 */
626int
627xmlHasFeature(xmlFeature feature)
628{
629 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000630 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000631#ifdef LIBXML_THREAD_ENABLED
632 return(1);
633#else
634 return(0);
635#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000636 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000637#ifdef LIBXML_TREE_ENABLED
638 return(1);
639#else
640 return(0);
641#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000642 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000643#ifdef LIBXML_OUTPUT_ENABLED
644 return(1);
645#else
646 return(0);
647#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000648 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000649#ifdef LIBXML_PUSH_ENABLED
650 return(1);
651#else
652 return(0);
653#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000654 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000655#ifdef LIBXML_READER_ENABLED
656 return(1);
657#else
658 return(0);
659#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000660 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000661#ifdef LIBXML_PATTERN_ENABLED
662 return(1);
663#else
664 return(0);
665#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000666 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000667#ifdef LIBXML_WRITER_ENABLED
668 return(1);
669#else
670 return(0);
671#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000672 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000673#ifdef LIBXML_SAX1_ENABLED
674 return(1);
675#else
676 return(0);
677#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000678 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000679#ifdef LIBXML_FTP_ENABLED
680 return(1);
681#else
682 return(0);
683#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000684 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000685#ifdef LIBXML_HTTP_ENABLED
686 return(1);
687#else
688 return(0);
689#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000690 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000691#ifdef LIBXML_VALID_ENABLED
692 return(1);
693#else
694 return(0);
695#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000696 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000697#ifdef LIBXML_HTML_ENABLED
698 return(1);
699#else
700 return(0);
701#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000702 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000703#ifdef LIBXML_LEGACY_ENABLED
704 return(1);
705#else
706 return(0);
707#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000708 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000709#ifdef LIBXML_C14N_ENABLED
710 return(1);
711#else
712 return(0);
713#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000714 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000715#ifdef LIBXML_CATALOG_ENABLED
716 return(1);
717#else
718 return(0);
719#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000720 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000721#ifdef LIBXML_XPATH_ENABLED
722 return(1);
723#else
724 return(0);
725#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000726 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000727#ifdef LIBXML_XPTR_ENABLED
728 return(1);
729#else
730 return(0);
731#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000732 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000733#ifdef LIBXML_XINCLUDE_ENABLED
734 return(1);
735#else
736 return(0);
737#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000738 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000739#ifdef LIBXML_ICONV_ENABLED
740 return(1);
741#else
742 return(0);
743#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000744 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000745#ifdef LIBXML_ISO8859X_ENABLED
746 return(1);
747#else
748 return(0);
749#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000750 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000751#ifdef LIBXML_UNICODE_ENABLED
752 return(1);
753#else
754 return(0);
755#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000756 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000757#ifdef LIBXML_REGEXP_ENABLED
758 return(1);
759#else
760 return(0);
761#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000762 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000763#ifdef LIBXML_AUTOMATA_ENABLED
764 return(1);
765#else
766 return(0);
767#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000768 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000769#ifdef LIBXML_EXPR_ENABLED
770 return(1);
771#else
772 return(0);
773#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000774 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000775#ifdef LIBXML_SCHEMAS_ENABLED
776 return(1);
777#else
778 return(0);
779#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000780 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000781#ifdef LIBXML_SCHEMATRON_ENABLED
782 return(1);
783#else
784 return(0);
785#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000786 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000787#ifdef LIBXML_MODULES_ENABLED
788 return(1);
789#else
790 return(0);
791#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000792 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000793#ifdef LIBXML_DEBUG_ENABLED
794 return(1);
795#else
796 return(0);
797#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000798 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000799#ifdef DEBUG_MEMORY_LOCATION
800 return(1);
801#else
802 return(0);
803#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000804 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000805#ifdef LIBXML_DEBUG_RUNTIME
806 return(1);
807#else
808 return(0);
809#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000810 case XML_WITH_ZLIB:
811#ifdef LIBXML_ZLIB_ENABLED
812 return(1);
813#else
814 return(0);
815#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000816 default:
817 break;
818 }
819 return(0);
820}
821
822/************************************************************************
823 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000824 * SAX2 defaulted attributes handling *
825 * *
826 ************************************************************************/
827
828/**
829 * xmlDetectSAX2:
830 * @ctxt: an XML parser context
831 *
832 * Do the SAX2 detection and specific intialization
833 */
834static void
835xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
836 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000837#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000838 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
839 ((ctxt->sax->startElementNs != NULL) ||
840 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000841#else
842 ctxt->sax2 = 1;
843#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000844
845 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
846 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
847 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000848 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
849 (ctxt->str_xml_ns == NULL)) {
850 xmlErrMemory(ctxt, NULL);
851 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000852}
853
/*
 * Table of the defaulted attributes of one element, as filled in by
 * xmlAddDefAttrs(). The structure is allocated with extra room after it,
 * so values[] is indexed past its declared size of 4.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
struct _xmlDefAttrs {
    int nbAttrs;	/* number of defaulted attributes on that element */
    int maxAttrs;       /* the size of the array, counted in attributes */
    const xmlChar *values[4]; /* 4 pointers per attribute: localname,
                                 prefix, value, end of value */
};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000861
862/**
863 * xmlAddDefAttrs:
864 * @ctxt: an XML parser context
865 * @fullname: the element fullname
866 * @fullattr: the attribute fullname
867 * @value: the attribute value
868 *
869 * Add a defaulted attribute for an element
870 */
871static void
872xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
873 const xmlChar *fullname,
874 const xmlChar *fullattr,
875 const xmlChar *value) {
876 xmlDefAttrsPtr defaults;
877 int len;
878 const xmlChar *name;
879 const xmlChar *prefix;
880
881 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000882 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000883 if (ctxt->attsDefault == NULL)
884 goto mem_error;
885 }
886
887 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000888 * split the element name into prefix:localname , the string found
889 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000890 */
891 name = xmlSplitQName3(fullname, &len);
892 if (name == NULL) {
893 name = xmlDictLookup(ctxt->dict, fullname, -1);
894 prefix = NULL;
895 } else {
896 name = xmlDictLookup(ctxt->dict, name, -1);
897 prefix = xmlDictLookup(ctxt->dict, fullname, len);
898 }
899
900 /*
901 * make sure there is some storage
902 */
903 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
904 if (defaults == NULL) {
905 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +0000906 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000907 if (defaults == NULL)
908 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000909 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000910 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000911 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
912 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +0000913 xmlDefAttrsPtr temp;
914
915 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +0000916 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +0000917 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +0000918 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000919 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000920 defaults->maxAttrs *= 2;
921 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
922 }
923
924 /*
Daniel Veillard8874b942005-08-25 13:19:21 +0000925 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +0000926 * are within the DTD and hen not associated to namespace names.
927 */
928 name = xmlSplitQName3(fullattr, &len);
929 if (name == NULL) {
930 name = xmlDictLookup(ctxt->dict, fullattr, -1);
931 prefix = NULL;
932 } else {
933 name = xmlDictLookup(ctxt->dict, name, -1);
934 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
935 }
936
937 defaults->values[4 * defaults->nbAttrs] = name;
938 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
939 /* intern the string and precompute the end */
940 len = xmlStrlen(value);
941 value = xmlDictLookup(ctxt->dict, value, len);
942 defaults->values[4 * defaults->nbAttrs + 2] = value;
943 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
944 defaults->nbAttrs++;
945
946 return;
947
948mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000949 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000950 return;
951}
952
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000953/**
954 * xmlAddSpecialAttr:
955 * @ctxt: an XML parser context
956 * @fullname: the element fullname
957 * @fullattr: the attribute fullname
958 * @type: the attribute type
959 *
Daniel Veillardac4118d2008-01-11 05:27:32 +0000960 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000961 */
962static void
963xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
964 const xmlChar *fullname,
965 const xmlChar *fullattr,
966 int type)
967{
968 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000969 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000970 if (ctxt->attsSpecial == NULL)
971 goto mem_error;
972 }
973
Daniel Veillardac4118d2008-01-11 05:27:32 +0000974 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
975 return;
976
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000977 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
978 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000979 return;
980
981mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000982 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000983 return;
984}
985
Daniel Veillard4432df22003-09-28 18:58:27 +0000986/**
Daniel Veillardac4118d2008-01-11 05:27:32 +0000987 * xmlCleanSpecialAttrCallback:
988 *
989 * Removes CDATA attributes from the special attribute table
990 */
991static void
992xmlCleanSpecialAttrCallback(void *payload, void *data,
993 const xmlChar *fullname, const xmlChar *fullattr,
994 const xmlChar *unused ATTRIBUTE_UNUSED) {
995 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
996
Daniel Veillardb3edafd2008-01-11 08:00:57 +0000997 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +0000998 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
999 }
1000}
1001
1002/**
1003 * xmlCleanSpecialAttr:
1004 * @ctxt: an XML parser context
1005 *
1006 * Trim the list of attributes defined to remove all those of type
1007 * CDATA as they are not special. This call should be done when finishing
1008 * to parse the DTD and before starting to parse the document root.
1009 */
1010static void
1011xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1012{
1013 if (ctxt->attsSpecial == NULL)
1014 return;
1015
1016 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1017
1018 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1019 xmlHashFree(ctxt->attsSpecial, NULL);
1020 ctxt->attsSpecial = NULL;
1021 }
1022 return;
1023}
1024
1025/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001026 * xmlCheckLanguageID:
1027 * @lang: pointer to the string value
1028 *
1029 * Checks that the value conforms to the LanguageID production:
1030 *
1031 * NOTE: this is somewhat deprecated, those productions were removed from
1032 * the XML Second edition.
1033 *
1034 * [33] LanguageID ::= Langcode ('-' Subcode)*
1035 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1036 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1037 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1038 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1039 * [38] Subcode ::= ([a-z] | [A-Z])+
1040 *
1041 * Returns 1 if correct 0 otherwise
1042 **/
1043int
1044xmlCheckLanguageID(const xmlChar * lang)
1045{
1046 const xmlChar *cur = lang;
1047
1048 if (cur == NULL)
1049 return (0);
1050 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1051 ((cur[0] == 'I') && (cur[1] == '-'))) {
1052 /*
1053 * IANA code
1054 */
1055 cur += 2;
1056 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1057 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1058 cur++;
1059 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1060 ((cur[0] == 'X') && (cur[1] == '-'))) {
1061 /*
1062 * User code
1063 */
1064 cur += 2;
1065 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1066 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1067 cur++;
1068 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1069 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1070 /*
1071 * ISO639
1072 */
1073 cur++;
1074 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1075 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1076 cur++;
1077 else
1078 return (0);
1079 } else
1080 return (0);
1081 while (cur[0] != 0) { /* non input consuming */
1082 if (cur[0] != '-')
1083 return (0);
1084 cur++;
1085 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1086 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1087 cur++;
1088 else
1089 return (0);
1090 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1091 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1092 cur++;
1093 }
1094 return (1);
1095}
1096
Owen Taylor3473f882001-02-23 17:55:21 +00001097/************************************************************************
1098 * *
1099 * Parser stacks related functions and macros *
1100 * *
1101 ************************************************************************/
1102
/*
 * Prototype of xmlParseStringEntityRef().
 * NOTE(review): presumably a forward declaration for a routine defined
 * later in this file -- confirm.
 */
xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
                                       const xmlChar ** str);
1105
Daniel Veillard0fb18932003-09-07 09:14:37 +00001106#ifdef SAX2
1107/**
1108 * nsPush:
1109 * @ctxt: an XML parser context
1110 * @prefix: the namespace prefix or NULL
1111 * @URL: the namespace name
1112 *
1113 * Pushes a new parser namespace on top of the ns stack
1114 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001115 * Returns -1 in case of error, -2 if the namespace should be discarded
1116 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001117 */
1118static int
1119nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1120{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001121 if (ctxt->options & XML_PARSE_NSCLEAN) {
1122 int i;
1123 for (i = 0;i < ctxt->nsNr;i += 2) {
1124 if (ctxt->nsTab[i] == prefix) {
1125 /* in scope */
1126 if (ctxt->nsTab[i + 1] == URL)
1127 return(-2);
1128 /* out of scope keep it */
1129 break;
1130 }
1131 }
1132 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001133 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1134 ctxt->nsMax = 10;
1135 ctxt->nsNr = 0;
1136 ctxt->nsTab = (const xmlChar **)
1137 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1138 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001139 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001140 ctxt->nsMax = 0;
1141 return (-1);
1142 }
1143 } else if (ctxt->nsNr >= ctxt->nsMax) {
1144 ctxt->nsMax *= 2;
1145 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +00001146 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +00001147 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1148 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001149 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001150 ctxt->nsMax /= 2;
1151 return (-1);
1152 }
1153 }
1154 ctxt->nsTab[ctxt->nsNr++] = prefix;
1155 ctxt->nsTab[ctxt->nsNr++] = URL;
1156 return (ctxt->nsNr);
1157}
1158/**
1159 * nsPop:
1160 * @ctxt: an XML parser context
1161 * @nr: the number to pop
1162 *
1163 * Pops the top @nr parser prefix/namespace from the ns stack
1164 *
1165 * Returns the number of namespaces removed
1166 */
1167static int
1168nsPop(xmlParserCtxtPtr ctxt, int nr)
1169{
1170 int i;
1171
1172 if (ctxt->nsTab == NULL) return(0);
1173 if (ctxt->nsNr < nr) {
1174 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1175 nr = ctxt->nsNr;
1176 }
1177 if (ctxt->nsNr <= 0)
1178 return (0);
1179
1180 for (i = 0;i < nr;i++) {
1181 ctxt->nsNr--;
1182 ctxt->nsTab[ctxt->nsNr] = NULL;
1183 }
1184 return(nr);
1185}
1186#endif
1187
1188static int
1189xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1190 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001191 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001192 int maxatts;
1193
1194 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001195 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001196 atts = (const xmlChar **)
1197 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001198 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001199 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001200 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1201 if (attallocs == NULL) goto mem_error;
1202 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001203 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001204 } else if (nr + 5 > ctxt->maxatts) {
1205 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001206 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1207 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001208 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001209 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001210 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1211 (maxatts / 5) * sizeof(int));
1212 if (attallocs == NULL) goto mem_error;
1213 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001214 ctxt->maxatts = maxatts;
1215 }
1216 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001217mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001218 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001219 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001220}
1221
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001222/**
1223 * inputPush:
1224 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001225 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001226 *
1227 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001228 *
1229 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001230 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001231int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001232inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1233{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001234 if ((ctxt == NULL) || (value == NULL))
1235 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001236 if (ctxt->inputNr >= ctxt->inputMax) {
1237 ctxt->inputMax *= 2;
1238 ctxt->inputTab =
1239 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1240 ctxt->inputMax *
1241 sizeof(ctxt->inputTab[0]));
1242 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001243 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001244 return (0);
1245 }
1246 }
1247 ctxt->inputTab[ctxt->inputNr] = value;
1248 ctxt->input = value;
1249 return (ctxt->inputNr++);
1250}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001251/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001252 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001253 * @ctxt: an XML parser context
1254 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001255 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001256 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001257 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001258 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001259xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001260inputPop(xmlParserCtxtPtr ctxt)
1261{
1262 xmlParserInputPtr ret;
1263
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001264 if (ctxt == NULL)
1265 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001266 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001267 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001268 ctxt->inputNr--;
1269 if (ctxt->inputNr > 0)
1270 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1271 else
1272 ctxt->input = NULL;
1273 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001274 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001275 return (ret);
1276}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001277/**
1278 * nodePush:
1279 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001280 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001281 *
1282 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001283 *
1284 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001285 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001286int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001287nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1288{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001289 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001290 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001291 xmlNodePtr *tmp;
1292
1293 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1294 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001295 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001296 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001297 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001298 return (0);
1299 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001300 ctxt->nodeTab = tmp;
1301 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001302 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001303 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001304 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001305 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1306 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001307 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001308 return(0);
1309 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001310 ctxt->nodeTab[ctxt->nodeNr] = value;
1311 ctxt->node = value;
1312 return (ctxt->nodeNr++);
1313}
1314/**
1315 * nodePop:
1316 * @ctxt: an XML parser context
1317 *
1318 * Pops the top element node from the node stack
1319 *
1320 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001321 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001322xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001323nodePop(xmlParserCtxtPtr ctxt)
1324{
1325 xmlNodePtr ret;
1326
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001327 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001328 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001329 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001330 ctxt->nodeNr--;
1331 if (ctxt->nodeNr > 0)
1332 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1333 else
1334 ctxt->node = NULL;
1335 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001336 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001337 return (ret);
1338}
Daniel Veillarda2351322004-06-27 12:08:10 +00001339
1340#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001341/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001342 * nameNsPush:
1343 * @ctxt: an XML parser context
1344 * @value: the element name
1345 * @prefix: the element prefix
1346 * @URI: the element namespace name
1347 *
1348 * Pushes a new element name/prefix/URL on top of the name stack
1349 *
1350 * Returns -1 in case of error, the index in the stack otherwise
1351 */
1352static int
1353nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1354 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1355{
1356 if (ctxt->nameNr >= ctxt->nameMax) {
1357 const xmlChar * *tmp;
1358 void **tmp2;
1359 ctxt->nameMax *= 2;
1360 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1361 ctxt->nameMax *
1362 sizeof(ctxt->nameTab[0]));
1363 if (tmp == NULL) {
1364 ctxt->nameMax /= 2;
1365 goto mem_error;
1366 }
1367 ctxt->nameTab = tmp;
1368 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1369 ctxt->nameMax * 3 *
1370 sizeof(ctxt->pushTab[0]));
1371 if (tmp2 == NULL) {
1372 ctxt->nameMax /= 2;
1373 goto mem_error;
1374 }
1375 ctxt->pushTab = tmp2;
1376 }
1377 ctxt->nameTab[ctxt->nameNr] = value;
1378 ctxt->name = value;
1379 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1380 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001381 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001382 return (ctxt->nameNr++);
1383mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001384 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001385 return (-1);
1386}
1387/**
1388 * nameNsPop:
1389 * @ctxt: an XML parser context
1390 *
1391 * Pops the top element/prefix/URI name from the name stack
1392 *
1393 * Returns the name just removed
1394 */
1395static const xmlChar *
1396nameNsPop(xmlParserCtxtPtr ctxt)
1397{
1398 const xmlChar *ret;
1399
1400 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001401 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001402 ctxt->nameNr--;
1403 if (ctxt->nameNr > 0)
1404 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1405 else
1406 ctxt->name = NULL;
1407 ret = ctxt->nameTab[ctxt->nameNr];
1408 ctxt->nameTab[ctxt->nameNr] = NULL;
1409 return (ret);
1410}
Daniel Veillarda2351322004-06-27 12:08:10 +00001411#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001412
1413/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001414 * namePush:
1415 * @ctxt: an XML parser context
1416 * @value: the element name
1417 *
1418 * Pushes a new element name on top of the name stack
1419 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001420 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001421 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001422int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001423namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001424{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001425 if (ctxt == NULL) return (-1);
1426
Daniel Veillard1c732d22002-11-30 11:22:59 +00001427 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001428 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001429 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001430 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001431 ctxt->nameMax *
1432 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001433 if (tmp == NULL) {
1434 ctxt->nameMax /= 2;
1435 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001436 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001437 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001438 }
1439 ctxt->nameTab[ctxt->nameNr] = value;
1440 ctxt->name = value;
1441 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001442mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001443 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001444 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001445}
1446/**
1447 * namePop:
1448 * @ctxt: an XML parser context
1449 *
1450 * Pops the top element name from the name stack
1451 *
1452 * Returns the name just removed
1453 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001454const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001455namePop(xmlParserCtxtPtr ctxt)
1456{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001457 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001458
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001459 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1460 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001461 ctxt->nameNr--;
1462 if (ctxt->nameNr > 0)
1463 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1464 else
1465 ctxt->name = NULL;
1466 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001467 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001468 return (ret);
1469}
Owen Taylor3473f882001-02-23 17:55:21 +00001470
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001471static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001472 if (ctxt->spaceNr >= ctxt->spaceMax) {
1473 ctxt->spaceMax *= 2;
1474 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1475 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1476 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001477 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001478 return(0);
1479 }
1480 }
1481 ctxt->spaceTab[ctxt->spaceNr] = val;
1482 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1483 return(ctxt->spaceNr++);
1484}
1485
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001486static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001487 int ret;
1488 if (ctxt->spaceNr <= 0) return(0);
1489 ctxt->spaceNr--;
1490 if (ctxt->spaceNr > 0)
1491 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1492 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001493 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001494 ret = ctxt->spaceTab[ctxt->spaceNr];
1495 ctxt->spaceTab[ctxt->spaceNr] = -1;
1496 return(ret);
1497}
1498
1499/*
1500 * Macros for accessing the content. Those should be used only by the parser,
1501 * and not exported.
1502 *
1503 * Dirty macros, i.e. one often need to make assumption on the context to
1504 * use them
1505 *
1506 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1507 * To be used with extreme caution since operations consuming
1508 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substrings.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001521 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1522 *
1523 * NEXT Skip to the next character, this does the proper decoding
1524 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001525 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001526 * CUR_CHAR(l) returns the current unicode character (int), set l
1527 * to the number of xmlChars used for the encoding [0-5].
1528 * CUR_SCHAR same but operate on a string instead of the context
1529 * COPY_BUF copy the current unicode char to the target buffer, increment
1530 * the index
1531 * GROW, SHRINK handling of input buffers
1532 */
1533
/*
 * Byte-level views of the current input.  All of them expect a variable
 * named ctxt to be in scope at the point of use.
 */
#define RAW		(*ctxt->input->cur)	/* byte at the read position */
#define CUR		(*ctxt->input->cur)	/* same byte as RAW */
#define NXT(val)	ctxt->input->cur[(val)]	/* byte val positions ahead */
#define CUR_PTR		ctxt->input->cur	/* the read position itself */
1538
/*
 * CMPn(s, c1, ..., cn): non-zero when the n bytes starting at s are exactly
 * c1 ... cn.  Bytes are compared left to right and the comparison stops at
 * the first mismatch, so no byte after a mismatch is read.
 *
 * Every argument is parenthesized so that arbitrary expressions can be
 * passed, and the cast keeps the const qualifier of the source pointer.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1556
/*
 * SKIP(val): advance the read position, the column and the character
 * counter by val, then expand a parameter-entity reference if the new
 * current byte is '%', and pop the input if it is exhausted and cannot
 * be grown.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
	xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	xmlPopInput(ctxt);						\
  } while (0)
1564
/*
 * SKIPL(val): like SKIP(val) but walks the input one byte at a time so
 * that newlines update the line counter and reset the column.
 * val is parenthesized so that any integer expression may be passed.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	    ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1579
Daniel Veillarda880b122003-04-21 21:36:41 +00001580#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001581 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1582 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001583 xmlSHRINK (ctxt);
1584
1585static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1586 xmlParserInputShrink(ctxt->input);
1587 if ((*ctxt->input->cur == 0) &&
1588 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1589 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001590 }
Owen Taylor3473f882001-02-23 17:55:21 +00001591
Daniel Veillarda880b122003-04-21 21:36:41 +00001592#define GROW if ((ctxt->progressive == 0) && \
1593 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001594 xmlGROW (ctxt);
1595
1596static void xmlGROW (xmlParserCtxtPtr ctxt) {
1597 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1598 if ((*ctxt->input->cur == 0) &&
1599 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1600 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001601}
Owen Taylor3473f882001-02-23 17:55:21 +00001602
/* SKIP_BLANKS: skip blanks at the read position via xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: move to the next character via xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one byte, counting it as one column and one
 * character, and request more input when a zero byte is reached.
 */
#define NEXT1 {								\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
	ctxt->input->col++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }
1614
/*
 * NEXTL(l): step over the current character, which occupies l bytes.
 * A newline bumps the line counter and resets the column; anything else
 * counts as one column.  A '%' at the new position triggers
 * parameter-entity handling.
 * l is parenthesized so that any integer expression may be passed.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)
1622
/* CUR_CHAR(l): current character of the context input; l receives its length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* CUR_SCHAR(s, l): same as CUR_CHAR but reading from the string s. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and advance
 * i.  When l is 1 the value is stored as a single byte, otherwise it is
 * encoded with xmlCopyCharMultiByte().
 * The expansion is an unbraced if/else: only use it as a full statement
 * inside braces.  Arguments are parenthesized so expressions can be passed.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[i],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00001629
1630/**
1631 * xmlSkipBlankChars:
1632 * @ctxt: the XML parser context
1633 *
1634 * skip all blanks character found at that point in the input streams.
1635 * It pops up finished entities in the process if allowable at that point.
1636 *
1637 * Returns the number of space chars skipped
1638 */
1639
1640int
1641xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001642 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001643
1644 /*
1645 * It's Okay to use CUR/NEXT here since all the blanks are on
1646 * the ASCII range.
1647 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001648 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1649 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001650 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001651 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001652 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001653 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001654 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001655 if (*cur == '\n') {
1656 ctxt->input->line++; ctxt->input->col = 1;
1657 }
1658 cur++;
1659 res++;
1660 if (*cur == 0) {
1661 ctxt->input->cur = cur;
1662 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1663 cur = ctxt->input->cur;
1664 }
1665 }
1666 ctxt->input->cur = cur;
1667 } else {
1668 int cur;
1669 do {
1670 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001671 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001672 NEXT;
1673 cur = CUR;
1674 res++;
1675 }
1676 while ((cur == 0) && (ctxt->inputNr > 1) &&
1677 (ctxt->instate != XML_PARSER_COMMENT)) {
1678 xmlPopInput(ctxt);
1679 cur = CUR;
1680 }
1681 /*
1682 * Need to handle support of entities branching here
1683 */
1684 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1685 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1686 }
Owen Taylor3473f882001-02-23 17:55:21 +00001687 return(res);
1688}
1689
1690/************************************************************************
1691 * *
1692 * Commodity functions to handle entities *
1693 * *
1694 ************************************************************************/
1695
1696/**
1697 * xmlPopInput:
1698 * @ctxt: an XML parser context
1699 *
1700 * xmlPopInput: the current input pointed by ctxt->input came to an end
1701 * pop it and return the next char.
1702 *
1703 * Returns the current xmlChar in the parser context
1704 */
1705xmlChar
1706xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001707 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001708 if (xmlParserDebugEntities)
1709 xmlGenericError(xmlGenericErrorContext,
1710 "Popping input %d\n", ctxt->inputNr);
1711 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001712 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001713 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1714 return(xmlPopInput(ctxt));
1715 return(CUR);
1716}
1717
1718/**
1719 * xmlPushInput:
1720 * @ctxt: an XML parser context
1721 * @input: an XML parser input fragment (entity, XML fragment ...).
1722 *
1723 * xmlPushInput: switch to a new input stream which is stacked on top
1724 * of the previous one(s).
1725 */
1726void
1727xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1728 if (input == NULL) return;
1729
1730 if (xmlParserDebugEntities) {
1731 if ((ctxt->input != NULL) && (ctxt->input->filename))
1732 xmlGenericError(xmlGenericErrorContext,
1733 "%s(%d): ", ctxt->input->filename,
1734 ctxt->input->line);
1735 xmlGenericError(xmlGenericErrorContext,
1736 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1737 }
1738 inputPush(ctxt, input);
1739 GROW;
1740}
1741
1742/**
1743 * xmlParseCharRef:
1744 * @ctxt: an XML parser context
1745 *
1746 * parse Reference declarations
1747 *
1748 * [66] CharRef ::= '&#' [0-9]+ ';' |
1749 * '&#x' [0-9a-fA-F]+ ';'
1750 *
1751 * [ WFC: Legal Character ]
1752 * Characters referred to using character references must match the
1753 * production for Char.
1754 *
1755 * Returns the value parsed (as an int), 0 in case of error
1756 */
1757int
1758xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001759 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001760 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001761 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001762
Owen Taylor3473f882001-02-23 17:55:21 +00001763 /*
1764 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1765 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001766 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001767 (NXT(2) == 'x')) {
1768 SKIP(3);
1769 GROW;
1770 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001771 if (count++ > 20) {
1772 count = 0;
1773 GROW;
1774 }
1775 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001776 val = val * 16 + (CUR - '0');
1777 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1778 val = val * 16 + (CUR - 'a') + 10;
1779 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1780 val = val * 16 + (CUR - 'A') + 10;
1781 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001782 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001783 val = 0;
1784 break;
1785 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001786 if (val > 0x10FFFF)
1787 outofrange = val;
1788
Owen Taylor3473f882001-02-23 17:55:21 +00001789 NEXT;
1790 count++;
1791 }
1792 if (RAW == ';') {
1793 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001794 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001795 ctxt->nbChars ++;
1796 ctxt->input->cur++;
1797 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001798 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001799 SKIP(2);
1800 GROW;
1801 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001802 if (count++ > 20) {
1803 count = 0;
1804 GROW;
1805 }
1806 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001807 val = val * 10 + (CUR - '0');
1808 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001809 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001810 val = 0;
1811 break;
1812 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001813 if (val > 0x10FFFF)
1814 outofrange = val;
1815
Owen Taylor3473f882001-02-23 17:55:21 +00001816 NEXT;
1817 count++;
1818 }
1819 if (RAW == ';') {
1820 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001821 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001822 ctxt->nbChars ++;
1823 ctxt->input->cur++;
1824 }
1825 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001826 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001827 }
1828
1829 /*
1830 * [ WFC: Legal Character ]
1831 * Characters referred to using character references must match the
1832 * production for Char.
1833 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001834 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001835 return(val);
1836 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001837 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1838 "xmlParseCharRef: invalid xmlChar value %d\n",
1839 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001840 }
1841 return(0);
1842}
1843
1844/**
1845 * xmlParseStringCharRef:
1846 * @ctxt: an XML parser context
1847 * @str: a pointer to an index in the string
1848 *
1849 * parse Reference declarations, variant parsing from a string rather
1850 * than an an input flow.
1851 *
1852 * [66] CharRef ::= '&#' [0-9]+ ';' |
1853 * '&#x' [0-9a-fA-F]+ ';'
1854 *
1855 * [ WFC: Legal Character ]
1856 * Characters referred to using character references must match the
1857 * production for Char.
1858 *
1859 * Returns the value parsed (as an int), 0 in case of error, str will be
1860 * updated to the current value of the index
1861 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001862static int
Owen Taylor3473f882001-02-23 17:55:21 +00001863xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1864 const xmlChar *ptr;
1865 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001866 unsigned int val = 0;
1867 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001868
1869 if ((str == NULL) || (*str == NULL)) return(0);
1870 ptr = *str;
1871 cur = *ptr;
1872 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1873 ptr += 3;
1874 cur = *ptr;
1875 while (cur != ';') { /* Non input consuming loop */
1876 if ((cur >= '0') && (cur <= '9'))
1877 val = val * 16 + (cur - '0');
1878 else if ((cur >= 'a') && (cur <= 'f'))
1879 val = val * 16 + (cur - 'a') + 10;
1880 else if ((cur >= 'A') && (cur <= 'F'))
1881 val = val * 16 + (cur - 'A') + 10;
1882 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001883 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001884 val = 0;
1885 break;
1886 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001887 if (val > 0x10FFFF)
1888 outofrange = val;
1889
Owen Taylor3473f882001-02-23 17:55:21 +00001890 ptr++;
1891 cur = *ptr;
1892 }
1893 if (cur == ';')
1894 ptr++;
1895 } else if ((cur == '&') && (ptr[1] == '#')){
1896 ptr += 2;
1897 cur = *ptr;
1898 while (cur != ';') { /* Non input consuming loops */
1899 if ((cur >= '0') && (cur <= '9'))
1900 val = val * 10 + (cur - '0');
1901 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001902 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001903 val = 0;
1904 break;
1905 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001906 if (val > 0x10FFFF)
1907 outofrange = val;
1908
Owen Taylor3473f882001-02-23 17:55:21 +00001909 ptr++;
1910 cur = *ptr;
1911 }
1912 if (cur == ';')
1913 ptr++;
1914 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001915 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001916 return(0);
1917 }
1918 *str = ptr;
1919
1920 /*
1921 * [ WFC: Legal Character ]
1922 * Characters referred to using character references must match the
1923 * production for Char.
1924 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001925 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001926 return(val);
1927 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001928 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1929 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1930 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001931 }
1932 return(0);
1933}
1934
1935/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001936 * xmlNewBlanksWrapperInputStream:
1937 * @ctxt: an XML parser context
1938 * @entity: an Entity pointer
1939 *
1940 * Create a new input stream for wrapping
1941 * blanks around a PEReference
1942 *
1943 * Returns the new input stream or NULL
1944 */
1945
1946static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1947
Daniel Veillardf4862f02002-09-10 11:13:43 +00001948static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001949xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1950 xmlParserInputPtr input;
1951 xmlChar *buffer;
1952 size_t length;
1953 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001954 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1955 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001956 return(NULL);
1957 }
1958 if (xmlParserDebugEntities)
1959 xmlGenericError(xmlGenericErrorContext,
1960 "new blanks wrapper for entity: %s\n", entity->name);
1961 input = xmlNewInputStream(ctxt);
1962 if (input == NULL) {
1963 return(NULL);
1964 }
1965 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001966 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001967 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001968 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001969 return(NULL);
1970 }
1971 buffer [0] = ' ';
1972 buffer [1] = '%';
1973 buffer [length-3] = ';';
1974 buffer [length-2] = ' ';
1975 buffer [length-1] = 0;
1976 memcpy(buffer + 2, entity->name, length - 5);
1977 input->free = deallocblankswrapper;
1978 input->base = buffer;
1979 input->cur = buffer;
1980 input->length = length;
1981 input->end = &buffer[length];
1982 return(input);
1983}
1984
1985/**
Owen Taylor3473f882001-02-23 17:55:21 +00001986 * xmlParserHandlePEReference:
1987 * @ctxt: the parser context
1988 *
1989 * [69] PEReference ::= '%' Name ';'
1990 *
1991 * [ WFC: No Recursion ]
1992 * A parsed entity must not contain a recursive
1993 * reference to itself, either directly or indirectly.
1994 *
1995 * [ WFC: Entity Declared ]
1996 * In a document without any DTD, a document with only an internal DTD
1997 * subset which contains no parameter entity references, or a document
1998 * with "standalone='yes'", ... ... The declaration of a parameter
1999 * entity must precede any reference to it...
2000 *
2001 * [ VC: Entity Declared ]
2002 * In a document with an external subset or external parameter entities
2003 * with "standalone='no'", ... ... The declaration of a parameter entity
2004 * must precede any reference to it...
2005 *
2006 * [ WFC: In DTD ]
2007 * Parameter-entity references may only appear in the DTD.
2008 * NOTE: misleading but this is handled.
2009 *
2010 * A PEReference may have been detected in the current input stream
2011 * the handling is done accordingly to
2012 * http://www.w3.org/TR/REC-xml#entproc
2013 * i.e.
2014 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002015 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002016 */
2017void
2018xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002019 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002020 xmlEntityPtr entity = NULL;
2021 xmlParserInputPtr input;
2022
Owen Taylor3473f882001-02-23 17:55:21 +00002023 if (RAW != '%') return;
2024 switch(ctxt->instate) {
2025 case XML_PARSER_CDATA_SECTION:
2026 return;
2027 case XML_PARSER_COMMENT:
2028 return;
2029 case XML_PARSER_START_TAG:
2030 return;
2031 case XML_PARSER_END_TAG:
2032 return;
2033 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002034 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002035 return;
2036 case XML_PARSER_PROLOG:
2037 case XML_PARSER_START:
2038 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002039 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002040 return;
2041 case XML_PARSER_ENTITY_DECL:
2042 case XML_PARSER_CONTENT:
2043 case XML_PARSER_ATTRIBUTE_VALUE:
2044 case XML_PARSER_PI:
2045 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002046 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002047 /* we just ignore it there */
2048 return;
2049 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002050 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002051 return;
2052 case XML_PARSER_ENTITY_VALUE:
2053 /*
2054 * NOTE: in the case of entity values, we don't do the
2055 * substitution here since we need the literal
2056 * entity value to be able to save the internal
2057 * subset of the document.
2058 * This will be handled by xmlStringDecodeEntities
2059 */
2060 return;
2061 case XML_PARSER_DTD:
2062 /*
2063 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2064 * In the internal DTD subset, parameter-entity references
2065 * can occur only where markup declarations can occur, not
2066 * within markup declarations.
2067 * In that case this is handled in xmlParseMarkupDecl
2068 */
2069 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2070 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002071 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002072 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002073 break;
2074 case XML_PARSER_IGNORE:
2075 return;
2076 }
2077
2078 NEXT;
2079 name = xmlParseName(ctxt);
2080 if (xmlParserDebugEntities)
2081 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002082 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002083 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002084 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002085 } else {
2086 if (RAW == ';') {
2087 NEXT;
2088 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2089 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2090 if (entity == NULL) {
2091
2092 /*
2093 * [ WFC: Entity Declared ]
2094 * In a document without any DTD, a document with only an
2095 * internal DTD subset which contains no parameter entity
2096 * references, or a document with "standalone='yes'", ...
2097 * ... The declaration of a parameter entity must precede
2098 * any reference to it...
2099 */
2100 if ((ctxt->standalone == 1) ||
2101 ((ctxt->hasExternalSubset == 0) &&
2102 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002103 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002104 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002105 } else {
2106 /*
2107 * [ VC: Entity Declared ]
2108 * In a document with an external subset or external
2109 * parameter entities with "standalone='no'", ...
2110 * ... The declaration of a parameter entity must precede
2111 * any reference to it...
2112 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002113 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2114 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2115 "PEReference: %%%s; not found\n",
2116 name);
2117 } else
2118 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2119 "PEReference: %%%s; not found\n",
2120 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002121 ctxt->valid = 0;
2122 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002123 } else if (ctxt->input->free != deallocblankswrapper) {
2124 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2125 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002126 } else {
2127 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2128 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002129 xmlChar start[4];
2130 xmlCharEncoding enc;
2131
Owen Taylor3473f882001-02-23 17:55:21 +00002132 /*
2133 * handle the extra spaces added before and after
2134 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002135 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002136 */
2137 input = xmlNewEntityInputStream(ctxt, entity);
2138 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00002139
2140 /*
2141 * Get the 4 first bytes and decode the charset
2142 * if enc != XML_CHAR_ENCODING_NONE
2143 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002144 * Note that, since we may have some non-UTF8
2145 * encoding (like UTF16, bug 135229), the 'length'
2146 * is not known, but we can calculate based upon
2147 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002148 */
2149 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002150 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002151 start[0] = RAW;
2152 start[1] = NXT(1);
2153 start[2] = NXT(2);
2154 start[3] = NXT(3);
2155 enc = xmlDetectCharEncoding(start, 4);
2156 if (enc != XML_CHAR_ENCODING_NONE) {
2157 xmlSwitchEncoding(ctxt, enc);
2158 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002159 }
2160
Owen Taylor3473f882001-02-23 17:55:21 +00002161 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002162 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2163 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002164 xmlParseTextDecl(ctxt);
2165 }
Owen Taylor3473f882001-02-23 17:55:21 +00002166 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002167 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2168 "PEReference: %s is not a parameter entity\n",
2169 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002170 }
2171 }
2172 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002173 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002174 }
Owen Taylor3473f882001-02-23 17:55:21 +00002175 }
2176}
2177
/*
 * growBuffer(buffer): double the capacity of an xmlChar buffer.
 *
 * The capacity is kept in a companion variable named <buffer>_size, which
 * is doubled first.  If the reallocation fails, control jumps to the
 * mem_error label of the enclosing function and the buffer pointer itself
 * is left unchanged.
 */
#define growBuffer(buffer) {						\
    xmlChar *grown;							\
    buffer##_size *= 2;							\
    grown = (xmlChar *) xmlRealloc(buffer,				\
				   buffer##_size * sizeof(xmlChar));	\
    if (grown == NULL) goto mem_error;					\
    buffer = grown;							\
}
2189
2190/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002191 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002192 * @ctxt: the parser context
2193 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002194 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002195 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2196 * @end: an end marker xmlChar, 0 if none
2197 * @end2: an end marker xmlChar, 0 if none
2198 * @end3: an end marker xmlChar, 0 if none
2199 *
 * Takes an entity string content and processes it to do the adequate
 * substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002201 *
2202 * [67] Reference ::= EntityRef | CharRef
2203 *
2204 * [69] PEReference ::= '%' Name ';'
2205 *
2206 * Returns A newly allocated string with the substitution done. The caller
2207 * must deallocate it !
2208 */
2209xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002210xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2211 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002212 xmlChar *buffer = NULL;
2213 int buffer_size = 0;
2214
2215 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002216 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002217 xmlEntityPtr ent;
2218 int c,l;
2219 int nbchars = 0;
2220
Daniel Veillarda82b1822004-11-08 16:24:57 +00002221 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002222 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002223 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002224
2225 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002226 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002227 return(NULL);
2228 }
2229
2230 /*
2231 * allocate a translation buffer.
2232 */
2233 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002234 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002235 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002236
2237 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002238 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002239 * we are operating on already parsed values.
2240 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002241 if (str < last)
2242 c = CUR_SCHAR(str, l);
2243 else
2244 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002245 while ((c != 0) && (c != end) && /* non input consuming loop */
2246 (c != end2) && (c != end3)) {
2247
2248 if (c == 0) break;
2249 if ((c == '&') && (str[1] == '#')) {
2250 int val = xmlParseStringCharRef(ctxt, &str);
2251 if (val != 0) {
2252 COPY_BUF(0,buffer,nbchars,val);
2253 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002254 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2255 growBuffer(buffer);
2256 }
Owen Taylor3473f882001-02-23 17:55:21 +00002257 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2258 if (xmlParserDebugEntities)
2259 xmlGenericError(xmlGenericErrorContext,
2260 "String decoding Entity Reference: %.30s\n",
2261 str);
2262 ent = xmlParseStringEntityRef(ctxt, &str);
2263 if ((ent != NULL) &&
2264 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2265 if (ent->content != NULL) {
2266 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002267 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2268 growBuffer(buffer);
2269 }
Owen Taylor3473f882001-02-23 17:55:21 +00002270 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002271 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2272 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002273 }
2274 } else if ((ent != NULL) && (ent->content != NULL)) {
2275 xmlChar *rep;
2276
2277 ctxt->depth++;
2278 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2279 0, 0, 0);
2280 ctxt->depth--;
2281 if (rep != NULL) {
2282 current = rep;
2283 while (*current != 0) { /* non input consuming loop */
2284 buffer[nbchars++] = *current++;
2285 if (nbchars >
2286 buffer_size - XML_PARSER_BUFFER_SIZE) {
2287 growBuffer(buffer);
2288 }
2289 }
2290 xmlFree(rep);
2291 }
2292 } else if (ent != NULL) {
2293 int i = xmlStrlen(ent->name);
2294 const xmlChar *cur = ent->name;
2295
2296 buffer[nbchars++] = '&';
2297 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2298 growBuffer(buffer);
2299 }
2300 for (;i > 0;i--)
2301 buffer[nbchars++] = *cur++;
2302 buffer[nbchars++] = ';';
2303 }
2304 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2305 if (xmlParserDebugEntities)
2306 xmlGenericError(xmlGenericErrorContext,
2307 "String decoding PE Reference: %.30s\n", str);
2308 ent = xmlParseStringPEReference(ctxt, &str);
2309 if (ent != NULL) {
2310 xmlChar *rep;
2311
2312 ctxt->depth++;
2313 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2314 0, 0, 0);
2315 ctxt->depth--;
2316 if (rep != NULL) {
2317 current = rep;
2318 while (*current != 0) { /* non input consuming loop */
2319 buffer[nbchars++] = *current++;
2320 if (nbchars >
2321 buffer_size - XML_PARSER_BUFFER_SIZE) {
2322 growBuffer(buffer);
2323 }
2324 }
2325 xmlFree(rep);
2326 }
2327 }
2328 } else {
2329 COPY_BUF(l,buffer,nbchars,c);
2330 str += l;
2331 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2332 growBuffer(buffer);
2333 }
2334 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002335 if (str < last)
2336 c = CUR_SCHAR(str, l);
2337 else
2338 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002339 }
2340 buffer[nbchars++] = 0;
2341 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002342
2343mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002344 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002345 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002346}
2347
Daniel Veillarde57ec792003-09-10 10:50:59 +00002348/**
2349 * xmlStringDecodeEntities:
2350 * @ctxt: the parser context
2351 * @str: the input string
2352 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2353 * @end: an end marker xmlChar, 0 if none
2354 * @end2: an end marker xmlChar, 0 if none
2355 * @end3: an end marker xmlChar, 0 if none
2356 *
2357 * Takes a entity string content and process to do the adequate substitutions.
2358 *
2359 * [67] Reference ::= EntityRef | CharRef
2360 *
2361 * [69] PEReference ::= '%' Name ';'
2362 *
2363 * Returns A newly allocated string with the substitution done. The caller
2364 * must deallocate it !
2365 */
2366xmlChar *
2367xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2368 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002369 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002370 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2371 end, end2, end3));
2372}
Owen Taylor3473f882001-02-23 17:55:21 +00002373
2374/************************************************************************
2375 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002376 * Commodity functions, cleanup needed ? *
2377 * *
2378 ************************************************************************/
2379
2380/**
2381 * areBlanks:
2382 * @ctxt: an XML parser context
2383 * @str: a xmlChar *
2384 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002385 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002386 *
2387 * Is this a sequence of blank chars that one can ignore ?
2388 *
2389 * Returns 1 if ignorable 0 otherwise.
2390 */
2391
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002392static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2393 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002394 int i, ret;
2395 xmlNodePtr lastChild;
2396
Daniel Veillard05c13a22001-09-09 08:38:09 +00002397 /*
2398 * Don't spend time trying to differentiate them, the same callback is
2399 * used !
2400 */
2401 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002402 return(0);
2403
Owen Taylor3473f882001-02-23 17:55:21 +00002404 /*
2405 * Check for xml:space value.
2406 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002407 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2408 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002409 return(0);
2410
2411 /*
2412 * Check that the string is made of blanks
2413 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002414 if (blank_chars == 0) {
2415 for (i = 0;i < len;i++)
2416 if (!(IS_BLANK_CH(str[i]))) return(0);
2417 }
Owen Taylor3473f882001-02-23 17:55:21 +00002418
2419 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002420 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002421 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002422 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002423 if (ctxt->myDoc != NULL) {
2424 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2425 if (ret == 0) return(1);
2426 if (ret == 1) return(0);
2427 }
2428
2429 /*
2430 * Otherwise, heuristic :-\
2431 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002432 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002433 if ((ctxt->node->children == NULL) &&
2434 (RAW == '<') && (NXT(1) == '/')) return(0);
2435
2436 lastChild = xmlGetLastChild(ctxt->node);
2437 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002438 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2439 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002440 } else if (xmlNodeIsText(lastChild))
2441 return(0);
2442 else if ((ctxt->node->children != NULL) &&
2443 (xmlNodeIsText(ctxt->node->children)))
2444 return(0);
2445 return(1);
2446}
2447
Owen Taylor3473f882001-02-23 17:55:21 +00002448/************************************************************************
2449 * *
2450 * Extra stuff for namespace support *
2451 * Relates to http://www.w3.org/TR/WD-xml-names *
2452 * *
2453 ************************************************************************/
2454
2455/**
2456 * xmlSplitQName:
2457 * @ctxt: an XML parser context
2458 * @name: an XML parser context
2459 * @prefix: a xmlChar **
2460 *
2461 * parse an UTF8 encoded XML qualified name string
2462 *
2463 * [NS 5] QName ::= (Prefix ':')? LocalPart
2464 *
2465 * [NS 6] Prefix ::= NCName
2466 *
2467 * [NS 7] LocalPart ::= NCName
2468 *
2469 * Returns the local part, and prefix is updated
2470 * to get the Prefix if any.
2471 */
2472
2473xmlChar *
2474xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2475 xmlChar buf[XML_MAX_NAMELEN + 5];
2476 xmlChar *buffer = NULL;
2477 int len = 0;
2478 int max = XML_MAX_NAMELEN;
2479 xmlChar *ret = NULL;
2480 const xmlChar *cur = name;
2481 int c;
2482
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002483 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002484 *prefix = NULL;
2485
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002486 if (cur == NULL) return(NULL);
2487
Owen Taylor3473f882001-02-23 17:55:21 +00002488#ifndef XML_XML_NAMESPACE
2489 /* xml: prefix is not really a namespace */
2490 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2491 (cur[2] == 'l') && (cur[3] == ':'))
2492 return(xmlStrdup(name));
2493#endif
2494
Daniel Veillard597bc482003-07-24 16:08:28 +00002495 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002496 if (cur[0] == ':')
2497 return(xmlStrdup(name));
2498
2499 c = *cur++;
2500 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2501 buf[len++] = c;
2502 c = *cur++;
2503 }
2504 if (len >= max) {
2505 /*
2506 * Okay someone managed to make a huge name, so he's ready to pay
2507 * for the processing speed.
2508 */
2509 max = len * 2;
2510
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002511 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002512 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002513 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002514 return(NULL);
2515 }
2516 memcpy(buffer, buf, len);
2517 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2518 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002519 xmlChar *tmp;
2520
Owen Taylor3473f882001-02-23 17:55:21 +00002521 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002522 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002523 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002524 if (tmp == NULL) {
2525 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002526 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002527 return(NULL);
2528 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002529 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002530 }
2531 buffer[len++] = c;
2532 c = *cur++;
2533 }
2534 buffer[len] = 0;
2535 }
2536
Daniel Veillard597bc482003-07-24 16:08:28 +00002537 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002538 if (buffer != NULL)
2539 xmlFree(buffer);
2540 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002541 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002542 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002543
Owen Taylor3473f882001-02-23 17:55:21 +00002544 if (buffer == NULL)
2545 ret = xmlStrndup(buf, len);
2546 else {
2547 ret = buffer;
2548 buffer = NULL;
2549 max = XML_MAX_NAMELEN;
2550 }
2551
2552
2553 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002554 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002555 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002556 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002557 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002558 }
Owen Taylor3473f882001-02-23 17:55:21 +00002559 len = 0;
2560
Daniel Veillardbb284f42002-10-16 18:02:47 +00002561 /*
2562 * Check that the first character is proper to start
2563 * a new name
2564 */
2565 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2566 ((c >= 0x41) && (c <= 0x5A)) ||
2567 (c == '_') || (c == ':'))) {
2568 int l;
2569 int first = CUR_SCHAR(cur, l);
2570
2571 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002572 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002573 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002574 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002575 }
2576 }
2577 cur++;
2578
Owen Taylor3473f882001-02-23 17:55:21 +00002579 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2580 buf[len++] = c;
2581 c = *cur++;
2582 }
2583 if (len >= max) {
2584 /*
2585 * Okay someone managed to make a huge name, so he's ready to pay
2586 * for the processing speed.
2587 */
2588 max = len * 2;
2589
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002590 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002591 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002592 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002593 return(NULL);
2594 }
2595 memcpy(buffer, buf, len);
2596 while (c != 0) { /* tested bigname2.xml */
2597 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002598 xmlChar *tmp;
2599
Owen Taylor3473f882001-02-23 17:55:21 +00002600 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002601 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002602 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002603 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002604 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002605 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002606 return(NULL);
2607 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002608 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002609 }
2610 buffer[len++] = c;
2611 c = *cur++;
2612 }
2613 buffer[len] = 0;
2614 }
2615
2616 if (buffer == NULL)
2617 ret = xmlStrndup(buf, len);
2618 else {
2619 ret = buffer;
2620 }
2621 }
2622
2623 return(ret);
2624}
2625
2626/************************************************************************
2627 * *
2628 * The parser itself *
2629 * Relates to http://www.w3.org/TR/REC-xml *
2630 * *
2631 ************************************************************************/
2632
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002633static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002634static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002635 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002636
Owen Taylor3473f882001-02-23 17:55:21 +00002637/**
2638 * xmlParseName:
2639 * @ctxt: an XML parser context
2640 *
2641 * parse an XML name.
2642 *
2643 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2644 * CombiningChar | Extender
2645 *
2646 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2647 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002648 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002649 *
2650 * Returns the Name parsed or NULL
2651 */
2652
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002653const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002654xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002655 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002656 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002657 int count = 0;
2658
2659 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002660
2661 /*
2662 * Accelerator for simple ASCII names
2663 */
2664 in = ctxt->input->cur;
2665 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2666 ((*in >= 0x41) && (*in <= 0x5A)) ||
2667 (*in == '_') || (*in == ':')) {
2668 in++;
2669 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2670 ((*in >= 0x41) && (*in <= 0x5A)) ||
2671 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002672 (*in == '_') || (*in == '-') ||
2673 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002674 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002675 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002676 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002677 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002678 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002679 ctxt->nbChars += count;
2680 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002681 if (ret == NULL)
2682 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002683 return(ret);
2684 }
2685 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002686 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002687}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002688
Daniel Veillard46de64e2002-05-29 08:21:33 +00002689/**
2690 * xmlParseNameAndCompare:
2691 * @ctxt: an XML parser context
2692 *
2693 * parse an XML name and compares for match
2694 * (specialized for endtag parsing)
2695 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002696 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2697 * and the name for mismatch
2698 */
2699
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002700static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002701xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002702 register const xmlChar *cmp = other;
2703 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002704 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002705
2706 GROW;
2707
2708 in = ctxt->input->cur;
2709 while (*in != 0 && *in == *cmp) {
2710 ++in;
2711 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00002712 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002713 }
William M. Brack76e95df2003-10-18 16:20:14 +00002714 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002715 /* success */
2716 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002717 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002718 }
2719 /* failure (or end of input buffer), check with full function */
2720 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002721 /* strings coming from the dictionnary direct compare possible */
2722 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002723 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002724 }
2725 return ret;
2726}
2727
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002728static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002729xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002730 int len = 0, l;
2731 int c;
2732 int count = 0;
2733
2734 /*
2735 * Handler for more complex cases
2736 */
2737 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002738 c = CUR_CHAR(l);
2739 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2740 (!IS_LETTER(c) && (c != '_') &&
2741 (c != ':'))) {
2742 return(NULL);
2743 }
2744
2745 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002746 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002747 (c == '.') || (c == '-') ||
2748 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002749 (IS_COMBINING(c)) ||
2750 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002751 if (count++ > 100) {
2752 count = 0;
2753 GROW;
2754 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002755 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002756 NEXTL(l);
2757 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002758 }
Daniel Veillard96688262005-08-23 18:14:12 +00002759 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2760 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002761 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002762}
2763
2764/**
2765 * xmlParseStringName:
2766 * @ctxt: an XML parser context
2767 * @str: a pointer to the string pointer (IN/OUT)
2768 *
2769 * parse an XML name.
2770 *
2771 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2772 * CombiningChar | Extender
2773 *
2774 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2775 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002776 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002777 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002778 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002779 * is updated to the current location in the string.
2780 */
2781
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002782static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002783xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2784 xmlChar buf[XML_MAX_NAMELEN + 5];
2785 const xmlChar *cur = *str;
2786 int len = 0, l;
2787 int c;
2788
2789 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002790 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002791 (c != ':')) {
2792 return(NULL);
2793 }
2794
William M. Brack871611b2003-10-18 04:53:14 +00002795 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002796 (c == '.') || (c == '-') ||
2797 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002798 (IS_COMBINING(c)) ||
2799 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002800 COPY_BUF(l,buf,len,c);
2801 cur += l;
2802 c = CUR_SCHAR(cur, l);
2803 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2804 /*
2805 * Okay someone managed to make a huge name, so he's ready to pay
2806 * for the processing speed.
2807 */
2808 xmlChar *buffer;
2809 int max = len * 2;
2810
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002811 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002812 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002813 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002814 return(NULL);
2815 }
2816 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002817 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002818 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002819 (c == '.') || (c == '-') ||
2820 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002821 (IS_COMBINING(c)) ||
2822 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002823 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002824 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002825 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002826 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002827 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002828 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002829 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002830 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002831 return(NULL);
2832 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002833 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002834 }
2835 COPY_BUF(l,buffer,len,c);
2836 cur += l;
2837 c = CUR_SCHAR(cur, l);
2838 }
2839 buffer[len] = 0;
2840 *str = cur;
2841 return(buffer);
2842 }
2843 }
2844 *str = cur;
2845 return(xmlStrndup(buf, len));
2846}
2847
2848/**
2849 * xmlParseNmtoken:
2850 * @ctxt: an XML parser context
2851 *
2852 * parse an XML Nmtoken.
2853 *
2854 * [7] Nmtoken ::= (NameChar)+
2855 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002856 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002857 *
2858 * Returns the Nmtoken parsed or NULL
2859 */
2860
2861xmlChar *
2862xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2863 xmlChar buf[XML_MAX_NAMELEN + 5];
2864 int len = 0, l;
2865 int c;
2866 int count = 0;
2867
2868 GROW;
2869 c = CUR_CHAR(l);
2870
William M. Brack871611b2003-10-18 04:53:14 +00002871 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002872 (c == '.') || (c == '-') ||
2873 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002874 (IS_COMBINING(c)) ||
2875 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002876 if (count++ > 100) {
2877 count = 0;
2878 GROW;
2879 }
2880 COPY_BUF(l,buf,len,c);
2881 NEXTL(l);
2882 c = CUR_CHAR(l);
2883 if (len >= XML_MAX_NAMELEN) {
2884 /*
2885 * Okay someone managed to make a huge token, so he's ready to pay
2886 * for the processing speed.
2887 */
2888 xmlChar *buffer;
2889 int max = len * 2;
2890
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002891 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002892 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002893 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002894 return(NULL);
2895 }
2896 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002897 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002898 (c == '.') || (c == '-') ||
2899 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002900 (IS_COMBINING(c)) ||
2901 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002902 if (count++ > 100) {
2903 count = 0;
2904 GROW;
2905 }
2906 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002907 xmlChar *tmp;
2908
Owen Taylor3473f882001-02-23 17:55:21 +00002909 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002910 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002911 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002912 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002913 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002914 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002915 return(NULL);
2916 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002917 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002918 }
2919 COPY_BUF(l,buffer,len,c);
2920 NEXTL(l);
2921 c = CUR_CHAR(l);
2922 }
2923 buffer[len] = 0;
2924 return(buffer);
2925 }
2926 }
2927 if (len == 0)
2928 return(NULL);
2929 return(xmlStrndup(buf, len));
2930}
2931
2932/**
2933 * xmlParseEntityValue:
2934 * @ctxt: an XML parser context
2935 * @orig: if non-NULL store a copy of the original entity value
2936 *
2937 * parse a value for ENTITY declarations
2938 *
2939 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2940 * "'" ([^%&'] | PEReference | Reference)* "'"
2941 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002942 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002943 */
2944
2945xmlChar *
2946xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2947 xmlChar *buf = NULL;
2948 int len = 0;
2949 int size = XML_PARSER_BUFFER_SIZE;
2950 int c, l;
2951 xmlChar stop;
2952 xmlChar *ret = NULL;
2953 const xmlChar *cur = NULL;
2954 xmlParserInputPtr input;
2955
2956 if (RAW == '"') stop = '"';
2957 else if (RAW == '\'') stop = '\'';
2958 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002959 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002960 return(NULL);
2961 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002962 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002963 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002964 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002965 return(NULL);
2966 }
2967
2968 /*
2969 * The content of the entity definition is copied in a buffer.
2970 */
2971
2972 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2973 input = ctxt->input;
2974 GROW;
2975 NEXT;
2976 c = CUR_CHAR(l);
2977 /*
2978 * NOTE: 4.4.5 Included in Literal
2979 * When a parameter entity reference appears in a literal entity
2980 * value, ... a single or double quote character in the replacement
2981 * text is always treated as a normal data character and will not
2982 * terminate the literal.
2983 * In practice it means we stop the loop only when back at parsing
2984 * the initial entity and the quote is found
2985 */
William M. Brack871611b2003-10-18 04:53:14 +00002986 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002987 (ctxt->input != input))) {
2988 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002989 xmlChar *tmp;
2990
Owen Taylor3473f882001-02-23 17:55:21 +00002991 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002992 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2993 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002994 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002995 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00002996 return(NULL);
2997 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002998 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002999 }
3000 COPY_BUF(l,buf,len,c);
3001 NEXTL(l);
3002 /*
3003 * Pop-up of finished entities.
3004 */
3005 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3006 xmlPopInput(ctxt);
3007
3008 GROW;
3009 c = CUR_CHAR(l);
3010 if (c == 0) {
3011 GROW;
3012 c = CUR_CHAR(l);
3013 }
3014 }
3015 buf[len] = 0;
3016
3017 /*
3018 * Raise problem w.r.t. '&' and '%' being used in non-entities
3019 * reference constructs. Note Charref will be handled in
3020 * xmlStringDecodeEntities()
3021 */
3022 cur = buf;
3023 while (*cur != 0) { /* non input consuming */
3024 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3025 xmlChar *name;
3026 xmlChar tmp = *cur;
3027
3028 cur++;
3029 name = xmlParseStringName(ctxt, &cur);
3030 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003031 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003032 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003033 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003034 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003035 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3036 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003037 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003038 }
3039 if (name != NULL)
3040 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003041 if (*cur == 0)
3042 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003043 }
3044 cur++;
3045 }
3046
3047 /*
3048 * Then PEReference entities are substituted.
3049 */
3050 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003051 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003052 xmlFree(buf);
3053 } else {
3054 NEXT;
3055 /*
3056 * NOTE: 4.4.7 Bypassed
3057 * When a general entity reference appears in the EntityValue in
3058 * an entity declaration, it is bypassed and left as is.
3059 * so XML_SUBSTITUTE_REF is not set here.
3060 */
3061 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3062 0, 0, 0);
3063 if (orig != NULL)
3064 *orig = buf;
3065 else
3066 xmlFree(buf);
3067 }
3068
3069 return(ret);
3070}
3071
3072/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003073 * xmlParseAttValueComplex:
3074 * @ctxt: an XML parser context
 * @attlen: the resulting attribute len
 * @normalize: whether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003077 *
3078 * parse a value for an attribute, this is the fallback function
3079 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003080 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003081 *
3082 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3083 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003084static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003085xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003086 xmlChar limit = 0;
3087 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003088 int len = 0;
3089 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003090 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003091 xmlChar *current = NULL;
3092 xmlEntityPtr ent;
3093
Owen Taylor3473f882001-02-23 17:55:21 +00003094 if (NXT(0) == '"') {
3095 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3096 limit = '"';
3097 NEXT;
3098 } else if (NXT(0) == '\'') {
3099 limit = '\'';
3100 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3101 NEXT;
3102 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003103 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003104 return(NULL);
3105 }
3106
3107 /*
3108 * allocate a translation buffer.
3109 */
3110 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003111 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003112 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003113
3114 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003115 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003116 */
3117 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003118 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003119 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003120 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003121 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003122 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003123 if (NXT(1) == '#') {
3124 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003125
Owen Taylor3473f882001-02-23 17:55:21 +00003126 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003127 if (ctxt->replaceEntities) {
3128 if (len > buf_size - 10) {
3129 growBuffer(buf);
3130 }
3131 buf[len++] = '&';
3132 } else {
3133 /*
3134 * The reparsing will be done in xmlStringGetNodeList()
3135 * called by the attribute() function in SAX.c
3136 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003137 if (len > buf_size - 10) {
3138 growBuffer(buf);
3139 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003140 buf[len++] = '&';
3141 buf[len++] = '#';
3142 buf[len++] = '3';
3143 buf[len++] = '8';
3144 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003145 }
3146 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003147 if (len > buf_size - 10) {
3148 growBuffer(buf);
3149 }
Owen Taylor3473f882001-02-23 17:55:21 +00003150 len += xmlCopyChar(0, &buf[len], val);
3151 }
3152 } else {
3153 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003154 if ((ent != NULL) &&
3155 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3156 if (len > buf_size - 10) {
3157 growBuffer(buf);
3158 }
3159 if ((ctxt->replaceEntities == 0) &&
3160 (ent->content[0] == '&')) {
3161 buf[len++] = '&';
3162 buf[len++] = '#';
3163 buf[len++] = '3';
3164 buf[len++] = '8';
3165 buf[len++] = ';';
3166 } else {
3167 buf[len++] = ent->content[0];
3168 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003169 } else if ((ent != NULL) &&
3170 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003171 xmlChar *rep;
3172
3173 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3174 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003175 XML_SUBSTITUTE_REF,
3176 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003177 if (rep != NULL) {
3178 current = rep;
3179 while (*current != 0) { /* non input consuming */
3180 buf[len++] = *current++;
3181 if (len > buf_size - 10) {
3182 growBuffer(buf);
3183 }
3184 }
3185 xmlFree(rep);
3186 }
3187 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003188 if (len > buf_size - 10) {
3189 growBuffer(buf);
3190 }
Owen Taylor3473f882001-02-23 17:55:21 +00003191 if (ent->content != NULL)
3192 buf[len++] = ent->content[0];
3193 }
3194 } else if (ent != NULL) {
3195 int i = xmlStrlen(ent->name);
3196 const xmlChar *cur = ent->name;
3197
3198 /*
3199 * This may look absurd but is needed to detect
3200 * entities problems
3201 */
3202 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3203 (ent->content != NULL)) {
3204 xmlChar *rep;
3205 rep = xmlStringDecodeEntities(ctxt, ent->content,
3206 XML_SUBSTITUTE_REF, 0, 0, 0);
3207 if (rep != NULL)
3208 xmlFree(rep);
3209 }
3210
3211 /*
3212 * Just output the reference
3213 */
3214 buf[len++] = '&';
3215 if (len > buf_size - i - 10) {
3216 growBuffer(buf);
3217 }
3218 for (;i > 0;i--)
3219 buf[len++] = *cur++;
3220 buf[len++] = ';';
3221 }
3222 }
3223 } else {
3224 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003225 if ((len != 0) || (!normalize)) {
3226 if ((!normalize) || (!in_space)) {
3227 COPY_BUF(l,buf,len,0x20);
3228 if (len > buf_size - 10) {
3229 growBuffer(buf);
3230 }
3231 }
3232 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003233 }
3234 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003235 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003236 COPY_BUF(l,buf,len,c);
3237 if (len > buf_size - 10) {
3238 growBuffer(buf);
3239 }
3240 }
3241 NEXTL(l);
3242 }
3243 GROW;
3244 c = CUR_CHAR(l);
3245 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003246 if ((in_space) && (normalize)) {
3247 while (buf[len - 1] == 0x20) len--;
3248 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003249 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003250 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003251 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003252 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003253 if ((c != 0) && (!IS_CHAR(c))) {
3254 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3255 "invalid character in attribute value\n");
3256 } else {
3257 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3258 "AttValue: ' expected\n");
3259 }
Owen Taylor3473f882001-02-23 17:55:21 +00003260 } else
3261 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003262 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003263 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003264
3265mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003266 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003267 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003268}
3269
3270/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003271 * xmlParseAttValue:
3272 * @ctxt: an XML parser context
3273 *
3274 * parse a value for an attribute
3275 * Note: the parser won't do substitution of entities here, this
3276 * will be handled later in xmlStringGetNodeList
3277 *
3278 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3279 * "'" ([^<&'] | Reference)* "'"
3280 *
3281 * 3.3.3 Attribute-Value Normalization:
3282 * Before the value of an attribute is passed to the application or
3283 * checked for validity, the XML processor must normalize it as follows:
3284 * - a character reference is processed by appending the referenced
3285 * character to the attribute value
3286 * - an entity reference is processed by recursively processing the
3287 * replacement text of the entity
3288 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3289 * appending #x20 to the normalized value, except that only a single
3290 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3291 * parsed entity or the literal entity value of an internal parsed entity
3292 * - other characters are processed by appending them to the normalized value
3293 * If the declared value is not CDATA, then the XML processor must further
3294 * process the normalized attribute value by discarding any leading and
3295 * trailing space (#x20) characters, and by replacing sequences of space
3296 * (#x20) characters by a single space (#x20) character.
3297 * All attributes for which no declaration has been read should be treated
3298 * by a non-validating parser as if declared CDATA.
3299 *
3300 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3301 */
3302
3303
3304xmlChar *
3305xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003306 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003307 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003308}
3309
3310/**
Owen Taylor3473f882001-02-23 17:55:21 +00003311 * xmlParseSystemLiteral:
3312 * @ctxt: an XML parser context
3313 *
3314 * parse an XML Literal
3315 *
3316 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3317 *
3318 * Returns the SystemLiteral parsed or NULL
3319 */
3320
3321xmlChar *
3322xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3323 xmlChar *buf = NULL;
3324 int len = 0;
3325 int size = XML_PARSER_BUFFER_SIZE;
3326 int cur, l;
3327 xmlChar stop;
3328 int state = ctxt->instate;
3329 int count = 0;
3330
3331 SHRINK;
3332 if (RAW == '"') {
3333 NEXT;
3334 stop = '"';
3335 } else if (RAW == '\'') {
3336 NEXT;
3337 stop = '\'';
3338 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003339 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003340 return(NULL);
3341 }
3342
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003343 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003344 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003345 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003346 return(NULL);
3347 }
3348 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3349 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003350 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003351 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003352 xmlChar *tmp;
3353
Owen Taylor3473f882001-02-23 17:55:21 +00003354 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003355 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3356 if (tmp == NULL) {
3357 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003358 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003359 ctxt->instate = (xmlParserInputState) state;
3360 return(NULL);
3361 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003362 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003363 }
3364 count++;
3365 if (count > 50) {
3366 GROW;
3367 count = 0;
3368 }
3369 COPY_BUF(l,buf,len,cur);
3370 NEXTL(l);
3371 cur = CUR_CHAR(l);
3372 if (cur == 0) {
3373 GROW;
3374 SHRINK;
3375 cur = CUR_CHAR(l);
3376 }
3377 }
3378 buf[len] = 0;
3379 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003380 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003381 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003382 } else {
3383 NEXT;
3384 }
3385 return(buf);
3386}
3387
3388/**
3389 * xmlParsePubidLiteral:
3390 * @ctxt: an XML parser context
3391 *
3392 * parse an XML public literal
3393 *
3394 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3395 *
3396 * Returns the PubidLiteral parsed or NULL.
3397 */
3398
3399xmlChar *
3400xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3401 xmlChar *buf = NULL;
3402 int len = 0;
3403 int size = XML_PARSER_BUFFER_SIZE;
3404 xmlChar cur;
3405 xmlChar stop;
3406 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003407 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003408
3409 SHRINK;
3410 if (RAW == '"') {
3411 NEXT;
3412 stop = '"';
3413 } else if (RAW == '\'') {
3414 NEXT;
3415 stop = '\'';
3416 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003417 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003418 return(NULL);
3419 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003420 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003421 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003422 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003423 return(NULL);
3424 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003425 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003426 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003427 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003428 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003429 xmlChar *tmp;
3430
Owen Taylor3473f882001-02-23 17:55:21 +00003431 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003432 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3433 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003434 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003435 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003436 return(NULL);
3437 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003438 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003439 }
3440 buf[len++] = cur;
3441 count++;
3442 if (count > 50) {
3443 GROW;
3444 count = 0;
3445 }
3446 NEXT;
3447 cur = CUR;
3448 if (cur == 0) {
3449 GROW;
3450 SHRINK;
3451 cur = CUR;
3452 }
3453 }
3454 buf[len] = 0;
3455 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003456 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003457 } else {
3458 NEXT;
3459 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003460 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003461 return(buf);
3462}
3463
/* Forward declaration: xmlParseCharData() below falls back to it. */
void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);

/*
 * used for the test in the inner loop of the char data testing
 *
 * Lookup table indexed by a byte value. A non-zero entry marks a byte the
 * fast loop of xmlParseCharData() may step over: 0x09 and 0x20..0x7F
 * except '&', '<' and ']'. Every other entry is 0 and stops that loop:
 * the other control characters (including 0x0A and 0x0D), '&', '<', ']'
 * and all bytes >= 0x80.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
3503
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section.
 * if we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is 0 a fast path scans the input buffer in place and hands
 * runs of bytes accepted by test_char_data[] (plus line feeds) straight
 * to the SAX callbacks without copying. Anything the fast path does not
 * handle, and every call with @cdata set, goes through
 * xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /*
     * Position matching ctxt->input->cur; refreshed after each delivery
     * and restored before handing over to the complex parser.
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* First skip over a leading run of spaces and line feeds. */
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            if (*in == '<') {
                /*
                 * Only spaces and line feeds before the next markup:
                 * deliver them (if any) and return.
                 */
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        /*
                         * Distinct callbacks: areBlanks() decides which
                         * one receives the run.
                         */
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /* Step over every byte test_char_data[] accepts. */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is not allowed in content outside CDATA. */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in;
                    return;
                }
                /* A lone ']' is ordinary character data. */
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* Deliver the run scanned since ctxt->input->cur. */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    /*
                     * The run starts with a blank and the callbacks
                     * differ: areBlanks() decides which one receives it.
                     */
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                in++;
                if (*in == 0xA) {
                    /*
                     * CR LF: restart the pending run at the LF, so the CR
                     * is left out, then step past the LF and test the
                     * loop condition again.
                     */
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                /* Lone CR: back up and leave it to the code below. */
                in--;
            }
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /*
     * Put back the position recorded for ctxt->input->cur before the
     * complex parser takes over from there.
     */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
3656
Daniel Veillard01c13b52002-12-10 15:19:08 +00003657/**
3658 * xmlParseCharDataComplex:
3659 * @ctxt: an XML parser context
3660 * @cdata: int indicating whether we are within a CDATA section
3661 *
3662 * parse a CharData section.this is the fallback function
3663 * of xmlParseCharData() when the parsing requires handling
3664 * of non-ASCII characters.
3665 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003666void
3667xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003668 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3669 int nbchar = 0;
3670 int cur, l;
3671 int count = 0;
3672
3673 SHRINK;
3674 GROW;
3675 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003676 while ((cur != '<') && /* checked */
3677 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003678 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003679 if ((cur == ']') && (NXT(1) == ']') &&
3680 (NXT(2) == '>')) {
3681 if (cdata) break;
3682 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003683 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003684 }
3685 }
3686 COPY_BUF(l,buf,nbchar,cur);
3687 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003688 buf[nbchar] = 0;
3689
Owen Taylor3473f882001-02-23 17:55:21 +00003690 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003691 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003692 */
3693 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003694 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003695 if (ctxt->sax->ignorableWhitespace != NULL)
3696 ctxt->sax->ignorableWhitespace(ctxt->userData,
3697 buf, nbchar);
3698 } else {
3699 if (ctxt->sax->characters != NULL)
3700 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003701 if ((ctxt->sax->characters !=
3702 ctxt->sax->ignorableWhitespace) &&
3703 (*ctxt->space == -1))
3704 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00003705 }
3706 }
3707 nbchar = 0;
3708 }
3709 count++;
3710 if (count > 50) {
3711 GROW;
3712 count = 0;
3713 }
3714 NEXTL(l);
3715 cur = CUR_CHAR(l);
3716 }
3717 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003718 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003719 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003720 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003721 */
3722 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003723 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003724 if (ctxt->sax->ignorableWhitespace != NULL)
3725 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3726 } else {
3727 if (ctxt->sax->characters != NULL)
3728 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003729 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
3730 (*ctxt->space == -1))
3731 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00003732 }
3733 }
3734 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00003735 if ((cur != 0) && (!IS_CHAR(cur))) {
3736 /* Generate the error and skip the offending character */
3737 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3738 "PCDATA invalid Char value %d\n",
3739 cur);
3740 NEXTL(l);
3741 }
Owen Taylor3473f882001-02-23 17:55:21 +00003742}
3743
3744/**
3745 * xmlParseExternalID:
3746 * @ctxt: an XML parser context
3747 * @publicID: a xmlChar** receiving PubidLiteral
3748 * @strict: indicate whether we should restrict parsing to only
3749 * production [75], see NOTE below
3750 *
3751 * Parse an External ID or a Public ID
3752 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003753 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003754 * 'PUBLIC' S PubidLiteral S SystemLiteral
3755 *
3756 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3757 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3758 *
3759 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3760 *
3761 * Returns the function returns SystemLiteral and in the second
3762 * case publicID receives PubidLiteral, is strict is off
3763 * it is possible to return NULL and have publicID set.
3764 */
3765
3766xmlChar *
3767xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3768 xmlChar *URI = NULL;
3769
3770 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003771
3772 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003773 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003774 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003775 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003776 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3777 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003778 }
3779 SKIP_BLANKS;
3780 URI = xmlParseSystemLiteral(ctxt);
3781 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003782 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003783 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003784 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003785 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003786 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003787 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003788 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003789 }
3790 SKIP_BLANKS;
3791 *publicID = xmlParsePubidLiteral(ctxt);
3792 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003793 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003794 }
3795 if (strict) {
3796 /*
3797 * We don't handle [83] so "S SystemLiteral" is required.
3798 */
William M. Brack76e95df2003-10-18 16:20:14 +00003799 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003800 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003801 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003802 }
3803 } else {
3804 /*
3805 * We handle [83] so we return immediately, if
3806 * "S SystemLiteral" is not detected. From a purely parsing
3807 * point of view that's a nice mess.
3808 */
3809 const xmlChar *ptr;
3810 GROW;
3811
3812 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003813 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003814
William M. Brack76e95df2003-10-18 16:20:14 +00003815 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003816 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3817 }
3818 SKIP_BLANKS;
3819 URI = xmlParseSystemLiteral(ctxt);
3820 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003821 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003822 }
3823 }
3824 return(URI);
3825}
3826
3827/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00003828 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00003829 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00003830 * @buf: the already parsed part of the buffer
3831 * @len: number of bytes filles in the buffer
3832 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00003833 *
3834 * Skip an XML (SGML) comment <!-- .... -->
3835 * The spec says that "For compatibility, the string "--" (double-hyphen)
3836 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00003837 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00003838 *
3839 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3840 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00003841static void
3842xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00003843 int q, ql;
3844 int r, rl;
3845 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00003846 xmlParserInputPtr input = ctxt->input;
3847 int count = 0;
3848
Owen Taylor3473f882001-02-23 17:55:21 +00003849 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003850 len = 0;
3851 size = XML_PARSER_BUFFER_SIZE;
3852 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3853 if (buf == NULL) {
3854 xmlErrMemory(ctxt, NULL);
3855 return;
3856 }
Owen Taylor3473f882001-02-23 17:55:21 +00003857 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00003858 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00003859 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003860 if (q == 0)
3861 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00003862 if (!IS_CHAR(q)) {
3863 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3864 "xmlParseComment: invalid xmlChar value %d\n",
3865 q);
3866 xmlFree (buf);
3867 return;
3868 }
Owen Taylor3473f882001-02-23 17:55:21 +00003869 NEXTL(ql);
3870 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003871 if (r == 0)
3872 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00003873 if (!IS_CHAR(r)) {
3874 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3875 "xmlParseComment: invalid xmlChar value %d\n",
3876 q);
3877 xmlFree (buf);
3878 return;
3879 }
Owen Taylor3473f882001-02-23 17:55:21 +00003880 NEXTL(rl);
3881 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003882 if (cur == 0)
3883 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00003884 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003885 ((cur != '>') ||
3886 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003887 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003888 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003889 }
3890 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00003891 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003892 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00003893 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3894 if (new_buf == NULL) {
3895 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003896 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003897 return;
3898 }
William M. Bracka3215c72004-07-31 16:24:01 +00003899 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003900 }
3901 COPY_BUF(ql,buf,len,q);
3902 q = r;
3903 ql = rl;
3904 r = cur;
3905 rl = l;
3906
3907 count++;
3908 if (count > 50) {
3909 GROW;
3910 count = 0;
3911 }
3912 NEXTL(l);
3913 cur = CUR_CHAR(l);
3914 if (cur == 0) {
3915 SHRINK;
3916 GROW;
3917 cur = CUR_CHAR(l);
3918 }
3919 }
3920 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00003921 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003922 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003923 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00003924 } else if (!IS_CHAR(cur)) {
3925 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3926 "xmlParseComment: invalid xmlChar value %d\n",
3927 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00003928 } else {
3929 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003930 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3931 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003932 }
3933 NEXT;
3934 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3935 (!ctxt->disableSAX))
3936 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003937 }
Daniel Veillardda629342007-08-01 07:49:06 +00003938 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003939 return;
3940not_terminated:
3941 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3942 "Comment not terminated\n", NULL);
3943 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00003944 return;
Owen Taylor3473f882001-02-23 17:55:21 +00003945}
Daniel Veillardda629342007-08-01 07:49:06 +00003946
Daniel Veillard4c778d82005-01-23 17:37:44 +00003947/**
3948 * xmlParseComment:
3949 * @ctxt: an XML parser context
3950 *
3951 * Skip an XML (SGML) comment <!-- .... -->
3952 * The spec says that "For compatibility, the string "--" (double-hyphen)
3953 * must not occur within comments. "
3954 *
3955 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3956 */
3957void
3958xmlParseComment(xmlParserCtxtPtr ctxt) {
3959 xmlChar *buf = NULL;
3960 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00003961 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00003962 xmlParserInputState state;
3963 const xmlChar *in;
3964 int nbchar = 0, ccol;
3965
3966 /*
3967 * Check that there is a comment right here.
3968 */
3969 if ((RAW != '<') || (NXT(1) != '!') ||
3970 (NXT(2) != '-') || (NXT(3) != '-')) return;
3971
3972 state = ctxt->instate;
3973 ctxt->instate = XML_PARSER_COMMENT;
3974 SKIP(4);
3975 SHRINK;
3976 GROW;
3977
3978 /*
3979 * Accelerated common case where input don't need to be
3980 * modified before passing it to the handler.
3981 */
3982 in = ctxt->input->cur;
3983 do {
3984 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003985 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003986 ctxt->input->line++; ctxt->input->col = 1;
3987 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003988 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00003989 }
3990get_more:
3991 ccol = ctxt->input->col;
3992 while (((*in > '-') && (*in <= 0x7F)) ||
3993 ((*in >= 0x20) && (*in < '-')) ||
3994 (*in == 0x09)) {
3995 in++;
3996 ccol++;
3997 }
3998 ctxt->input->col = ccol;
3999 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004000 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004001 ctxt->input->line++; ctxt->input->col = 1;
4002 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004003 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004004 goto get_more;
4005 }
4006 nbchar = in - ctxt->input->cur;
4007 /*
4008 * save current set of data
4009 */
4010 if (nbchar > 0) {
4011 if ((ctxt->sax != NULL) &&
4012 (ctxt->sax->comment != NULL)) {
4013 if (buf == NULL) {
4014 if ((*in == '-') && (in[1] == '-'))
4015 size = nbchar + 1;
4016 else
4017 size = XML_PARSER_BUFFER_SIZE + nbchar;
4018 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4019 if (buf == NULL) {
4020 xmlErrMemory(ctxt, NULL);
4021 ctxt->instate = state;
4022 return;
4023 }
4024 len = 0;
4025 } else if (len + nbchar + 1 >= size) {
4026 xmlChar *new_buf;
4027 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4028 new_buf = (xmlChar *) xmlRealloc(buf,
4029 size * sizeof(xmlChar));
4030 if (new_buf == NULL) {
4031 xmlFree (buf);
4032 xmlErrMemory(ctxt, NULL);
4033 ctxt->instate = state;
4034 return;
4035 }
4036 buf = new_buf;
4037 }
4038 memcpy(&buf[len], ctxt->input->cur, nbchar);
4039 len += nbchar;
4040 buf[len] = 0;
4041 }
4042 }
4043 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004044 if (*in == 0xA) {
4045 in++;
4046 ctxt->input->line++; ctxt->input->col = 1;
4047 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004048 if (*in == 0xD) {
4049 in++;
4050 if (*in == 0xA) {
4051 ctxt->input->cur = in;
4052 in++;
4053 ctxt->input->line++; ctxt->input->col = 1;
4054 continue; /* while */
4055 }
4056 in--;
4057 }
4058 SHRINK;
4059 GROW;
4060 in = ctxt->input->cur;
4061 if (*in == '-') {
4062 if (in[1] == '-') {
4063 if (in[2] == '>') {
4064 SKIP(3);
4065 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4066 (!ctxt->disableSAX)) {
4067 if (buf != NULL)
4068 ctxt->sax->comment(ctxt->userData, buf);
4069 else
4070 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4071 }
4072 if (buf != NULL)
4073 xmlFree(buf);
4074 ctxt->instate = state;
4075 return;
4076 }
4077 if (buf != NULL)
4078 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4079 "Comment not terminated \n<!--%.50s\n",
4080 buf);
4081 else
4082 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4083 "Comment not terminated \n", NULL);
4084 in++;
4085 ctxt->input->col++;
4086 }
4087 in++;
4088 ctxt->input->col++;
4089 goto get_more;
4090 }
4091 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4092 xmlParseCommentComplex(ctxt, buf, len, size);
4093 ctxt->instate = state;
4094 return;
4095}
4096
Owen Taylor3473f882001-02-23 17:55:21 +00004097
4098/**
4099 * xmlParsePITarget:
4100 * @ctxt: an XML parser context
4101 *
4102 * parse the name of a PI
4103 *
4104 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4105 *
4106 * Returns the PITarget name or NULL
4107 */
4108
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004109const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004110xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004111 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004112
4113 name = xmlParseName(ctxt);
4114 if ((name != NULL) &&
4115 ((name[0] == 'x') || (name[0] == 'X')) &&
4116 ((name[1] == 'm') || (name[1] == 'M')) &&
4117 ((name[2] == 'l') || (name[2] == 'L'))) {
4118 int i;
4119 if ((name[0] == 'x') && (name[1] == 'm') &&
4120 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004121 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004122 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004123 return(name);
4124 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004125 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004126 return(name);
4127 }
4128 for (i = 0;;i++) {
4129 if (xmlW3CPIs[i] == NULL) break;
4130 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4131 return(name);
4132 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004133 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4134 "xmlParsePITarget: invalid name prefix 'xml'\n",
4135 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004136 }
4137 return(name);
4138}
4139
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The expected content is: S? 'catalog' S? '=' S? quoted-URL S?
 * Any deviation raises an XML_WAR_CATALOG_PI warning, except a missing
 * '=' which makes the routine return without any message.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* pseudo attribute name */
    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* '=' sign, possibly surrounded by blanks */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=') {
        return;
    }
    p++;
    while (IS_BLANK_CH(*p))
        p++;

    /* quoted value, closed by the same quote character that opened it */
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the value */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
4201
Owen Taylor3473f882001-02-23 17:55:21 +00004202/**
4203 * xmlParsePI:
4204 * @ctxt: an XML parser context
4205 *
4206 * parse an XML Processing Instruction.
4207 *
4208 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4209 *
4210 * The processing is transfered to SAX once parsed.
4211 */
4212
4213void
4214xmlParsePI(xmlParserCtxtPtr ctxt) {
4215 xmlChar *buf = NULL;
4216 int len = 0;
4217 int size = XML_PARSER_BUFFER_SIZE;
4218 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004219 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004220 xmlParserInputState state;
4221 int count = 0;
4222
4223 if ((RAW == '<') && (NXT(1) == '?')) {
4224 xmlParserInputPtr input = ctxt->input;
4225 state = ctxt->instate;
4226 ctxt->instate = XML_PARSER_PI;
4227 /*
4228 * this is a Processing Instruction.
4229 */
4230 SKIP(2);
4231 SHRINK;
4232
4233 /*
4234 * Parse the target name and check for special support like
4235 * namespace.
4236 */
4237 target = xmlParsePITarget(ctxt);
4238 if (target != NULL) {
4239 if ((RAW == '?') && (NXT(1) == '>')) {
4240 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004241 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4242 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004243 }
4244 SKIP(2);
4245
4246 /*
4247 * SAX: PI detected.
4248 */
4249 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4250 (ctxt->sax->processingInstruction != NULL))
4251 ctxt->sax->processingInstruction(ctxt->userData,
4252 target, NULL);
4253 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004254 return;
4255 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004256 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004257 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004258 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004259 ctxt->instate = state;
4260 return;
4261 }
4262 cur = CUR;
4263 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004264 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4265 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004266 }
4267 SKIP_BLANKS;
4268 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004269 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004270 ((cur != '?') || (NXT(1) != '>'))) {
4271 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004272 xmlChar *tmp;
4273
Owen Taylor3473f882001-02-23 17:55:21 +00004274 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004275 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4276 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004277 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004278 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004279 ctxt->instate = state;
4280 return;
4281 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004282 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004283 }
4284 count++;
4285 if (count > 50) {
4286 GROW;
4287 count = 0;
4288 }
4289 COPY_BUF(l,buf,len,cur);
4290 NEXTL(l);
4291 cur = CUR_CHAR(l);
4292 if (cur == 0) {
4293 SHRINK;
4294 GROW;
4295 cur = CUR_CHAR(l);
4296 }
4297 }
4298 buf[len] = 0;
4299 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004300 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4301 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004302 } else {
4303 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004304 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4305 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004306 }
4307 SKIP(2);
4308
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004309#ifdef LIBXML_CATALOG_ENABLED
4310 if (((state == XML_PARSER_MISC) ||
4311 (state == XML_PARSER_START)) &&
4312 (xmlStrEqual(target, XML_CATALOG_PI))) {
4313 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4314 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4315 (allow == XML_CATA_ALLOW_ALL))
4316 xmlParseCatalogPI(ctxt, buf);
4317 }
4318#endif
4319
4320
Owen Taylor3473f882001-02-23 17:55:21 +00004321 /*
4322 * SAX: PI detected.
4323 */
4324 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4325 (ctxt->sax->processingInstruction != NULL))
4326 ctxt->sax->processingInstruction(ctxt->userData,
4327 target, buf);
4328 }
4329 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004330 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004331 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004332 }
4333 ctxt->instate = state;
4334 }
4335}
4336
4337/**
4338 * xmlParseNotationDecl:
4339 * @ctxt: an XML parser context
4340 *
4341 * parse a notation declaration
4342 *
4343 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4344 *
4345 * Hence there is actually 3 choices:
4346 * 'PUBLIC' S PubidLiteral
4347 * 'PUBLIC' S PubidLiteral S SystemLiteral
4348 * and 'SYSTEM' S SystemLiteral
4349 *
4350 * See the NOTE on xmlParseExternalID().
4351 */
4352
4353void
4354xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004355 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004356 xmlChar *Pubid;
4357 xmlChar *Systemid;
4358
Daniel Veillarda07050d2003-10-19 14:46:32 +00004359 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004360 xmlParserInputPtr input = ctxt->input;
4361 SHRINK;
4362 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004363 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004364 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4365 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004366 return;
4367 }
4368 SKIP_BLANKS;
4369
Daniel Veillard76d66f42001-05-16 21:05:17 +00004370 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004371 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004372 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004373 return;
4374 }
William M. Brack76e95df2003-10-18 16:20:14 +00004375 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004376 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004377 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004378 return;
4379 }
4380 SKIP_BLANKS;
4381
4382 /*
4383 * Parse the IDs.
4384 */
4385 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4386 SKIP_BLANKS;
4387
4388 if (RAW == '>') {
4389 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004390 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4391 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004392 }
4393 NEXT;
4394 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4395 (ctxt->sax->notationDecl != NULL))
4396 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4397 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004398 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004399 }
Owen Taylor3473f882001-02-23 17:55:21 +00004400 if (Systemid != NULL) xmlFree(Systemid);
4401 if (Pubid != NULL) xmlFree(Pubid);
4402 }
4403}
4404
4405/**
4406 * xmlParseEntityDecl:
4407 * @ctxt: an XML parser context
4408 *
4409 * parse <!ENTITY declarations
4410 *
4411 * [70] EntityDecl ::= GEDecl | PEDecl
4412 *
4413 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4414 *
4415 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4416 *
4417 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4418 *
4419 * [74] PEDef ::= EntityValue | ExternalID
4420 *
4421 * [76] NDataDecl ::= S 'NDATA' S Name
4422 *
4423 * [ VC: Notation Declared ]
4424 * The Name must match the declared name of a notation.
4425 */
4426
4427void
4428xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004429 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004430 xmlChar *value = NULL;
4431 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004432 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004433 int isParameter = 0;
4434 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004435 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004436
Daniel Veillard4c778d82005-01-23 17:37:44 +00004437 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004438 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004439 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004440 SHRINK;
4441 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004442 skipped = SKIP_BLANKS;
4443 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004444 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4445 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004446 }
Owen Taylor3473f882001-02-23 17:55:21 +00004447
4448 if (RAW == '%') {
4449 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004450 skipped = SKIP_BLANKS;
4451 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004452 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4453 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004454 }
Owen Taylor3473f882001-02-23 17:55:21 +00004455 isParameter = 1;
4456 }
4457
Daniel Veillard76d66f42001-05-16 21:05:17 +00004458 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004459 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004460 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4461 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004462 return;
4463 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004464 skipped = SKIP_BLANKS;
4465 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004466 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4467 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004468 }
Owen Taylor3473f882001-02-23 17:55:21 +00004469
Daniel Veillardf5582f12002-06-11 10:08:16 +00004470 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004471 /*
4472 * handle the various case of definitions...
4473 */
4474 if (isParameter) {
4475 if ((RAW == '"') || (RAW == '\'')) {
4476 value = xmlParseEntityValue(ctxt, &orig);
4477 if (value) {
4478 if ((ctxt->sax != NULL) &&
4479 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4480 ctxt->sax->entityDecl(ctxt->userData, name,
4481 XML_INTERNAL_PARAMETER_ENTITY,
4482 NULL, NULL, value);
4483 }
4484 } else {
4485 URI = xmlParseExternalID(ctxt, &literal, 1);
4486 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004487 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004488 }
4489 if (URI) {
4490 xmlURIPtr uri;
4491
4492 uri = xmlParseURI((const char *) URI);
4493 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004494 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4495 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004496 /*
4497 * This really ought to be a well formedness error
4498 * but the XML Core WG decided otherwise c.f. issue
4499 * E26 of the XML erratas.
4500 */
Owen Taylor3473f882001-02-23 17:55:21 +00004501 } else {
4502 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004503 /*
4504 * Okay this is foolish to block those but not
4505 * invalid URIs.
4506 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004507 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004508 } else {
4509 if ((ctxt->sax != NULL) &&
4510 (!ctxt->disableSAX) &&
4511 (ctxt->sax->entityDecl != NULL))
4512 ctxt->sax->entityDecl(ctxt->userData, name,
4513 XML_EXTERNAL_PARAMETER_ENTITY,
4514 literal, URI, NULL);
4515 }
4516 xmlFreeURI(uri);
4517 }
4518 }
4519 }
4520 } else {
4521 if ((RAW == '"') || (RAW == '\'')) {
4522 value = xmlParseEntityValue(ctxt, &orig);
4523 if ((ctxt->sax != NULL) &&
4524 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4525 ctxt->sax->entityDecl(ctxt->userData, name,
4526 XML_INTERNAL_GENERAL_ENTITY,
4527 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004528 /*
4529 * For expat compatibility in SAX mode.
4530 */
4531 if ((ctxt->myDoc == NULL) ||
4532 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4533 if (ctxt->myDoc == NULL) {
4534 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4535 }
4536 if (ctxt->myDoc->intSubset == NULL)
4537 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4538 BAD_CAST "fake", NULL, NULL);
4539
Daniel Veillard1af9a412003-08-20 22:54:39 +00004540 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4541 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004542 }
Owen Taylor3473f882001-02-23 17:55:21 +00004543 } else {
4544 URI = xmlParseExternalID(ctxt, &literal, 1);
4545 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004546 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004547 }
4548 if (URI) {
4549 xmlURIPtr uri;
4550
4551 uri = xmlParseURI((const char *)URI);
4552 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004553 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4554 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004555 /*
4556 * This really ought to be a well formedness error
4557 * but the XML Core WG decided otherwise c.f. issue
4558 * E26 of the XML erratas.
4559 */
Owen Taylor3473f882001-02-23 17:55:21 +00004560 } else {
4561 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004562 /*
4563 * Okay this is foolish to block those but not
4564 * invalid URIs.
4565 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004566 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004567 }
4568 xmlFreeURI(uri);
4569 }
4570 }
William M. Brack76e95df2003-10-18 16:20:14 +00004571 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004572 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4573 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004574 }
4575 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004576 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004577 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004578 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004579 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4580 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004581 }
4582 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004583 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004584 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4585 (ctxt->sax->unparsedEntityDecl != NULL))
4586 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4587 literal, URI, ndata);
4588 } else {
4589 if ((ctxt->sax != NULL) &&
4590 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4591 ctxt->sax->entityDecl(ctxt->userData, name,
4592 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4593 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004594 /*
4595 * For expat compatibility in SAX mode.
4596 * assuming the entity repalcement was asked for
4597 */
4598 if ((ctxt->replaceEntities != 0) &&
4599 ((ctxt->myDoc == NULL) ||
4600 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4601 if (ctxt->myDoc == NULL) {
4602 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4603 }
4604
4605 if (ctxt->myDoc->intSubset == NULL)
4606 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4607 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004608 xmlSAX2EntityDecl(ctxt, name,
4609 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4610 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004611 }
Owen Taylor3473f882001-02-23 17:55:21 +00004612 }
4613 }
4614 }
4615 SKIP_BLANKS;
4616 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004617 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004618 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004619 } else {
4620 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004621 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4622 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004623 }
4624 NEXT;
4625 }
4626 if (orig != NULL) {
4627 /*
4628 * Ugly mechanism to save the raw entity value.
4629 */
4630 xmlEntityPtr cur = NULL;
4631
4632 if (isParameter) {
4633 if ((ctxt->sax != NULL) &&
4634 (ctxt->sax->getParameterEntity != NULL))
4635 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4636 } else {
4637 if ((ctxt->sax != NULL) &&
4638 (ctxt->sax->getEntity != NULL))
4639 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004640 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004641 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004642 }
Owen Taylor3473f882001-02-23 17:55:21 +00004643 }
4644 if (cur != NULL) {
4645 if (cur->orig != NULL)
4646 xmlFree(orig);
4647 else
4648 cur->orig = orig;
4649 } else
4650 xmlFree(orig);
4651 }
Owen Taylor3473f882001-02-23 17:55:21 +00004652 if (value != NULL) xmlFree(value);
4653 if (URI != NULL) xmlFree(URI);
4654 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004655 }
4656}
4657
4658/**
4659 * xmlParseDefaultDecl:
4660 * @ctxt: an XML parser context
4661 * @value: Receive a possible fixed default value for the attribute
4662 *
4663 * Parse an attribute default declaration
4664 *
4665 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4666 *
4667 * [ VC: Required Attribute ]
4668 * if the default declaration is the keyword #REQUIRED, then the
4669 * attribute must be specified for all elements of the type in the
4670 * attribute-list declaration.
4671 *
4672 * [ VC: Attribute Default Legal ]
4673 * The declared default value must meet the lexical constraints of
4674 * the declared attribute type c.f. xmlValidateAttributeDecl()
4675 *
4676 * [ VC: Fixed Attribute Default ]
4677 * if an attribute has a default value declared with the #FIXED
4678 * keyword, instances of that attribute must match the default value.
4679 *
4680 * [ WFC: No < in Attribute Values ]
4681 * handled in xmlParseAttValue()
4682 *
4683 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4684 * or XML_ATTRIBUTE_FIXED.
4685 */
4686
4687int
4688xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4689 int val;
4690 xmlChar *ret;
4691
4692 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004693 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004694 SKIP(9);
4695 return(XML_ATTRIBUTE_REQUIRED);
4696 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004697 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004698 SKIP(8);
4699 return(XML_ATTRIBUTE_IMPLIED);
4700 }
4701 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004702 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004703 SKIP(6);
4704 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004705 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004706 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4707 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004708 }
4709 SKIP_BLANKS;
4710 }
4711 ret = xmlParseAttValue(ctxt);
4712 ctxt->instate = XML_PARSER_DTD;
4713 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004714 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004715 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004716 } else
4717 *value = ret;
4718 return(val);
4719}
4720
4721/**
4722 * xmlParseNotationType:
4723 * @ctxt: an XML parser context
4724 *
4725 * parse an Notation attribute type.
4726 *
4727 * Note: the leading 'NOTATION' S part has already being parsed...
4728 *
4729 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4730 *
4731 * [ VC: Notation Attributes ]
4732 * Values of this type must match one of the notation names included
4733 * in the declaration; all notation names in the declaration must be declared.
4734 *
4735 * Returns: the notation attribute tree built while parsing
4736 */
4737
4738xmlEnumerationPtr
4739xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004740 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004741 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4742
4743 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004744 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004745 return(NULL);
4746 }
4747 SHRINK;
4748 do {
4749 NEXT;
4750 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004751 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004752 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004753 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4754 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004755 return(ret);
4756 }
4757 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004758 if (cur == NULL) return(ret);
4759 if (last == NULL) ret = last = cur;
4760 else {
4761 last->next = cur;
4762 last = cur;
4763 }
4764 SKIP_BLANKS;
4765 } while (RAW == '|');
4766 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004767 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004768 if ((last != NULL) && (last != ret))
4769 xmlFreeEnumeration(last);
4770 return(ret);
4771 }
4772 NEXT;
4773 return(ret);
4774}
4775
4776/**
4777 * xmlParseEnumerationType:
4778 * @ctxt: an XML parser context
4779 *
4780 * parse an Enumeration attribute type.
4781 *
4782 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4783 *
4784 * [ VC: Enumeration ]
4785 * Values of this type must match one of the Nmtoken tokens in
4786 * the declaration
4787 *
4788 * Returns: the enumeration attribute tree built while parsing
4789 */
4790
4791xmlEnumerationPtr
4792xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4793 xmlChar *name;
4794 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4795
4796 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004797 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004798 return(NULL);
4799 }
4800 SHRINK;
4801 do {
4802 NEXT;
4803 SKIP_BLANKS;
4804 name = xmlParseNmtoken(ctxt);
4805 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004806 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004807 return(ret);
4808 }
4809 cur = xmlCreateEnumeration(name);
4810 xmlFree(name);
4811 if (cur == NULL) return(ret);
4812 if (last == NULL) ret = last = cur;
4813 else {
4814 last->next = cur;
4815 last = cur;
4816 }
4817 SKIP_BLANKS;
4818 } while (RAW == '|');
4819 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004820 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004821 return(ret);
4822 }
4823 NEXT;
4824 return(ret);
4825}
4826
4827/**
4828 * xmlParseEnumeratedType:
4829 * @ctxt: an XML parser context
4830 * @tree: the enumeration tree built while parsing
4831 *
4832 * parse an Enumerated attribute type.
4833 *
4834 * [57] EnumeratedType ::= NotationType | Enumeration
4835 *
4836 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4837 *
4838 *
4839 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4840 */
4841
4842int
4843xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004844 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004845 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004846 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004847 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4848 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004849 return(0);
4850 }
4851 SKIP_BLANKS;
4852 *tree = xmlParseNotationType(ctxt);
4853 if (*tree == NULL) return(0);
4854 return(XML_ATTRIBUTE_NOTATION);
4855 }
4856 *tree = xmlParseEnumerationType(ctxt);
4857 if (*tree == NULL) return(0);
4858 return(XML_ATTRIBUTE_ENUMERATION);
4859}
4860
4861/**
4862 * xmlParseAttributeType:
4863 * @ctxt: an XML parser context
4864 * @tree: the enumeration tree built while parsing
4865 *
4866 * parse the Attribute list def for an element
4867 *
4868 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4869 *
4870 * [55] StringType ::= 'CDATA'
4871 *
4872 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4873 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4874 *
4875 * Validity constraints for attribute values syntax are checked in
4876 * xmlValidateAttributeValue()
4877 *
4878 * [ VC: ID ]
4879 * Values of type ID must match the Name production. A name must not
4880 * appear more than once in an XML document as a value of this type;
4881 * i.e., ID values must uniquely identify the elements which bear them.
4882 *
4883 * [ VC: One ID per Element Type ]
4884 * No element type may have more than one ID attribute specified.
4885 *
4886 * [ VC: ID Attribute Default ]
4887 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4888 *
4889 * [ VC: IDREF ]
4890 * Values of type IDREF must match the Name production, and values
4891 * of type IDREFS must match Names; each IDREF Name must match the value
4892 * of an ID attribute on some element in the XML document; i.e. IDREF
4893 * values must match the value of some ID attribute.
4894 *
4895 * [ VC: Entity Name ]
4896 * Values of type ENTITY must match the Name production, values
4897 * of type ENTITIES must match Names; each Entity Name must match the
4898 * name of an unparsed entity declared in the DTD.
4899 *
4900 * [ VC: Name Token ]
4901 * Values of type NMTOKEN must match the Nmtoken production; values
4902 * of type NMTOKENS must match Nmtokens.
4903 *
4904 * Returns the attribute type
4905 */
4906int
4907xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4908 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004909 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004910 SKIP(5);
4911 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004912 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004913 SKIP(6);
4914 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004915 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004916 SKIP(5);
4917 return(XML_ATTRIBUTE_IDREF);
4918 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4919 SKIP(2);
4920 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004921 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004922 SKIP(6);
4923 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004924 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004925 SKIP(8);
4926 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004927 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004928 SKIP(8);
4929 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004930 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004931 SKIP(7);
4932 return(XML_ATTRIBUTE_NMTOKEN);
4933 }
4934 return(xmlParseEnumeratedType(ctxt, tree));
4935}
4936
4937/**
4938 * xmlParseAttributeListDecl:
4939 * @ctxt: an XML parser context
4940 *
4941 * : parse the Attribute list def for an element
4942 *
4943 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4944 *
4945 * [53] AttDef ::= S Name S AttType S DefaultDecl
4946 *
4947 */
4948void
4949xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004950 const xmlChar *elemName;
4951 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004952 xmlEnumerationPtr tree;
4953
Daniel Veillarda07050d2003-10-19 14:46:32 +00004954 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004955 xmlParserInputPtr input = ctxt->input;
4956
4957 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004958 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004959 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004960 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004961 }
4962 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004963 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004964 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004965 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4966 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004967 return;
4968 }
4969 SKIP_BLANKS;
4970 GROW;
4971 while (RAW != '>') {
4972 const xmlChar *check = CUR_PTR;
4973 int type;
4974 int def;
4975 xmlChar *defaultValue = NULL;
4976
4977 GROW;
4978 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004979 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004980 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004981 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4982 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004983 break;
4984 }
4985 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004986 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004987 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004988 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004989 break;
4990 }
4991 SKIP_BLANKS;
4992
4993 type = xmlParseAttributeType(ctxt, &tree);
4994 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004995 break;
4996 }
4997
4998 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004999 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005000 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5001 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005002 if (tree != NULL)
5003 xmlFreeEnumeration(tree);
5004 break;
5005 }
5006 SKIP_BLANKS;
5007
5008 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5009 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005010 if (defaultValue != NULL)
5011 xmlFree(defaultValue);
5012 if (tree != NULL)
5013 xmlFreeEnumeration(tree);
5014 break;
5015 }
5016
5017 GROW;
5018 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005019 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005020 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005021 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005022 if (defaultValue != NULL)
5023 xmlFree(defaultValue);
5024 if (tree != NULL)
5025 xmlFreeEnumeration(tree);
5026 break;
5027 }
5028 SKIP_BLANKS;
5029 }
5030 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005031 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5032 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005033 if (defaultValue != NULL)
5034 xmlFree(defaultValue);
5035 if (tree != NULL)
5036 xmlFreeEnumeration(tree);
5037 break;
5038 }
5039 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5040 (ctxt->sax->attributeDecl != NULL))
5041 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5042 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005043 else if (tree != NULL)
5044 xmlFreeEnumeration(tree);
5045
5046 if ((ctxt->sax2) && (defaultValue != NULL) &&
5047 (def != XML_ATTRIBUTE_IMPLIED) &&
5048 (def != XML_ATTRIBUTE_REQUIRED)) {
5049 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5050 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005051 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005052 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5053 }
Owen Taylor3473f882001-02-23 17:55:21 +00005054 if (defaultValue != NULL)
5055 xmlFree(defaultValue);
5056 GROW;
5057 }
5058 if (RAW == '>') {
5059 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005060 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5061 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005062 }
5063 NEXT;
5064 }
Owen Taylor3473f882001-02-23 17:55:21 +00005065 }
5066}
5067
5068/**
5069 * xmlParseElementMixedContentDecl:
5070 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005071 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005072 *
5073 * parse the declaration for a Mixed Element content
5074 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5075 *
5076 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5077 * '(' S? '#PCDATA' S? ')'
5078 *
5079 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5080 *
5081 * [ VC: No Duplicate Types ]
5082 * The same name must not appear more than once in a single
5083 * mixed-content declaration.
5084 *
5085 * returns: the list of the xmlElementContentPtr describing the element choices
5086 */
5087xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005088xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005089 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005090 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005091
5092 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005093 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005094 SKIP(7);
5095 SKIP_BLANKS;
5096 SHRINK;
5097 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005098 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005099 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5100"Element content declaration doesn't start and stop in the same entity\n",
5101 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005102 }
Owen Taylor3473f882001-02-23 17:55:21 +00005103 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005104 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005105 if (RAW == '*') {
5106 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5107 NEXT;
5108 }
5109 return(ret);
5110 }
5111 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005112 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005113 if (ret == NULL) return(NULL);
5114 }
5115 while (RAW == '|') {
5116 NEXT;
5117 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005118 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005119 if (ret == NULL) return(NULL);
5120 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005121 if (cur != NULL)
5122 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005123 cur = ret;
5124 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005125 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005126 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005127 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005128 if (n->c1 != NULL)
5129 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005130 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005131 if (n != NULL)
5132 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005133 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005134 }
5135 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005136 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005137 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005138 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005139 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005140 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005141 return(NULL);
5142 }
5143 SKIP_BLANKS;
5144 GROW;
5145 }
5146 if ((RAW == ')') && (NXT(1) == '*')) {
5147 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005148 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005149 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005150 if (cur->c2 != NULL)
5151 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005152 }
5153 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005154 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005155 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5156"Element content declaration doesn't start and stop in the same entity\n",
5157 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005158 }
Owen Taylor3473f882001-02-23 17:55:21 +00005159 SKIP(2);
5160 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005161 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005162 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005163 return(NULL);
5164 }
5165
5166 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005167 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005168 }
5169 return(ret);
5170}
5171
5172/**
5173 * xmlParseElementChildrenContentDecl:
5174 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005175 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005176 *
5177 * parse the declaration for a Mixed Element content
5178 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5179 *
5180 *
5181 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5182 *
5183 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5184 *
5185 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5186 *
5187 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5188 *
5189 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5190 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005191 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005192 * opening or closing parentheses in a choice, seq, or Mixed
5193 * construct is contained in the replacement text for a parameter
5194 * entity, both must be contained in the same replacement text. For
5195 * interoperability, if a parameter-entity reference appears in a
5196 * choice, seq, or Mixed construct, its replacement text should not
5197 * be empty, and neither the first nor last non-blank character of
5198 * the replacement text should be a connector (| or ,).
5199 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005200 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005201 * hierarchy.
5202 */
5203xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005204xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005205 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005206 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005207 xmlChar type = 0;
5208
5209 SKIP_BLANKS;
5210 GROW;
5211 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005212 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005213
Owen Taylor3473f882001-02-23 17:55:21 +00005214 /* Recurse on first child */
5215 NEXT;
5216 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005217 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005218 SKIP_BLANKS;
5219 GROW;
5220 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005221 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005222 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005223 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005224 return(NULL);
5225 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005226 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005227 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005228 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005229 return(NULL);
5230 }
Owen Taylor3473f882001-02-23 17:55:21 +00005231 GROW;
5232 if (RAW == '?') {
5233 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5234 NEXT;
5235 } else if (RAW == '*') {
5236 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5237 NEXT;
5238 } else if (RAW == '+') {
5239 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5240 NEXT;
5241 } else {
5242 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5243 }
Owen Taylor3473f882001-02-23 17:55:21 +00005244 GROW;
5245 }
5246 SKIP_BLANKS;
5247 SHRINK;
5248 while (RAW != ')') {
5249 /*
5250 * Each loop we parse one separator and one element.
5251 */
5252 if (RAW == ',') {
5253 if (type == 0) type = CUR;
5254
5255 /*
5256 * Detect "Name | Name , Name" error
5257 */
5258 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005259 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005260 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005261 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005262 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005263 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005264 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005265 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005266 return(NULL);
5267 }
5268 NEXT;
5269
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005270 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005271 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005272 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005273 xmlFreeDocElementContent(ctxt->myDoc, last);
5274 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005275 return(NULL);
5276 }
5277 if (last == NULL) {
5278 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005279 if (ret != NULL)
5280 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005281 ret = cur = op;
5282 } else {
5283 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005284 if (op != NULL)
5285 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005286 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005287 if (last != NULL)
5288 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005289 cur =op;
5290 last = NULL;
5291 }
5292 } else if (RAW == '|') {
5293 if (type == 0) type = CUR;
5294
5295 /*
5296 * Detect "Name , Name | Name" error
5297 */
5298 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005299 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005300 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005301 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005302 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005303 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005304 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005305 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005306 return(NULL);
5307 }
5308 NEXT;
5309
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005310 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005311 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005312 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005313 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005314 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005315 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005316 return(NULL);
5317 }
5318 if (last == NULL) {
5319 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005320 if (ret != NULL)
5321 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005322 ret = cur = op;
5323 } else {
5324 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005325 if (op != NULL)
5326 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005327 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005328 if (last != NULL)
5329 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005330 cur =op;
5331 last = NULL;
5332 }
5333 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005334 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00005335 if ((last != NULL) && (last != ret))
5336 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005337 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005338 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005339 return(NULL);
5340 }
5341 GROW;
5342 SKIP_BLANKS;
5343 GROW;
5344 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005345 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005346 /* Recurse on second child */
5347 NEXT;
5348 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005349 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005350 SKIP_BLANKS;
5351 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005352 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005353 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005354 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005355 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005356 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005357 return(NULL);
5358 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005359 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005360 if (RAW == '?') {
5361 last->ocur = XML_ELEMENT_CONTENT_OPT;
5362 NEXT;
5363 } else if (RAW == '*') {
5364 last->ocur = XML_ELEMENT_CONTENT_MULT;
5365 NEXT;
5366 } else if (RAW == '+') {
5367 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5368 NEXT;
5369 } else {
5370 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5371 }
5372 }
5373 SKIP_BLANKS;
5374 GROW;
5375 }
5376 if ((cur != NULL) && (last != NULL)) {
5377 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005378 if (last != NULL)
5379 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005380 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005381 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005382 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5383"Element content declaration doesn't start and stop in the same entity\n",
5384 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005385 }
Owen Taylor3473f882001-02-23 17:55:21 +00005386 NEXT;
5387 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005388 if (ret != NULL) {
5389 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5390 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5391 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5392 else
5393 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5394 }
Owen Taylor3473f882001-02-23 17:55:21 +00005395 NEXT;
5396 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005397 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005398 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005399 cur = ret;
5400 /*
5401 * Some normalization:
5402 * (a | b* | c?)* == (a | b | c)*
5403 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005404 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005405 if ((cur->c1 != NULL) &&
5406 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5407 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5408 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5409 if ((cur->c2 != NULL) &&
5410 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5411 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5412 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5413 cur = cur->c2;
5414 }
5415 }
Owen Taylor3473f882001-02-23 17:55:21 +00005416 NEXT;
5417 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005418 if (ret != NULL) {
5419 int found = 0;
5420
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005421 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5422 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5423 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005424 else
5425 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005426 /*
5427 * Some normalization:
5428 * (a | b*)+ == (a | b)*
5429 * (a | b?)+ == (a | b)*
5430 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005431 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005432 if ((cur->c1 != NULL) &&
5433 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5434 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5435 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5436 found = 1;
5437 }
5438 if ((cur->c2 != NULL) &&
5439 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5440 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5441 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5442 found = 1;
5443 }
5444 cur = cur->c2;
5445 }
5446 if (found)
5447 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5448 }
Owen Taylor3473f882001-02-23 17:55:21 +00005449 NEXT;
5450 }
5451 return(ret);
5452}
5453
5454/**
5455 * xmlParseElementContentDecl:
5456 * @ctxt: an XML parser context
5457 * @name: the name of the element being defined.
5458 * @result: the Element Content pointer will be stored here if any
5459 *
5460 * parse the declaration for an Element content either Mixed or Children,
5461 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5462 *
5463 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5464 *
5465 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5466 */
5467
5468int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005469xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005470 xmlElementContentPtr *result) {
5471
5472 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005473 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005474 int res;
5475
5476 *result = NULL;
5477
5478 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005479 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005480 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005481 return(-1);
5482 }
5483 NEXT;
5484 GROW;
5485 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005486 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005487 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005488 res = XML_ELEMENT_TYPE_MIXED;
5489 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005490 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005491 res = XML_ELEMENT_TYPE_ELEMENT;
5492 }
Owen Taylor3473f882001-02-23 17:55:21 +00005493 SKIP_BLANKS;
5494 *result = tree;
5495 return(res);
5496}
5497
5498/**
5499 * xmlParseElementDecl:
5500 * @ctxt: an XML parser context
5501 *
5502 * parse an Element declaration.
5503 *
5504 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5505 *
5506 * [ VC: Unique Element Type Declaration ]
5507 * No element type may be declared more than once
5508 *
5509 * Returns the type of the element, or -1 in case of error
5510 */
5511int
5512xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005513 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005514 int ret = -1;
5515 xmlElementContentPtr content = NULL;
5516
Daniel Veillard4c778d82005-01-23 17:37:44 +00005517 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005518 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005519 xmlParserInputPtr input = ctxt->input;
5520
5521 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005522 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005523 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5524 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005525 }
5526 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005527 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005528 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005529 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5530 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005531 return(-1);
5532 }
5533 while ((RAW == 0) && (ctxt->inputNr > 1))
5534 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005535 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005536 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5537 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005538 }
5539 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005540 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005541 SKIP(5);
5542 /*
5543 * Element must always be empty.
5544 */
5545 ret = XML_ELEMENT_TYPE_EMPTY;
5546 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5547 (NXT(2) == 'Y')) {
5548 SKIP(3);
5549 /*
5550 * Element is a generic container.
5551 */
5552 ret = XML_ELEMENT_TYPE_ANY;
5553 } else if (RAW == '(') {
5554 ret = xmlParseElementContentDecl(ctxt, name, &content);
5555 } else {
5556 /*
5557 * [ WFC: PEs in Internal Subset ] error handling.
5558 */
5559 if ((RAW == '%') && (ctxt->external == 0) &&
5560 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005561 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005562 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005563 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005564 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005565 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5566 }
Owen Taylor3473f882001-02-23 17:55:21 +00005567 return(-1);
5568 }
5569
5570 SKIP_BLANKS;
5571 /*
5572 * Pop-up of finished entities.
5573 */
5574 while ((RAW == 0) && (ctxt->inputNr > 1))
5575 xmlPopInput(ctxt);
5576 SKIP_BLANKS;
5577
5578 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005579 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005580 if (content != NULL) {
5581 xmlFreeDocElementContent(ctxt->myDoc, content);
5582 }
Owen Taylor3473f882001-02-23 17:55:21 +00005583 } else {
5584 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005585 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5586 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005587 }
5588
5589 NEXT;
5590 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005591 (ctxt->sax->elementDecl != NULL)) {
5592 if (content != NULL)
5593 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005594 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5595 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005596 if ((content != NULL) && (content->parent == NULL)) {
5597 /*
5598 * this is a trick: if xmlAddElementDecl is called,
5599 * instead of copying the full tree it is plugged directly
5600 * if called from the parser. Avoid duplicating the
5601 * interfaces or change the API/ABI
5602 */
5603 xmlFreeDocElementContent(ctxt->myDoc, content);
5604 }
5605 } else if (content != NULL) {
5606 xmlFreeDocElementContent(ctxt->myDoc, content);
5607 }
Owen Taylor3473f882001-02-23 17:55:21 +00005608 }
Owen Taylor3473f882001-02-23 17:55:21 +00005609 }
5610 return(ret);
5611}
5612
5613/**
Owen Taylor3473f882001-02-23 17:55:21 +00005614 * xmlParseConditionalSections
5615 * @ctxt: an XML parser context
5616 *
5617 * [61] conditionalSect ::= includeSect | ignoreSect
5618 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5619 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5620 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5621 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5622 */
5623
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005624static void
Owen Taylor3473f882001-02-23 17:55:21 +00005625xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5626 SKIP(3);
5627 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005628 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005629 SKIP(7);
5630 SKIP_BLANKS;
5631 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005632 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005633 } else {
5634 NEXT;
5635 }
5636 if (xmlParserDebugEntities) {
5637 if ((ctxt->input != NULL) && (ctxt->input->filename))
5638 xmlGenericError(xmlGenericErrorContext,
5639 "%s(%d): ", ctxt->input->filename,
5640 ctxt->input->line);
5641 xmlGenericError(xmlGenericErrorContext,
5642 "Entering INCLUDE Conditional Section\n");
5643 }
5644
5645 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5646 (NXT(2) != '>'))) {
5647 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005648 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005649
5650 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5651 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005652 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005653 NEXT;
5654 } else if (RAW == '%') {
5655 xmlParsePEReference(ctxt);
5656 } else
5657 xmlParseMarkupDecl(ctxt);
5658
5659 /*
5660 * Pop-up of finished entities.
5661 */
5662 while ((RAW == 0) && (ctxt->inputNr > 1))
5663 xmlPopInput(ctxt);
5664
Daniel Veillardfdc91562002-07-01 21:52:03 +00005665 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005666 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005667 break;
5668 }
5669 }
5670 if (xmlParserDebugEntities) {
5671 if ((ctxt->input != NULL) && (ctxt->input->filename))
5672 xmlGenericError(xmlGenericErrorContext,
5673 "%s(%d): ", ctxt->input->filename,
5674 ctxt->input->line);
5675 xmlGenericError(xmlGenericErrorContext,
5676 "Leaving INCLUDE Conditional Section\n");
5677 }
5678
Daniel Veillarda07050d2003-10-19 14:46:32 +00005679 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005680 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005681 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005682 int depth = 0;
5683
5684 SKIP(6);
5685 SKIP_BLANKS;
5686 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005687 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005688 } else {
5689 NEXT;
5690 }
5691 if (xmlParserDebugEntities) {
5692 if ((ctxt->input != NULL) && (ctxt->input->filename))
5693 xmlGenericError(xmlGenericErrorContext,
5694 "%s(%d): ", ctxt->input->filename,
5695 ctxt->input->line);
5696 xmlGenericError(xmlGenericErrorContext,
5697 "Entering IGNORE Conditional Section\n");
5698 }
5699
5700 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005701 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005702 * But disable SAX event generating DTD building in the meantime
5703 */
5704 state = ctxt->disableSAX;
5705 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005706 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005707 ctxt->instate = XML_PARSER_IGNORE;
5708
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005709 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005710 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5711 depth++;
5712 SKIP(3);
5713 continue;
5714 }
5715 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5716 if (--depth >= 0) SKIP(3);
5717 continue;
5718 }
5719 NEXT;
5720 continue;
5721 }
5722
5723 ctxt->disableSAX = state;
5724 ctxt->instate = instate;
5725
5726 if (xmlParserDebugEntities) {
5727 if ((ctxt->input != NULL) && (ctxt->input->filename))
5728 xmlGenericError(xmlGenericErrorContext,
5729 "%s(%d): ", ctxt->input->filename,
5730 ctxt->input->line);
5731 xmlGenericError(xmlGenericErrorContext,
5732 "Leaving IGNORE Conditional Section\n");
5733 }
5734
5735 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005736 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005737 }
5738
5739 if (RAW == 0)
5740 SHRINK;
5741
5742 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005743 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005744 } else {
5745 SKIP(3);
5746 }
5747}
5748
5749/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005750 * xmlParseMarkupDecl:
5751 * @ctxt: an XML parser context
5752 *
5753 * parse Markup declarations
5754 *
5755 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5756 * NotationDecl | PI | Comment
5757 *
5758 * [ VC: Proper Declaration/PE Nesting ]
5759 * Parameter-entity replacement text must be properly nested with
5760 * markup declarations. That is to say, if either the first character
5761 * or the last character of a markup declaration (markupdecl above) is
5762 * contained in the replacement text for a parameter-entity reference,
5763 * both must be contained in the same replacement text.
5764 *
5765 * [ WFC: PEs in Internal Subset ]
5766 * In the internal DTD subset, parameter-entity references can occur
5767 * only where markup declarations can occur, not within markup declarations.
5768 * (This does not apply to references that occur in external parameter
5769 * entities or to the external subset.)
5770 */
5771void
5772xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5773 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005774 if (CUR == '<') {
5775 if (NXT(1) == '!') {
5776 switch (NXT(2)) {
5777 case 'E':
5778 if (NXT(3) == 'L')
5779 xmlParseElementDecl(ctxt);
5780 else if (NXT(3) == 'N')
5781 xmlParseEntityDecl(ctxt);
5782 break;
5783 case 'A':
5784 xmlParseAttributeListDecl(ctxt);
5785 break;
5786 case 'N':
5787 xmlParseNotationDecl(ctxt);
5788 break;
5789 case '-':
5790 xmlParseComment(ctxt);
5791 break;
5792 default:
5793 /* there is an error but it will be detected later */
5794 break;
5795 }
5796 } else if (NXT(1) == '?') {
5797 xmlParsePI(ctxt);
5798 }
5799 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005800 /*
5801 * This is only for internal subset. On external entities,
5802 * the replacement is done before parsing stage
5803 */
5804 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5805 xmlParsePEReference(ctxt);
5806
5807 /*
5808 * Conditional sections are allowed from entities included
5809 * by PE References in the internal subset.
5810 */
5811 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5812 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5813 xmlParseConditionalSections(ctxt);
5814 }
5815 }
5816
5817 ctxt->instate = XML_PARSER_DTD;
5818}
5819
5820/**
5821 * xmlParseTextDecl:
5822 * @ctxt: an XML parser context
5823 *
5824 * parse an XML declaration header for external entities
5825 *
5826 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5827 *
5828 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5829 */
5830
5831void
5832xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5833 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005834 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005835
5836 /*
5837 * We know that '<?xml' is here.
5838 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005839 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005840 SKIP(5);
5841 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005842 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005843 return;
5844 }
5845
William M. Brack76e95df2003-10-18 16:20:14 +00005846 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005847 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5848 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005849 }
5850 SKIP_BLANKS;
5851
5852 /*
5853 * We may have the VersionInfo here.
5854 */
5855 version = xmlParseVersionInfo(ctxt);
5856 if (version == NULL)
5857 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005858 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005859 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005860 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5861 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005862 }
5863 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005864 ctxt->input->version = version;
5865
5866 /*
5867 * We must have the encoding declaration
5868 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005869 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005870 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5871 /*
5872 * The XML REC instructs us to stop parsing right here
5873 */
5874 return;
5875 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005876 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5877 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5878 "Missing encoding in text declaration\n");
5879 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005880
5881 SKIP_BLANKS;
5882 if ((RAW == '?') && (NXT(1) == '>')) {
5883 SKIP(2);
5884 } else if (RAW == '>') {
5885 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005886 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005887 NEXT;
5888 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005889 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005890 MOVETO_ENDTAG(CUR_PTR);
5891 NEXT;
5892 }
5893}
5894
5895/**
Owen Taylor3473f882001-02-23 17:55:21 +00005896 * xmlParseExternalSubset:
5897 * @ctxt: an XML parser context
5898 * @ExternalID: the external identifier
5899 * @SystemID: the system identifier (or URL)
5900 *
5901 * parse Markup declarations from an external subset
5902 *
5903 * [30] extSubset ::= textDecl? extSubsetDecl
5904 *
5905 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5906 */
5907void
5908xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5909 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005910 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005911 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005912 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005913 xmlParseTextDecl(ctxt);
5914 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5915 /*
5916 * The XML REC instructs us to stop parsing right here
5917 */
5918 ctxt->instate = XML_PARSER_EOF;
5919 return;
5920 }
5921 }
5922 if (ctxt->myDoc == NULL) {
5923 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5924 }
5925 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5926 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5927
5928 ctxt->instate = XML_PARSER_DTD;
5929 ctxt->external = 1;
5930 while (((RAW == '<') && (NXT(1) == '?')) ||
5931 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005932 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005933 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005934 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005935
5936 GROW;
5937 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5938 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005939 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005940 NEXT;
5941 } else if (RAW == '%') {
5942 xmlParsePEReference(ctxt);
5943 } else
5944 xmlParseMarkupDecl(ctxt);
5945
5946 /*
5947 * Pop-up of finished entities.
5948 */
5949 while ((RAW == 0) && (ctxt->inputNr > 1))
5950 xmlPopInput(ctxt);
5951
Daniel Veillardfdc91562002-07-01 21:52:03 +00005952 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005953 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005954 break;
5955 }
5956 }
5957
5958 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005959 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005960 }
5961
5962}
5963
5964/**
5965 * xmlParseReference:
5966 * @ctxt: an XML parser context
5967 *
5968 * parse and handle entity references in content, depending on the SAX
5969 * interface, this may end-up in a call to character() if this is a
5970 * CharRef, a predefined entity, if there is no reference() callback.
5971 * or if the parser was asked to switch to that mode.
5972 *
5973 * [67] Reference ::= EntityRef | CharRef
5974 */
5975void
5976xmlParseReference(xmlParserCtxtPtr ctxt) {
5977 xmlEntityPtr ent;
5978 xmlChar *val;
5979 if (RAW != '&') return;
5980
5981 if (NXT(1) == '#') {
5982 int i = 0;
5983 xmlChar out[10];
5984 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005985 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005986
5987 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5988 /*
5989 * So we are using non-UTF-8 buffers
5990 * Check that the char fit on 8bits, if not
5991 * generate a CharRef.
5992 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005993 if (value <= 0xFF) {
5994 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005995 out[1] = 0;
5996 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5997 (!ctxt->disableSAX))
5998 ctxt->sax->characters(ctxt->userData, out, 1);
5999 } else {
6000 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006001 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006002 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006003 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006004 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6005 (!ctxt->disableSAX))
6006 ctxt->sax->reference(ctxt->userData, out);
6007 }
6008 } else {
6009 /*
6010 * Just encode the value in UTF-8
6011 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006012 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006013 out[i] = 0;
6014 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6015 (!ctxt->disableSAX))
6016 ctxt->sax->characters(ctxt->userData, out, i);
6017 }
6018 } else {
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006019 int was_checked;
6020
Owen Taylor3473f882001-02-23 17:55:21 +00006021 ent = xmlParseEntityRef(ctxt);
6022 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006023 if (!ctxt->wellFormed)
6024 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006025 was_checked = ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00006026 if ((ent->name != NULL) &&
6027 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
6028 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00006029 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006030
6031
6032 /*
6033 * The first reference to the entity trigger a parsing phase
6034 * where the ent->children is filled with the result from
6035 * the parsing.
6036 */
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006037 if (ent->checked == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006038 xmlChar *value;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006039
Owen Taylor3473f882001-02-23 17:55:21 +00006040 value = ent->content;
6041
6042 /*
6043 * Check that this entity is well formed
6044 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00006045 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006046 (value[1] == 0) && (value[0] == '<') &&
6047 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
6048 /*
6049 * DONE: get definite answer on this !!!
6050 * Lots of entity decls are used to declare a single
6051 * char
6052 * <!ENTITY lt "<">
6053 * Which seems to be valid since
6054 * 2.4: The ampersand character (&) and the left angle
6055 * bracket (<) may appear in their literal form only
6056 * when used ... They are also legal within the literal
6057 * entity value of an internal entity declaration;i
6058 * see "4.3.2 Well-Formed Parsed Entities".
6059 * IMHO 2.4 and 4.3.2 are directly in contradiction.
6060 * Looking at the OASIS test suite and James Clark
6061 * tests, this is broken. However the XML REC uses
6062 * it. Is the XML REC not well-formed ????
6063 * This is a hack to avoid this problem
6064 *
6065 * ANSWER: since lt gt amp .. are already defined,
6066 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006067 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00006068 * is lousy but acceptable.
6069 */
6070 list = xmlNewDocText(ctxt->myDoc, value);
6071 if (list != NULL) {
6072 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6073 (ent->children == NULL)) {
6074 ent->children = list;
6075 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006076 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006077 list->parent = (xmlNodePtr) ent;
6078 } else {
6079 xmlFreeNodeList(list);
6080 }
6081 } else if (list != NULL) {
6082 xmlFreeNodeList(list);
6083 }
6084 } else {
6085 /*
6086 * 4.3.2: An internal general parsed entity is well-formed
6087 * if its replacement text matches the production labeled
6088 * content.
6089 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006090
6091 void *user_data;
6092 /*
6093 * This is a bit hackish but this seems the best
6094 * way to make sure both SAX and DOM entity support
6095 * behaves okay.
6096 */
6097 if (ctxt->userData == ctxt)
6098 user_data = NULL;
6099 else
6100 user_data = ctxt->userData;
6101
Owen Taylor3473f882001-02-23 17:55:21 +00006102 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6103 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00006104 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6105 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006106 ctxt->depth--;
6107 } else if (ent->etype ==
6108 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6109 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006110 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006111 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006112 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006113 ctxt->depth--;
6114 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00006115 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00006116 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6117 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006118 }
6119 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006120 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006121 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00006122 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006123 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6124 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00006125 (ent->children == NULL)) {
6126 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006127 if (ctxt->replaceEntities) {
6128 /*
6129 * Prune it directly in the generated document
6130 * except for single text nodes.
6131 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006132 if (((list->type == XML_TEXT_NODE) &&
6133 (list->next == NULL)) ||
6134 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00006135 list->parent = (xmlNodePtr) ent;
6136 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006137 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006138 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006139 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006140 while (list != NULL) {
6141 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00006142 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006143 if (list->next == NULL)
6144 ent->last = list;
6145 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00006146 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006147 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00006148#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006149 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6150 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00006151#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006152 }
6153 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006154 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006155 while (list != NULL) {
6156 list->parent = (xmlNodePtr) ent;
6157 if (list->next == NULL)
6158 ent->last = list;
6159 list = list->next;
6160 }
Owen Taylor3473f882001-02-23 17:55:21 +00006161 }
6162 } else {
6163 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006164 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006165 }
William M. Brackb670e2e2003-09-27 01:05:55 +00006166 } else if ((ret != XML_ERR_OK) &&
6167 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1ca1be22007-05-02 16:50:03 +00006168 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6169 "Entity '%s' failed to parse\n", ent->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006170 } else if (list != NULL) {
6171 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006172 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006173 }
6174 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006175 ent->checked = 1;
6176 }
6177
6178 if (ent->children == NULL) {
6179 /*
6180 * Probably running in SAX mode and the callbacks don't
6181 * build the entity content. So unless we already went
6182 * though parsing for first checking go though the entity
6183 * content to generate callbacks associated to the entity
6184 */
6185 if (was_checked == 1) {
6186 void *user_data;
6187 /*
6188 * This is a bit hackish but this seems the best
6189 * way to make sure both SAX and DOM entity support
6190 * behaves okay.
6191 */
6192 if (ctxt->userData == ctxt)
6193 user_data = NULL;
6194 else
6195 user_data = ctxt->userData;
6196
6197 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6198 ctxt->depth++;
6199 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6200 ent->content, user_data, NULL);
6201 ctxt->depth--;
6202 } else if (ent->etype ==
6203 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6204 ctxt->depth++;
6205 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6206 ctxt->sax, user_data, ctxt->depth,
6207 ent->URI, ent->ExternalID, NULL);
6208 ctxt->depth--;
6209 } else {
6210 ret = XML_ERR_ENTITY_PE_INTERNAL;
6211 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6212 "invalid entity type found\n", NULL);
6213 }
6214 if (ret == XML_ERR_ENTITY_LOOP) {
6215 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6216 return;
6217 }
6218 }
6219 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6220 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6221 /*
6222 * Entity reference callback comes second, it's somewhat
6223 * superfluous but a compatibility to historical behaviour
6224 */
6225 ctxt->sax->reference(ctxt->userData, ent->name);
6226 }
6227 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006228 }
6229 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006230 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006231 /*
6232 * Create a node.
6233 */
6234 ctxt->sax->reference(ctxt->userData, ent->name);
6235 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006236 }
6237 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
William M. Brack1227fb32004-10-25 23:17:53 +00006238 /*
6239 * There is a problem on the handling of _private for entities
6240 * (bug 155816): Should we copy the content of the field from
6241 * the entity (possibly overwriting some value set by the user
6242 * when a copy is created), should we leave it alone, or should
6243 * we try to take care of different situations? The problem
6244 * is exacerbated by the usage of this field by the xmlReader.
6245 * To fix this bug, we look at _private on the created node
6246 * and, if it's NULL, we copy in whatever was in the entity.
6247 * If it's not NULL we leave it alone. This is somewhat of a
6248 * hack - maybe we should have further tests to determine
6249 * what to do.
6250 */
Owen Taylor3473f882001-02-23 17:55:21 +00006251 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6252 /*
6253 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00006254 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006255 * In the first occurrence list contains the replacement.
6256 * progressive == 2 means we are operating on the Reader
6257 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00006258 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006259 if (((list == NULL) && (ent->owner == 0)) ||
6260 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006261 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006262
6263 /*
6264 * when operating on a reader, the entities definitions
6265 * are always owning the entities subtree.
6266 if (ctxt->parseMode == XML_PARSE_READER)
6267 ent->owner = 1;
6268 */
6269
Daniel Veillard62f313b2001-07-04 19:49:14 +00006270 cur = ent->children;
6271 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00006272 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006273 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006274 if (nw->_private == NULL)
6275 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00006276 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006277 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00006278 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006279 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00006280 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006281 if (cur == ent->last) {
6282 /*
6283 * needed to detect some strange empty
6284 * node cases in the reader tests
6285 */
6286 if ((ctxt->parseMode == XML_PARSE_READER) &&
Daniel Veillard30e76072006-03-09 14:13:55 +00006287 (nw != NULL) &&
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006288 (nw->type == XML_ELEMENT_NODE) &&
6289 (nw->children == NULL))
6290 nw->extra = 1;
6291
Daniel Veillard62f313b2001-07-04 19:49:14 +00006292 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006293 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006294 cur = cur->next;
6295 }
Daniel Veillard81273902003-09-30 00:43:48 +00006296#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006297 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006298 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006299#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006300 } else if (list == NULL) {
6301 xmlNodePtr nw = NULL, cur, next, last,
6302 firstChild = NULL;
6303 /*
6304 * Copy the entity child list and make it the new
6305 * entity child list. The goal is to make sure any
6306 * ID or REF referenced will be the one from the
6307 * document content and not the entity copy.
6308 */
6309 cur = ent->children;
6310 ent->children = NULL;
6311 last = ent->last;
6312 ent->last = NULL;
6313 while (cur != NULL) {
6314 next = cur->next;
6315 cur->next = NULL;
6316 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00006317 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006318 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006319 if (nw->_private == NULL)
6320 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006321 if (firstChild == NULL){
6322 firstChild = cur;
6323 }
6324 xmlAddChild((xmlNodePtr) ent, nw);
6325 xmlAddChild(ctxt->node, cur);
6326 }
6327 if (cur == last)
6328 break;
6329 cur = next;
6330 }
6331 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00006332#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006333 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6334 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006335#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006336 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006337 const xmlChar *nbktext;
6338
Daniel Veillard62f313b2001-07-04 19:49:14 +00006339 /*
6340 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006341 * node with a possible previous text one which
6342 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006343 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006344 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6345 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006346 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006347 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006348 if ((ent->last != ent->children) &&
6349 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006350 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006351 xmlAddChildList(ctxt->node, ent->children);
6352 }
6353
Owen Taylor3473f882001-02-23 17:55:21 +00006354 /*
6355 * This is to avoid a nasty side effect, see
6356 * characters() in SAX.c
6357 */
6358 ctxt->nodemem = 0;
6359 ctxt->nodelen = 0;
6360 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006361 }
6362 }
6363 } else {
6364 val = ent->content;
6365 if (val == NULL) return;
6366 /*
6367 * inline the entity.
6368 */
6369 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6370 (!ctxt->disableSAX))
6371 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6372 }
6373 }
6374}
6375
6376/**
6377 * xmlParseEntityRef:
6378 * @ctxt: an XML parser context
6379 *
6380 * parse ENTITY references declarations
6381 *
6382 * [68] EntityRef ::= '&' Name ';'
6383 *
6384 * [ WFC: Entity Declared ]
6385 * In a document without any DTD, a document with only an internal DTD
6386 * subset which contains no parameter entity references, or a document
6387 * with "standalone='yes'", the Name given in the entity reference
6388 * must match that in an entity declaration, except that well-formed
6389 * documents need not declare any of the following entities: amp, lt,
6390 * gt, apos, quot. The declaration of a parameter entity must precede
6391 * any reference to it. Similarly, the declaration of a general entity
6392 * must precede any reference to it which appears in a default value in an
6393 * attribute-list declaration. Note that if entities are declared in the
6394 * external subset or in external parameter entities, a non-validating
6395 * processor is not obligated to read and process their declarations;
6396 * for such documents, the rule that an entity must be declared is a
6397 * well-formedness constraint only if standalone='yes'.
6398 *
6399 * [ WFC: Parsed Entity ]
6400 * An entity reference must not contain the name of an unparsed entity
6401 *
6402 * Returns the xmlEntityPtr if found, or NULL otherwise.
6403 */
6404xmlEntityPtr
6405xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006406 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006407 xmlEntityPtr ent = NULL;
6408
6409 GROW;
6410
6411 if (RAW == '&') {
6412 NEXT;
6413 name = xmlParseName(ctxt);
6414 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006415 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6416 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006417 } else {
6418 if (RAW == ';') {
6419 NEXT;
6420 /*
6421 * Ask first SAX for entity resolution, otherwise try the
6422 * predefined set.
6423 */
6424 if (ctxt->sax != NULL) {
6425 if (ctxt->sax->getEntity != NULL)
6426 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006427 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006428 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006429 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6430 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006431 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006432 }
Owen Taylor3473f882001-02-23 17:55:21 +00006433 }
6434 /*
6435 * [ WFC: Entity Declared ]
6436 * In a document without any DTD, a document with only an
6437 * internal DTD subset which contains no parameter entity
6438 * references, or a document with "standalone='yes'", the
6439 * Name given in the entity reference must match that in an
6440 * entity declaration, except that well-formed documents
6441 * need not declare any of the following entities: amp, lt,
6442 * gt, apos, quot.
6443 * The declaration of a parameter entity must precede any
6444 * reference to it.
6445 * Similarly, the declaration of a general entity must
6446 * precede any reference to it which appears in a default
6447 * value in an attribute-list declaration. Note that if
6448 * entities are declared in the external subset or in
6449 * external parameter entities, a non-validating processor
6450 * is not obligated to read and process their declarations;
6451 * for such documents, the rule that an entity must be
6452 * declared is a well-formedness constraint only if
6453 * standalone='yes'.
6454 */
6455 if (ent == NULL) {
6456 if ((ctxt->standalone == 1) ||
6457 ((ctxt->hasExternalSubset == 0) &&
6458 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006459 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006460 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006461 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006462 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006463 "Entity '%s' not defined\n", name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006464 if ((ctxt->inSubset == 0) &&
6465 (ctxt->sax != NULL) &&
6466 (ctxt->sax->reference != NULL)) {
Daniel Veillarda9557952006-10-12 12:53:15 +00006467 ctxt->sax->reference(ctxt->userData, name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006468 }
Owen Taylor3473f882001-02-23 17:55:21 +00006469 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006470 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006471 }
6472
6473 /*
6474 * [ WFC: Parsed Entity ]
6475 * An entity reference must not contain the name of an
6476 * unparsed entity
6477 */
6478 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006479 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006480 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006481 }
6482
6483 /*
6484 * [ WFC: No External Entity References ]
6485 * Attribute values cannot contain direct or indirect
6486 * entity references to external entities.
6487 */
6488 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6489 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006490 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6491 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006492 }
6493 /*
6494 * [ WFC: No < in Attribute Values ]
6495 * The replacement text of any entity referred to directly or
6496 * indirectly in an attribute value (other than "&lt;") must
6497 * not contain a <.
6498 */
6499 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6500 (ent != NULL) &&
6501 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6502 (ent->content != NULL) &&
6503 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006504 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006505 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006506 }
6507
6508 /*
6509 * Internal check, no parameter entities here ...
6510 */
6511 else {
6512 switch (ent->etype) {
6513 case XML_INTERNAL_PARAMETER_ENTITY:
6514 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006515 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6516 "Attempt to reference the parameter entity '%s'\n",
6517 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006518 break;
6519 default:
6520 break;
6521 }
6522 }
6523
6524 /*
6525 * [ WFC: No Recursion ]
6526 * A parsed entity must not contain a recursive reference
6527 * to itself, either directly or indirectly.
6528 * Done somewhere else
6529 */
6530
6531 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006532 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006533 }
Owen Taylor3473f882001-02-23 17:55:21 +00006534 }
6535 }
6536 return(ent);
6537}
6538
6539/**
6540 * xmlParseStringEntityRef:
6541 * @ctxt: an XML parser context
6542 * @str: a pointer to an index in the string
6543 *
6544 * parse ENTITY references declarations, but this version parses it from
6545 * a string value.
6546 *
6547 * [68] EntityRef ::= '&' Name ';'
6548 *
6549 * [ WFC: Entity Declared ]
6550 * In a document without any DTD, a document with only an internal DTD
6551 * subset which contains no parameter entity references, or a document
6552 * with "standalone='yes'", the Name given in the entity reference
6553 * must match that in an entity declaration, except that well-formed
6554 * documents need not declare any of the following entities: amp, lt,
6555 * gt, apos, quot. The declaration of a parameter entity must precede
6556 * any reference to it. Similarly, the declaration of a general entity
6557 * must precede any reference to it which appears in a default value in an
6558 * attribute-list declaration. Note that if entities are declared in the
6559 * external subset or in external parameter entities, a non-validating
6560 * processor is not obligated to read and process their declarations;
6561 * for such documents, the rule that an entity must be declared is a
6562 * well-formedness constraint only if standalone='yes'.
6563 *
6564 * [ WFC: Parsed Entity ]
6565 * An entity reference must not contain the name of an unparsed entity
6566 *
6567 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6568 * is updated to the current location in the string.
6569 */
6570xmlEntityPtr
6571xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6572 xmlChar *name;
6573 const xmlChar *ptr;
6574 xmlChar cur;
6575 xmlEntityPtr ent = NULL;
6576
6577 if ((str == NULL) || (*str == NULL))
6578 return(NULL);
6579 ptr = *str;
6580 cur = *ptr;
6581 if (cur == '&') {
6582 ptr++;
6583 cur = *ptr;
6584 name = xmlParseStringName(ctxt, &ptr);
6585 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006586 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6587 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006588 } else {
6589 if (*ptr == ';') {
6590 ptr++;
6591 /*
6592 * Ask first SAX for entity resolution, otherwise try the
6593 * predefined set.
6594 */
6595 if (ctxt->sax != NULL) {
6596 if (ctxt->sax->getEntity != NULL)
6597 ent = ctxt->sax->getEntity(ctxt->userData, name);
6598 if (ent == NULL)
6599 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006600 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006601 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006602 }
Owen Taylor3473f882001-02-23 17:55:21 +00006603 }
6604 /*
6605 * [ WFC: Entity Declared ]
6606 * In a document without any DTD, a document with only an
6607 * internal DTD subset which contains no parameter entity
6608 * references, or a document with "standalone='yes'", the
6609 * Name given in the entity reference must match that in an
6610 * entity declaration, except that well-formed documents
6611 * need not declare any of the following entities: amp, lt,
6612 * gt, apos, quot.
6613 * The declaration of a parameter entity must precede any
6614 * reference to it.
6615 * Similarly, the declaration of a general entity must
6616 * precede any reference to it which appears in a default
6617 * value in an attribute-list declaration. Note that if
6618 * entities are declared in the external subset or in
6619 * external parameter entities, a non-validating processor
6620 * is not obligated to read and process their declarations;
6621 * for such documents, the rule that an entity must be
6622 * declared is a well-formedness constraint only if
6623 * standalone='yes'.
6624 */
6625 if (ent == NULL) {
6626 if ((ctxt->standalone == 1) ||
6627 ((ctxt->hasExternalSubset == 0) &&
6628 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006629 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006630 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006631 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006632 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006633 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006634 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006635 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006636 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006637 }
6638
6639 /*
6640 * [ WFC: Parsed Entity ]
6641 * An entity reference must not contain the name of an
6642 * unparsed entity
6643 */
6644 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006645 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006646 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006647 }
6648
6649 /*
6650 * [ WFC: No External Entity References ]
6651 * Attribute values cannot contain direct or indirect
6652 * entity references to external entities.
6653 */
6654 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6655 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006656 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006657 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006658 }
6659 /*
6660 * [ WFC: No < in Attribute Values ]
6661 * The replacement text of any entity referred to directly or
6662 * indirectly in an attribute value (other than "&lt;") must
6663 * not contain a <.
6664 */
6665 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6666 (ent != NULL) &&
6667 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6668 (ent->content != NULL) &&
6669 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006670 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6671 "'<' in entity '%s' is not allowed in attributes values\n",
6672 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006673 }
6674
6675 /*
6676 * Internal check, no parameter entities here ...
6677 */
6678 else {
6679 switch (ent->etype) {
6680 case XML_INTERNAL_PARAMETER_ENTITY:
6681 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006682 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6683 "Attempt to reference the parameter entity '%s'\n",
6684 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006685 break;
6686 default:
6687 break;
6688 }
6689 }
6690
6691 /*
6692 * [ WFC: No Recursion ]
6693 * A parsed entity must not contain a recursive reference
6694 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006695 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006696 */
6697
6698 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006699 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006700 }
6701 xmlFree(name);
6702 }
6703 }
6704 *str = ptr;
6705 return(ent);
6706}
6707
6708/**
6709 * xmlParsePEReference:
6710 * @ctxt: an XML parser context
6711 *
6712 * parse PEReference declarations
6713 * The entity content is handled directly by pushing it's content as
6714 * a new input stream.
6715 *
6716 * [69] PEReference ::= '%' Name ';'
6717 *
6718 * [ WFC: No Recursion ]
6719 * A parsed entity must not contain a recursive
6720 * reference to itself, either directly or indirectly.
6721 *
6722 * [ WFC: Entity Declared ]
6723 * In a document without any DTD, a document with only an internal DTD
6724 * subset which contains no parameter entity references, or a document
6725 * with "standalone='yes'", ... ... The declaration of a parameter
6726 * entity must precede any reference to it...
6727 *
6728 * [ VC: Entity Declared ]
6729 * In a document with an external subset or external parameter entities
6730 * with "standalone='no'", ... ... The declaration of a parameter entity
6731 * must precede any reference to it...
6732 *
6733 * [ WFC: In DTD ]
6734 * Parameter-entity references may only appear in the DTD.
6735 * NOTE: misleading but this is handled.
6736 */
6737void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006738xmlParsePEReference(xmlParserCtxtPtr ctxt)
6739{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006740 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006741 xmlEntityPtr entity = NULL;
6742 xmlParserInputPtr input;
6743
6744 if (RAW == '%') {
6745 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006746 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006747 if (name == NULL) {
6748 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6749 "xmlParsePEReference: no name\n");
6750 } else {
6751 if (RAW == ';') {
6752 NEXT;
6753 if ((ctxt->sax != NULL) &&
6754 (ctxt->sax->getParameterEntity != NULL))
6755 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6756 name);
6757 if (entity == NULL) {
6758 /*
6759 * [ WFC: Entity Declared ]
6760 * In a document without any DTD, a document with only an
6761 * internal DTD subset which contains no parameter entity
6762 * references, or a document with "standalone='yes'", ...
6763 * ... The declaration of a parameter entity must precede
6764 * any reference to it...
6765 */
6766 if ((ctxt->standalone == 1) ||
6767 ((ctxt->hasExternalSubset == 0) &&
6768 (ctxt->hasPErefs == 0))) {
6769 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6770 "PEReference: %%%s; not found\n",
6771 name);
6772 } else {
6773 /*
6774 * [ VC: Entity Declared ]
6775 * In a document with an external subset or external
6776 * parameter entities with "standalone='no'", ...
6777 * ... The declaration of a parameter entity must
6778 * precede any reference to it...
6779 */
6780 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6781 "PEReference: %%%s; not found\n",
6782 name, NULL);
6783 ctxt->valid = 0;
6784 }
6785 } else {
6786 /*
6787 * Internal checking in case the entity quest barfed
6788 */
6789 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6790 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6791 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6792 "Internal: %%%s; is not a parameter entity\n",
6793 name, NULL);
6794 } else if (ctxt->input->free != deallocblankswrapper) {
6795 input =
6796 xmlNewBlanksWrapperInputStream(ctxt, entity);
6797 xmlPushInput(ctxt, input);
6798 } else {
6799 /*
6800 * TODO !!!
6801 * handle the extra spaces added before and after
6802 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6803 */
6804 input = xmlNewEntityInputStream(ctxt, entity);
6805 xmlPushInput(ctxt, input);
6806 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006807 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006808 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006809 xmlParseTextDecl(ctxt);
6810 if (ctxt->errNo ==
6811 XML_ERR_UNSUPPORTED_ENCODING) {
6812 /*
6813 * The XML REC instructs us to stop parsing
6814 * right here
6815 */
6816 ctxt->instate = XML_PARSER_EOF;
6817 return;
6818 }
6819 }
6820 }
6821 }
6822 ctxt->hasPErefs = 1;
6823 } else {
6824 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6825 }
6826 }
Owen Taylor3473f882001-02-23 17:55:21 +00006827 }
6828}
6829
6830/**
6831 * xmlParseStringPEReference:
6832 * @ctxt: an XML parser context
6833 * @str: a pointer to an index in the string
6834 *
6835 * parse PEReference declarations
6836 *
6837 * [69] PEReference ::= '%' Name ';'
6838 *
6839 * [ WFC: No Recursion ]
6840 * A parsed entity must not contain a recursive
6841 * reference to itself, either directly or indirectly.
6842 *
6843 * [ WFC: Entity Declared ]
6844 * In a document without any DTD, a document with only an internal DTD
6845 * subset which contains no parameter entity references, or a document
6846 * with "standalone='yes'", ... ... The declaration of a parameter
6847 * entity must precede any reference to it...
6848 *
6849 * [ VC: Entity Declared ]
6850 * In a document with an external subset or external parameter entities
6851 * with "standalone='no'", ... ... The declaration of a parameter entity
6852 * must precede any reference to it...
6853 *
6854 * [ WFC: In DTD ]
6855 * Parameter-entity references may only appear in the DTD.
6856 * NOTE: misleading but this is handled.
6857 *
6858 * Returns the string of the entity content.
6859 * str is updated to the current value of the index
6860 */
6861xmlEntityPtr
6862xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6863 const xmlChar *ptr;
6864 xmlChar cur;
6865 xmlChar *name;
6866 xmlEntityPtr entity = NULL;
6867
6868 if ((str == NULL) || (*str == NULL)) return(NULL);
6869 ptr = *str;
6870 cur = *ptr;
6871 if (cur == '%') {
6872 ptr++;
6873 cur = *ptr;
6874 name = xmlParseStringName(ctxt, &ptr);
6875 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006876 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6877 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006878 } else {
6879 cur = *ptr;
6880 if (cur == ';') {
6881 ptr++;
6882 cur = *ptr;
6883 if ((ctxt->sax != NULL) &&
6884 (ctxt->sax->getParameterEntity != NULL))
6885 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6886 name);
6887 if (entity == NULL) {
6888 /*
6889 * [ WFC: Entity Declared ]
6890 * In a document without any DTD, a document with only an
6891 * internal DTD subset which contains no parameter entity
6892 * references, or a document with "standalone='yes'", ...
6893 * ... The declaration of a parameter entity must precede
6894 * any reference to it...
6895 */
6896 if ((ctxt->standalone == 1) ||
6897 ((ctxt->hasExternalSubset == 0) &&
6898 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006899 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006900 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006901 } else {
6902 /*
6903 * [ VC: Entity Declared ]
6904 * In a document with an external subset or external
6905 * parameter entities with "standalone='no'", ...
6906 * ... The declaration of a parameter entity must
6907 * precede any reference to it...
6908 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006909 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6910 "PEReference: %%%s; not found\n",
6911 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006912 ctxt->valid = 0;
6913 }
6914 } else {
6915 /*
6916 * Internal checking in case the entity quest barfed
6917 */
6918 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6919 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006920 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6921 "%%%s; is not a parameter entity\n",
6922 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006923 }
6924 }
6925 ctxt->hasPErefs = 1;
6926 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006927 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006928 }
6929 xmlFree(name);
6930 }
6931 }
6932 *str = ptr;
6933 return(entity);
6934}
6935
6936/**
6937 * xmlParseDocTypeDecl:
6938 * @ctxt: an XML parser context
6939 *
6940 * parse a DOCTYPE declaration
6941 *
6942 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6943 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6944 *
6945 * [ VC: Root Element Type ]
6946 * The Name in the document type declaration must match the element
6947 * type of the root element.
6948 */
6949
6950void
6951xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006952 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006953 xmlChar *ExternalID = NULL;
6954 xmlChar *URI = NULL;
6955
6956 /*
6957 * We know that '<!DOCTYPE' has been detected.
6958 */
6959 SKIP(9);
6960
6961 SKIP_BLANKS;
6962
6963 /*
6964 * Parse the DOCTYPE name.
6965 */
6966 name = xmlParseName(ctxt);
6967 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006968 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6969 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006970 }
6971 ctxt->intSubName = name;
6972
6973 SKIP_BLANKS;
6974
6975 /*
6976 * Check for SystemID and ExternalID
6977 */
6978 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6979
6980 if ((URI != NULL) || (ExternalID != NULL)) {
6981 ctxt->hasExternalSubset = 1;
6982 }
6983 ctxt->extSubURI = URI;
6984 ctxt->extSubSystem = ExternalID;
6985
6986 SKIP_BLANKS;
6987
6988 /*
6989 * Create and update the internal subset.
6990 */
6991 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6992 (!ctxt->disableSAX))
6993 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6994
6995 /*
6996 * Is there any internal subset declarations ?
6997 * they are handled separately in xmlParseInternalSubset()
6998 */
6999 if (RAW == '[')
7000 return;
7001
7002 /*
7003 * We should be at the end of the DOCTYPE declaration.
7004 */
7005 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007006 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007007 }
7008 NEXT;
7009}
7010
7011/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007012 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00007013 * @ctxt: an XML parser context
7014 *
7015 * parse the internal subset declaration
7016 *
7017 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7018 */
7019
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007020static void
Owen Taylor3473f882001-02-23 17:55:21 +00007021xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7022 /*
7023 * Is there any DTD definition ?
7024 */
7025 if (RAW == '[') {
7026 ctxt->instate = XML_PARSER_DTD;
7027 NEXT;
7028 /*
7029 * Parse the succession of Markup declarations and
7030 * PEReferences.
7031 * Subsequence (markupdecl | PEReference | S)*
7032 */
7033 while (RAW != ']') {
7034 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007035 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007036
7037 SKIP_BLANKS;
7038 xmlParseMarkupDecl(ctxt);
7039 xmlParsePEReference(ctxt);
7040
7041 /*
7042 * Pop-up of finished entities.
7043 */
7044 while ((RAW == 0) && (ctxt->inputNr > 1))
7045 xmlPopInput(ctxt);
7046
7047 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007048 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00007049 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007050 break;
7051 }
7052 }
7053 if (RAW == ']') {
7054 NEXT;
7055 SKIP_BLANKS;
7056 }
7057 }
7058
7059 /*
7060 * We should be at the end of the DOCTYPE declaration.
7061 */
7062 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007063 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007064 }
7065 NEXT;
7066}
7067
Daniel Veillard81273902003-09-30 00:43:48 +00007068#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007069/**
7070 * xmlParseAttribute:
7071 * @ctxt: an XML parser context
7072 * @value: a xmlChar ** used to store the value of the attribute
7073 *
7074 * parse an attribute
7075 *
7076 * [41] Attribute ::= Name Eq AttValue
7077 *
7078 * [ WFC: No External Entity References ]
7079 * Attribute values cannot contain direct or indirect entity references
7080 * to external entities.
7081 *
7082 * [ WFC: No < in Attribute Values ]
7083 * The replacement text of any entity referred to directly or indirectly in
7084 * an attribute value (other than "&lt;") must not contain a <.
7085 *
7086 * [ VC: Attribute Value Type ]
7087 * The attribute must have been declared; the value must be of the type
7088 * declared for it.
7089 *
7090 * [25] Eq ::= S? '=' S?
7091 *
7092 * With namespace:
7093 *
7094 * [NS 11] Attribute ::= QName Eq AttValue
7095 *
7096 * Also the case QName == xmlns:??? is handled independently as a namespace
7097 * definition.
7098 *
7099 * Returns the attribute name, and the value in *value.
7100 */
7101
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007102const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007103xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007104 const xmlChar *name;
7105 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007106
7107 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007108 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007109 name = xmlParseName(ctxt);
7110 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007111 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007112 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007113 return(NULL);
7114 }
7115
7116 /*
7117 * read the value
7118 */
7119 SKIP_BLANKS;
7120 if (RAW == '=') {
7121 NEXT;
7122 SKIP_BLANKS;
7123 val = xmlParseAttValue(ctxt);
7124 ctxt->instate = XML_PARSER_CONTENT;
7125 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007126 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007127 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007128 return(NULL);
7129 }
7130
7131 /*
7132 * Check that xml:lang conforms to the specification
7133 * No more registered as an error, just generate a warning now
7134 * since this was deprecated in XML second edition
7135 */
7136 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7137 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007138 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7139 "Malformed value for xml:lang : %s\n",
7140 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007141 }
7142 }
7143
7144 /*
7145 * Check that xml:space conforms to the specification
7146 */
7147 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7148 if (xmlStrEqual(val, BAD_CAST "default"))
7149 *(ctxt->space) = 0;
7150 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7151 *(ctxt->space) = 1;
7152 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007153 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007154"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007155 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007156 }
7157 }
7158
7159 *value = val;
7160 return(name);
7161}
7162
7163/**
7164 * xmlParseStartTag:
7165 * @ctxt: an XML parser context
7166 *
7167 * parse a start of tag either for rule element or
7168 * EmptyElement. In both case we don't parse the tag closing chars.
7169 *
7170 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7171 *
7172 * [ WFC: Unique Att Spec ]
7173 * No attribute name may appear more than once in the same start-tag or
7174 * empty-element tag.
7175 *
7176 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7177 *
7178 * [ WFC: Unique Att Spec ]
7179 * No attribute name may appear more than once in the same start-tag or
7180 * empty-element tag.
7181 *
7182 * With namespace:
7183 *
7184 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7185 *
7186 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7187 *
7188 * Returns the element name parsed
7189 */
7190
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007191const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007192xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007193 const xmlChar *name;
7194 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007195 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007196 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007197 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007198 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007199 int i;
7200
7201 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007202 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007203
7204 name = xmlParseName(ctxt);
7205 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007206 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007207 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007208 return(NULL);
7209 }
7210
7211 /*
7212 * Now parse the attributes, it ends up with the ending
7213 *
7214 * (S Attribute)* S?
7215 */
7216 SKIP_BLANKS;
7217 GROW;
7218
Daniel Veillard21a0f912001-02-25 19:54:14 +00007219 while ((RAW != '>') &&
7220 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007221 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00007222 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007223 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007224
7225 attname = xmlParseAttribute(ctxt, &attvalue);
7226 if ((attname != NULL) && (attvalue != NULL)) {
7227 /*
7228 * [ WFC: Unique Att Spec ]
7229 * No attribute name may appear more than once in the same
7230 * start-tag or empty-element tag.
7231 */
7232 for (i = 0; i < nbatts;i += 2) {
7233 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007234 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00007235 xmlFree(attvalue);
7236 goto failed;
7237 }
7238 }
Owen Taylor3473f882001-02-23 17:55:21 +00007239 /*
7240 * Add the pair to atts
7241 */
7242 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007243 maxatts = 22; /* allow for 10 attrs by default */
7244 atts = (const xmlChar **)
7245 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00007246 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007247 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007248 if (attvalue != NULL)
7249 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007250 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007251 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007252 ctxt->atts = atts;
7253 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007254 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007255 const xmlChar **n;
7256
Owen Taylor3473f882001-02-23 17:55:21 +00007257 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007258 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007259 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007260 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007261 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007262 if (attvalue != NULL)
7263 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007264 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007265 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007266 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007267 ctxt->atts = atts;
7268 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007269 }
7270 atts[nbatts++] = attname;
7271 atts[nbatts++] = attvalue;
7272 atts[nbatts] = NULL;
7273 atts[nbatts + 1] = NULL;
7274 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007275 if (attvalue != NULL)
7276 xmlFree(attvalue);
7277 }
7278
7279failed:
7280
Daniel Veillard3772de32002-12-17 10:31:45 +00007281 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00007282 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7283 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007284 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007285 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7286 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007287 }
7288 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00007289 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7290 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007291 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7292 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007293 break;
7294 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007295 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00007296 GROW;
7297 }
7298
7299 /*
7300 * SAX: Start of Element !
7301 */
7302 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007303 (!ctxt->disableSAX)) {
7304 if (nbatts > 0)
7305 ctxt->sax->startElement(ctxt->userData, name, atts);
7306 else
7307 ctxt->sax->startElement(ctxt->userData, name, NULL);
7308 }
Owen Taylor3473f882001-02-23 17:55:21 +00007309
7310 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007311 /* Free only the content strings */
7312 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007313 if (atts[i] != NULL)
7314 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00007315 }
7316 return(name);
7317}
7318
7319/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007320 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007321 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007322 * @line: line of the start tag
7323 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007324 *
7325 * parse an end of tag
7326 *
7327 * [42] ETag ::= '</' Name S? '>'
7328 *
7329 * With namespace
7330 *
7331 * [NS 9] ETag ::= '</' QName S? '>'
7332 */
7333
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007334static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007335xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007336 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007337
7338 GROW;
7339 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007340 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007341 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007342 return;
7343 }
7344 SKIP(2);
7345
Daniel Veillard46de64e2002-05-29 08:21:33 +00007346 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007347
7348 /*
7349 * We should definitely be at the ending "S? '>'" part
7350 */
7351 GROW;
7352 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007353 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007354 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007355 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007356 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007357
7358 /*
7359 * [ WFC: Element Type Match ]
7360 * The Name in an element's end-tag must match the element type in the
7361 * start-tag.
7362 *
7363 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007364 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007365 if (name == NULL) name = BAD_CAST "unparseable";
7366 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007367 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007368 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007369 }
7370
7371 /*
7372 * SAX: End of Tag
7373 */
7374 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7375 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007376 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007377
Daniel Veillarde57ec792003-09-10 10:50:59 +00007378 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007379 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007380 return;
7381}
7382
7383/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007384 * xmlParseEndTag:
7385 * @ctxt: an XML parser context
7386 *
7387 * parse an end of tag
7388 *
7389 * [42] ETag ::= '</' Name S? '>'
7390 *
7391 * With namespace
7392 *
7393 * [NS 9] ETag ::= '</' QName S? '>'
7394 */
7395
7396void
7397xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007398 xmlParseEndTag1(ctxt, 0);
7399}
Daniel Veillard81273902003-09-30 00:43:48 +00007400#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007401
7402/************************************************************************
7403 * *
7404 * SAX 2 specific operations *
7405 * *
7406 ************************************************************************/
7407
7408static const xmlChar *
7409xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7410 int len = 0, l;
7411 int c;
7412 int count = 0;
7413
7414 /*
7415 * Handler for more complex cases
7416 */
7417 GROW;
7418 c = CUR_CHAR(l);
7419 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007420 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007421 return(NULL);
7422 }
7423
7424 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007425 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007426 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007427 (IS_COMBINING(c)) ||
7428 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007429 if (count++ > 100) {
7430 count = 0;
7431 GROW;
7432 }
7433 len += l;
7434 NEXTL(l);
7435 c = CUR_CHAR(l);
7436 }
7437 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7438}
7439
7440/*
7441 * xmlGetNamespace:
7442 * @ctxt: an XML parser context
7443 * @prefix: the prefix to lookup
7444 *
7445 * Lookup the namespace name for the @prefix (which ca be NULL)
7446 * The prefix must come from the @ctxt->dict dictionnary
7447 *
7448 * Returns the namespace name or NULL if not bound
7449 */
7450static const xmlChar *
7451xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7452 int i;
7453
Daniel Veillarde57ec792003-09-10 10:50:59 +00007454 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007455 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007456 if (ctxt->nsTab[i] == prefix) {
7457 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7458 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007459 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007460 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007461 return(NULL);
7462}
7463
7464/**
7465 * xmlParseNCName:
7466 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007467 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00007468 *
7469 * parse an XML name.
7470 *
7471 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7472 * CombiningChar | Extender
7473 *
7474 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7475 *
7476 * Returns the Name parsed or NULL
7477 */
7478
7479static const xmlChar *
7480xmlParseNCName(xmlParserCtxtPtr ctxt) {
7481 const xmlChar *in;
7482 const xmlChar *ret;
7483 int count = 0;
7484
7485 /*
7486 * Accelerator for simple ASCII names
7487 */
7488 in = ctxt->input->cur;
7489 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7490 ((*in >= 0x41) && (*in <= 0x5A)) ||
7491 (*in == '_')) {
7492 in++;
7493 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7494 ((*in >= 0x41) && (*in <= 0x5A)) ||
7495 ((*in >= 0x30) && (*in <= 0x39)) ||
7496 (*in == '_') || (*in == '-') ||
7497 (*in == '.'))
7498 in++;
7499 if ((*in > 0) && (*in < 0x80)) {
7500 count = in - ctxt->input->cur;
7501 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7502 ctxt->input->cur = in;
7503 ctxt->nbChars += count;
7504 ctxt->input->col += count;
7505 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007506 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007507 }
7508 return(ret);
7509 }
7510 }
7511 return(xmlParseNCNameComplex(ctxt));
7512}
7513
7514/**
7515 * xmlParseQName:
7516 * @ctxt: an XML parser context
7517 * @prefix: pointer to store the prefix part
7518 *
7519 * parse an XML Namespace QName
7520 *
7521 * [6] QName ::= (Prefix ':')? LocalPart
7522 * [7] Prefix ::= NCName
7523 * [8] LocalPart ::= NCName
7524 *
7525 * Returns the Name parsed or NULL
7526 */
7527
7528static const xmlChar *
7529xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7530 const xmlChar *l, *p;
7531
7532 GROW;
7533
7534 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007535 if (l == NULL) {
7536 if (CUR == ':') {
7537 l = xmlParseName(ctxt);
7538 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007539 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7540 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007541 *prefix = NULL;
7542 return(l);
7543 }
7544 }
7545 return(NULL);
7546 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007547 if (CUR == ':') {
7548 NEXT;
7549 p = l;
7550 l = xmlParseNCName(ctxt);
7551 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007552 xmlChar *tmp;
7553
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007554 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7555 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007556 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7557 p = xmlDictLookup(ctxt->dict, tmp, -1);
7558 if (tmp != NULL) xmlFree(tmp);
7559 *prefix = NULL;
7560 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007561 }
7562 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007563 xmlChar *tmp;
7564
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007565 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7566 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007567 NEXT;
7568 tmp = (xmlChar *) xmlParseName(ctxt);
7569 if (tmp != NULL) {
7570 tmp = xmlBuildQName(tmp, l, NULL, 0);
7571 l = xmlDictLookup(ctxt->dict, tmp, -1);
7572 if (tmp != NULL) xmlFree(tmp);
7573 *prefix = p;
7574 return(l);
7575 }
7576 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7577 l = xmlDictLookup(ctxt->dict, tmp, -1);
7578 if (tmp != NULL) xmlFree(tmp);
7579 *prefix = p;
7580 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007581 }
7582 *prefix = p;
7583 } else
7584 *prefix = NULL;
7585 return(l);
7586}
7587
7588/**
7589 * xmlParseQNameAndCompare:
7590 * @ctxt: an XML parser context
7591 * @name: the localname
7592 * @prefix: the prefix, if any.
7593 *
7594 * parse an XML name and compares for match
7595 * (specialized for endtag parsing)
7596 *
7597 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7598 * and the name for mismatch
7599 */
7600
7601static const xmlChar *
7602xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7603 xmlChar const *prefix) {
7604 const xmlChar *cmp = name;
7605 const xmlChar *in;
7606 const xmlChar *ret;
7607 const xmlChar *prefix2;
7608
7609 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7610
7611 GROW;
7612 in = ctxt->input->cur;
7613
7614 cmp = prefix;
7615 while (*in != 0 && *in == *cmp) {
7616 ++in;
7617 ++cmp;
7618 }
7619 if ((*cmp == 0) && (*in == ':')) {
7620 in++;
7621 cmp = name;
7622 while (*in != 0 && *in == *cmp) {
7623 ++in;
7624 ++cmp;
7625 }
William M. Brack76e95df2003-10-18 16:20:14 +00007626 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007627 /* success */
7628 ctxt->input->cur = in;
7629 return((const xmlChar*) 1);
7630 }
7631 }
7632 /*
7633 * all strings coms from the dictionary, equality can be done directly
7634 */
7635 ret = xmlParseQName (ctxt, &prefix2);
7636 if ((ret == name) && (prefix == prefix2))
7637 return((const xmlChar*) 1);
7638 return ret;
7639}
7640
7641/**
7642 * xmlParseAttValueInternal:
7643 * @ctxt: an XML parser context
7644 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007645 * @alloc: whether the attribute was reallocated as a new string
7646 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007647 *
7648 * parse a value for an attribute.
7649 * NOTE: if no normalization is needed, the routine will return pointers
7650 * directly from the data buffer.
7651 *
7652 * 3.3.3 Attribute-Value Normalization:
7653 * Before the value of an attribute is passed to the application or
7654 * checked for validity, the XML processor must normalize it as follows:
7655 * - a character reference is processed by appending the referenced
7656 * character to the attribute value
7657 * - an entity reference is processed by recursively processing the
7658 * replacement text of the entity
7659 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7660 * appending #x20 to the normalized value, except that only a single
7661 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7662 * parsed entity or the literal entity value of an internal parsed entity
7663 * - other characters are processed by appending them to the normalized value
7664 * If the declared value is not CDATA, then the XML processor must further
7665 * process the normalized attribute value by discarding any leading and
7666 * trailing space (#x20) characters, and by replacing sequences of space
7667 * (#x20) characters by a single space (#x20) character.
7668 * All attributes for which no declaration has been read should be treated
7669 * by a non-validating parser as if declared CDATA.
7670 *
7671 * Returns the AttValue parsed or NULL. The value has to be freed by the
7672 * caller if it was copied, this can be detected by val[*len] == 0.
7673 */
7674
7675static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007676xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7677 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007678{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007679 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007680 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007681 xmlChar *ret = NULL;
7682
7683 GROW;
7684 in = (xmlChar *) CUR_PTR;
7685 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007686 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007687 return (NULL);
7688 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007689 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007690
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007691 /*
7692 * try to handle in this routine the most common case where no
7693 * allocation of a new string is required and where content is
7694 * pure ASCII.
7695 */
7696 limit = *in++;
7697 end = ctxt->input->end;
7698 start = in;
7699 if (in >= end) {
7700 const xmlChar *oldbase = ctxt->input->base;
7701 GROW;
7702 if (oldbase != ctxt->input->base) {
7703 long delta = ctxt->input->base - oldbase;
7704 start = start + delta;
7705 in = in + delta;
7706 }
7707 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007708 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007709 if (normalize) {
7710 /*
7711 * Skip any leading spaces
7712 */
7713 while ((in < end) && (*in != limit) &&
7714 ((*in == 0x20) || (*in == 0x9) ||
7715 (*in == 0xA) || (*in == 0xD))) {
7716 in++;
7717 start = in;
7718 if (in >= end) {
7719 const xmlChar *oldbase = ctxt->input->base;
7720 GROW;
7721 if (oldbase != ctxt->input->base) {
7722 long delta = ctxt->input->base - oldbase;
7723 start = start + delta;
7724 in = in + delta;
7725 }
7726 end = ctxt->input->end;
7727 }
7728 }
7729 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7730 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7731 if ((*in++ == 0x20) && (*in == 0x20)) break;
7732 if (in >= end) {
7733 const xmlChar *oldbase = ctxt->input->base;
7734 GROW;
7735 if (oldbase != ctxt->input->base) {
7736 long delta = ctxt->input->base - oldbase;
7737 start = start + delta;
7738 in = in + delta;
7739 }
7740 end = ctxt->input->end;
7741 }
7742 }
7743 last = in;
7744 /*
7745 * skip the trailing blanks
7746 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007747 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007748 while ((in < end) && (*in != limit) &&
7749 ((*in == 0x20) || (*in == 0x9) ||
7750 (*in == 0xA) || (*in == 0xD))) {
7751 in++;
7752 if (in >= end) {
7753 const xmlChar *oldbase = ctxt->input->base;
7754 GROW;
7755 if (oldbase != ctxt->input->base) {
7756 long delta = ctxt->input->base - oldbase;
7757 start = start + delta;
7758 in = in + delta;
7759 last = last + delta;
7760 }
7761 end = ctxt->input->end;
7762 }
7763 }
7764 if (*in != limit) goto need_complex;
7765 } else {
7766 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7767 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7768 in++;
7769 if (in >= end) {
7770 const xmlChar *oldbase = ctxt->input->base;
7771 GROW;
7772 if (oldbase != ctxt->input->base) {
7773 long delta = ctxt->input->base - oldbase;
7774 start = start + delta;
7775 in = in + delta;
7776 }
7777 end = ctxt->input->end;
7778 }
7779 }
7780 last = in;
7781 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007782 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007783 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007784 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007785 *len = last - start;
7786 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007787 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007788 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007789 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007790 }
7791 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007792 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007793 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007794need_complex:
7795 if (alloc) *alloc = 1;
7796 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007797}
7798
7799/**
7800 * xmlParseAttribute2:
7801 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007802 * @pref: the element prefix
7803 * @elem: the element name
7804 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007805 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007806 * @len: an int * to save the length of the attribute
7807 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007808 *
7809 * parse an attribute in the new SAX2 framework.
7810 *
7811 * Returns the attribute name, and the value in *value, .
7812 */
7813
7814static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007815xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7816 const xmlChar *pref, const xmlChar *elem,
7817 const xmlChar **prefix, xmlChar **value,
7818 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007819 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00007820 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007821 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007822
7823 *value = NULL;
7824 GROW;
7825 name = xmlParseQName(ctxt, prefix);
7826 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007827 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7828 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007829 return(NULL);
7830 }
7831
7832 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007833 * get the type if needed
7834 */
7835 if (ctxt->attsSpecial != NULL) {
7836 int type;
7837
7838 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7839 pref, elem, *prefix, name);
7840 if (type != 0) normalize = 1;
7841 }
7842
7843 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007844 * read the value
7845 */
7846 SKIP_BLANKS;
7847 if (RAW == '=') {
7848 NEXT;
7849 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007850 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007851 ctxt->instate = XML_PARSER_CONTENT;
7852 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007853 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007854 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007855 return(NULL);
7856 }
7857
Daniel Veillardd8925572005-06-08 22:34:55 +00007858 if (*prefix == ctxt->str_xml) {
7859 /*
7860 * Check that xml:lang conforms to the specification
7861 * No more registered as an error, just generate a warning now
7862 * since this was deprecated in XML second edition
7863 */
7864 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
7865 internal_val = xmlStrndup(val, *len);
7866 if (!xmlCheckLanguageID(internal_val)) {
7867 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7868 "Malformed value for xml:lang : %s\n",
7869 internal_val, NULL);
7870 }
7871 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007872
Daniel Veillardd8925572005-06-08 22:34:55 +00007873 /*
7874 * Check that xml:space conforms to the specification
7875 */
7876 if (xmlStrEqual(name, BAD_CAST "space")) {
7877 internal_val = xmlStrndup(val, *len);
7878 if (xmlStrEqual(internal_val, BAD_CAST "default"))
7879 *(ctxt->space) = 0;
7880 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
7881 *(ctxt->space) = 1;
7882 else {
7883 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007884"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007885 internal_val, NULL);
7886 }
7887 }
7888 if (internal_val) {
7889 xmlFree(internal_val);
7890 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007891 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007892
7893 *value = val;
7894 return(name);
7895}
7896
7897/**
7898 * xmlParseStartTag2:
7899 * @ctxt: an XML parser context
7900 *
7901 * parse a start of tag either for rule element or
7902 * EmptyElement. In both case we don't parse the tag closing chars.
7903 * This routine is called when running SAX2 parsing
7904 *
7905 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7906 *
7907 * [ WFC: Unique Att Spec ]
7908 * No attribute name may appear more than once in the same start-tag or
7909 * empty-element tag.
7910 *
7911 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7912 *
7913 * [ WFC: Unique Att Spec ]
7914 * No attribute name may appear more than once in the same start-tag or
7915 * empty-element tag.
7916 *
7917 * With namespace:
7918 *
7919 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7920 *
7921 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7922 *
7923 * Returns the element name parsed
7924 */
7925
7926static const xmlChar *
7927xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007928 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007929 const xmlChar *localname;
7930 const xmlChar *prefix;
7931 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007932 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007933 const xmlChar *nsname;
7934 xmlChar *attvalue;
7935 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007936 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007937 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00007938 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007939 const xmlChar *base;
7940 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00007941 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007942
7943 if (RAW != '<') return(NULL);
7944 NEXT1;
7945
7946 /*
7947 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7948 * point since the attribute values may be stored as pointers to
7949 * the buffer and calling SHRINK would destroy them !
7950 * The Shrinking is only possible once the full set of attribute
7951 * callbacks have been done.
7952 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007953reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007954 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007955 base = ctxt->input->base;
7956 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00007957 oldline = ctxt->input->line;
7958 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007959 nbatts = 0;
7960 nratts = 0;
7961 nbdef = 0;
7962 nbNs = 0;
7963 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00007964 /* Forget any namespaces added during an earlier parse of this element. */
7965 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007966
7967 localname = xmlParseQName(ctxt, &prefix);
7968 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007969 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7970 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007971 return(NULL);
7972 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007973 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007974
7975 /*
7976 * Now parse the attributes, it ends up with the ending
7977 *
7978 * (S Attribute)* S?
7979 */
7980 SKIP_BLANKS;
7981 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007982 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007983
7984 while ((RAW != '>') &&
7985 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007986 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007987 const xmlChar *q = CUR_PTR;
7988 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007989 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007990
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007991 attname = xmlParseAttribute2(ctxt, prefix, localname,
7992 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00007993 if (ctxt->input->base != base) {
7994 if ((attvalue != NULL) && (alloc != 0))
7995 xmlFree(attvalue);
7996 attvalue = NULL;
7997 goto base_changed;
7998 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007999 if ((attname != NULL) && (attvalue != NULL)) {
8000 if (len < 0) len = xmlStrlen(attvalue);
8001 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008002 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8003 xmlURIPtr uri;
8004
8005 if (*URL != 0) {
8006 uri = xmlParseURI((const char *) URL);
8007 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008008 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
8009 "xmlns: %s not a valid URI\n",
8010 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008011 } else {
8012 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008013 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
8014 "xmlns: URI %s is not absolute\n",
8015 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008016 }
8017 xmlFreeURI(uri);
8018 }
8019 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008020 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008021 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008022 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008023 for (j = 1;j <= nbNs;j++)
8024 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8025 break;
8026 if (j <= nbNs)
8027 xmlErrAttributeDup(ctxt, NULL, attname);
8028 else
8029 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008030 if (alloc != 0) xmlFree(attvalue);
8031 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008032 continue;
8033 }
8034 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008035 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8036 xmlURIPtr uri;
8037
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008038 if (attname == ctxt->str_xml) {
8039 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008040 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8041 "xml namespace prefix mapped to wrong URI\n",
8042 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008043 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008044 /*
8045 * Do not keep a namespace definition node
8046 */
8047 if (alloc != 0) xmlFree(attvalue);
8048 SKIP_BLANKS;
8049 continue;
8050 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008051 uri = xmlParseURI((const char *) URL);
8052 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008053 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
8054 "xmlns:%s: '%s' is not a valid URI\n",
8055 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008056 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008057 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008058 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
8059 "xmlns:%s: URI %s is not absolute\n",
8060 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008061 }
8062 xmlFreeURI(uri);
8063 }
8064
Daniel Veillard0fb18932003-09-07 09:14:37 +00008065 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008066 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008067 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008068 for (j = 1;j <= nbNs;j++)
8069 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8070 break;
8071 if (j <= nbNs)
8072 xmlErrAttributeDup(ctxt, aprefix, attname);
8073 else
8074 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008075 if (alloc != 0) xmlFree(attvalue);
8076 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00008077 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008078 continue;
8079 }
8080
8081 /*
8082 * Add the pair to atts
8083 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008084 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8085 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008086 if (attvalue[len] == 0)
8087 xmlFree(attvalue);
8088 goto failed;
8089 }
8090 maxatts = ctxt->maxatts;
8091 atts = ctxt->atts;
8092 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008093 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008094 atts[nbatts++] = attname;
8095 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008096 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008097 atts[nbatts++] = attvalue;
8098 attvalue += len;
8099 atts[nbatts++] = attvalue;
8100 /*
8101 * tag if some deallocation is needed
8102 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008103 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008104 } else {
8105 if ((attvalue != NULL) && (attvalue[len] == 0))
8106 xmlFree(attvalue);
8107 }
8108
8109failed:
8110
8111 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008112 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008113 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8114 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008115 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008116 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8117 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008118 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008119 }
8120 SKIP_BLANKS;
8121 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8122 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008123 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008124 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008125 break;
8126 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008127 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008128 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008129 }
8130
Daniel Veillard0fb18932003-09-07 09:14:37 +00008131 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008132 * The attributes defaulting
8133 */
8134 if (ctxt->attsDefault != NULL) {
8135 xmlDefAttrsPtr defaults;
8136
8137 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8138 if (defaults != NULL) {
8139 for (i = 0;i < defaults->nbAttrs;i++) {
8140 attname = defaults->values[4 * i];
8141 aprefix = defaults->values[4 * i + 1];
8142
8143 /*
8144 * special work for namespaces defaulted defs
8145 */
8146 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8147 /*
8148 * check that it's not a defined namespace
8149 */
8150 for (j = 1;j <= nbNs;j++)
8151 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8152 break;
8153 if (j <= nbNs) continue;
8154
8155 nsname = xmlGetNamespace(ctxt, NULL);
8156 if (nsname != defaults->values[4 * i + 2]) {
8157 if (nsPush(ctxt, NULL,
8158 defaults->values[4 * i + 2]) > 0)
8159 nbNs++;
8160 }
8161 } else if (aprefix == ctxt->str_xmlns) {
8162 /*
8163 * check that it's not a defined namespace
8164 */
8165 for (j = 1;j <= nbNs;j++)
8166 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8167 break;
8168 if (j <= nbNs) continue;
8169
8170 nsname = xmlGetNamespace(ctxt, attname);
8171 if (nsname != defaults->values[2]) {
8172 if (nsPush(ctxt, attname,
8173 defaults->values[4 * i + 2]) > 0)
8174 nbNs++;
8175 }
8176 } else {
8177 /*
8178 * check that it's not a defined attribute
8179 */
8180 for (j = 0;j < nbatts;j+=5) {
8181 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8182 break;
8183 }
8184 if (j < nbatts) continue;
8185
8186 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8187 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008188 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008189 }
8190 maxatts = ctxt->maxatts;
8191 atts = ctxt->atts;
8192 }
8193 atts[nbatts++] = attname;
8194 atts[nbatts++] = aprefix;
8195 if (aprefix == NULL)
8196 atts[nbatts++] = NULL;
8197 else
8198 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8199 atts[nbatts++] = defaults->values[4 * i + 2];
8200 atts[nbatts++] = defaults->values[4 * i + 3];
8201 nbdef++;
8202 }
8203 }
8204 }
8205 }
8206
Daniel Veillarde70c8772003-11-25 07:21:18 +00008207 /*
8208 * The attributes checkings
8209 */
8210 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008211 /*
8212 * The default namespace does not apply to attribute names.
8213 */
8214 if (atts[i + 1] != NULL) {
8215 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8216 if (nsname == NULL) {
8217 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8218 "Namespace prefix %s for %s on %s is not defined\n",
8219 atts[i + 1], atts[i], localname);
8220 }
8221 atts[i + 2] = nsname;
8222 } else
8223 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008224 /*
8225 * [ WFC: Unique Att Spec ]
8226 * No attribute name may appear more than once in the same
8227 * start-tag or empty-element tag.
8228 * As extended by the Namespace in XML REC.
8229 */
8230 for (j = 0; j < i;j += 5) {
8231 if (atts[i] == atts[j]) {
8232 if (atts[i+1] == atts[j+1]) {
8233 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8234 break;
8235 }
8236 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8237 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8238 "Namespaced Attribute %s in '%s' redefined\n",
8239 atts[i], nsname, NULL);
8240 break;
8241 }
8242 }
8243 }
8244 }
8245
Daniel Veillarde57ec792003-09-10 10:50:59 +00008246 nsname = xmlGetNamespace(ctxt, prefix);
8247 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008248 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8249 "Namespace prefix %s on %s is not defined\n",
8250 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008251 }
8252 *pref = prefix;
8253 *URI = nsname;
8254
8255 /*
8256 * SAX: Start of Element !
8257 */
8258 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8259 (!ctxt->disableSAX)) {
8260 if (nbNs > 0)
8261 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8262 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8263 nbatts / 5, nbdef, atts);
8264 else
8265 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8266 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8267 }
8268
8269 /*
8270 * Free up attribute allocated strings if needed
8271 */
8272 if (attval != 0) {
8273 for (i = 3,j = 0; j < nratts;i += 5,j++)
8274 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8275 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008276 }
8277
8278 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008279
8280base_changed:
8281 /*
8282 * the attribute strings are valid iif the base didn't changed
8283 */
8284 if (attval != 0) {
8285 for (i = 3,j = 0; j < nratts;i += 5,j++)
8286 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8287 xmlFree((xmlChar *) atts[i]);
8288 }
8289 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008290 ctxt->input->line = oldline;
8291 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008292 if (ctxt->wellFormed == 1) {
8293 goto reparse;
8294 }
8295 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008296}
8297
8298/**
8299 * xmlParseEndTag2:
8300 * @ctxt: an XML parser context
8301 * @line: line of the start tag
8302 * @nsNr: number of namespaces on the start tag
8303 *
8304 * parse an end of tag
8305 *
8306 * [42] ETag ::= '</' Name S? '>'
8307 *
8308 * With namespace
8309 *
8310 * [NS 9] ETag ::= '</' QName S? '>'
8311 */
8312
8313static void
8314xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008315 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008316 const xmlChar *name;
8317
8318 GROW;
8319 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008320 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008321 return;
8322 }
8323 SKIP(2);
8324
William M. Brack13dfa872004-09-18 04:52:08 +00008325 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008326 if (ctxt->input->cur[tlen] == '>') {
8327 ctxt->input->cur += tlen + 1;
8328 goto done;
8329 }
8330 ctxt->input->cur += tlen;
8331 name = (xmlChar*)1;
8332 } else {
8333 if (prefix == NULL)
8334 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8335 else
8336 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8337 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008338
8339 /*
8340 * We should definitely be at the ending "S? '>'" part
8341 */
8342 GROW;
8343 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008344 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008345 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008346 } else
8347 NEXT1;
8348
8349 /*
8350 * [ WFC: Element Type Match ]
8351 * The Name in an element's end-tag must match the element type in the
8352 * start-tag.
8353 *
8354 */
8355 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008356 if (name == NULL) name = BAD_CAST "unparseable";
8357 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008358 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008359 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008360 }
8361
8362 /*
8363 * SAX: End of Tag
8364 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008365done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008366 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8367 (!ctxt->disableSAX))
8368 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8369
Daniel Veillard0fb18932003-09-07 09:14:37 +00008370 spacePop(ctxt);
8371 if (nsNr != 0)
8372 nsPop(ctxt, nsNr);
8373 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008374}
8375
8376/**
Owen Taylor3473f882001-02-23 17:55:21 +00008377 * xmlParseCDSect:
8378 * @ctxt: an XML parser context
8379 *
8380 * Parse escaped pure raw content.
8381 *
8382 * [18] CDSect ::= CDStart CData CDEnd
8383 *
8384 * [19] CDStart ::= '<![CDATA['
8385 *
8386 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8387 *
8388 * [21] CDEnd ::= ']]>'
8389 */
8390void
8391xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8392 xmlChar *buf = NULL;
8393 int len = 0;
8394 int size = XML_PARSER_BUFFER_SIZE;
8395 int r, rl;
8396 int s, sl;
8397 int cur, l;
8398 int count = 0;
8399
Daniel Veillard8f597c32003-10-06 08:19:27 +00008400 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008401 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008402 SKIP(9);
8403 } else
8404 return;
8405
8406 ctxt->instate = XML_PARSER_CDATA_SECTION;
8407 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008408 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008409 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008410 ctxt->instate = XML_PARSER_CONTENT;
8411 return;
8412 }
8413 NEXTL(rl);
8414 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008415 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008416 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008417 ctxt->instate = XML_PARSER_CONTENT;
8418 return;
8419 }
8420 NEXTL(sl);
8421 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008422 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008423 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008424 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008425 return;
8426 }
William M. Brack871611b2003-10-18 04:53:14 +00008427 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008428 ((r != ']') || (s != ']') || (cur != '>'))) {
8429 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008430 xmlChar *tmp;
8431
Owen Taylor3473f882001-02-23 17:55:21 +00008432 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008433 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8434 if (tmp == NULL) {
8435 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008436 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008437 return;
8438 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008439 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008440 }
8441 COPY_BUF(rl,buf,len,r);
8442 r = s;
8443 rl = sl;
8444 s = cur;
8445 sl = l;
8446 count++;
8447 if (count > 50) {
8448 GROW;
8449 count = 0;
8450 }
8451 NEXTL(l);
8452 cur = CUR_CHAR(l);
8453 }
8454 buf[len] = 0;
8455 ctxt->instate = XML_PARSER_CONTENT;
8456 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008457 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008458 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008459 xmlFree(buf);
8460 return;
8461 }
8462 NEXTL(l);
8463
8464 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008465 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008466 */
8467 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8468 if (ctxt->sax->cdataBlock != NULL)
8469 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008470 else if (ctxt->sax->characters != NULL)
8471 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008472 }
8473 xmlFree(buf);
8474}
8475
8476/**
8477 * xmlParseContent:
8478 * @ctxt: an XML parser context
8479 *
8480 * Parse a content:
8481 *
8482 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8483 */
8484
8485void
8486xmlParseContent(xmlParserCtxtPtr ctxt) {
8487 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008488 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008489 ((RAW != '<') || (NXT(1) != '/')) &&
8490 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008491 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008492 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008493 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008494
8495 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008496 * First case : a Processing Instruction.
8497 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008498 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008499 xmlParsePI(ctxt);
8500 }
8501
8502 /*
8503 * Second case : a CDSection
8504 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008505 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008506 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008507 xmlParseCDSect(ctxt);
8508 }
8509
8510 /*
8511 * Third case : a comment
8512 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008513 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008514 (NXT(2) == '-') && (NXT(3) == '-')) {
8515 xmlParseComment(ctxt);
8516 ctxt->instate = XML_PARSER_CONTENT;
8517 }
8518
8519 /*
8520 * Fourth case : a sub-element.
8521 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008522 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008523 xmlParseElement(ctxt);
8524 }
8525
8526 /*
8527 * Fifth case : a reference. If if has not been resolved,
8528 * parsing returns it's Name, create the node
8529 */
8530
Daniel Veillard21a0f912001-02-25 19:54:14 +00008531 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008532 xmlParseReference(ctxt);
8533 }
8534
8535 /*
8536 * Last case, text. Note that References are handled directly.
8537 */
8538 else {
8539 xmlParseCharData(ctxt, 0);
8540 }
8541
8542 GROW;
8543 /*
8544 * Pop-up of finished entities.
8545 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008546 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008547 xmlPopInput(ctxt);
8548 SHRINK;
8549
Daniel Veillardfdc91562002-07-01 21:52:03 +00008550 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008551 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8552 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008553 ctxt->instate = XML_PARSER_EOF;
8554 break;
8555 }
8556 }
8557}
8558
8559/**
8560 * xmlParseElement:
8561 * @ctxt: an XML parser context
8562 *
8563 * parse an XML element, this is highly recursive
8564 *
8565 * [39] element ::= EmptyElemTag | STag content ETag
8566 *
8567 * [ WFC: Element Type Match ]
8568 * The Name in an element's end-tag must match the element type in the
8569 * start-tag.
8570 *
Owen Taylor3473f882001-02-23 17:55:21 +00008571 */
8572
8573void
8574xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008575 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008576 const xmlChar *prefix;
8577 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008578 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008579 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008580 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008581 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008582
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008583 if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) {
8584 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
8585 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
8586 xmlParserMaxDepth);
8587 ctxt->instate = XML_PARSER_EOF;
8588 return;
8589 }
8590
Owen Taylor3473f882001-02-23 17:55:21 +00008591 /* Capture start position */
8592 if (ctxt->record_info) {
8593 node_info.begin_pos = ctxt->input->consumed +
8594 (CUR_PTR - ctxt->input->base);
8595 node_info.begin_line = ctxt->input->line;
8596 }
8597
8598 if (ctxt->spaceNr == 0)
8599 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00008600 else if (*ctxt->space == -2)
8601 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00008602 else
8603 spacePush(ctxt, *ctxt->space);
8604
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008605 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008606#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008607 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008608#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008609 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00008610#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008611 else
8612 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008613#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008614 if (name == NULL) {
8615 spacePop(ctxt);
8616 return;
8617 }
8618 namePush(ctxt, name);
8619 ret = ctxt->node;
8620
Daniel Veillard4432df22003-09-28 18:58:27 +00008621#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008622 /*
8623 * [ VC: Root Element Type ]
8624 * The Name in the document type declaration must match the element
8625 * type of the root element.
8626 */
8627 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8628 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8629 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008630#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008631
8632 /*
8633 * Check for an Empty Element.
8634 */
8635 if ((RAW == '/') && (NXT(1) == '>')) {
8636 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008637 if (ctxt->sax2) {
8638 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8639 (!ctxt->disableSAX))
8640 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008641#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008642 } else {
8643 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8644 (!ctxt->disableSAX))
8645 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008646#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008647 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008648 namePop(ctxt);
8649 spacePop(ctxt);
8650 if (nsNr != ctxt->nsNr)
8651 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008652 if ( ret != NULL && ctxt->record_info ) {
8653 node_info.end_pos = ctxt->input->consumed +
8654 (CUR_PTR - ctxt->input->base);
8655 node_info.end_line = ctxt->input->line;
8656 node_info.node = ret;
8657 xmlParserAddNodeInfo(ctxt, &node_info);
8658 }
8659 return;
8660 }
8661 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008662 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008663 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008664 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8665 "Couldn't find end of Start Tag %s line %d\n",
8666 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008667
8668 /*
8669 * end of parsing of this node.
8670 */
8671 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008672 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008673 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008674 if (nsNr != ctxt->nsNr)
8675 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008676
8677 /*
8678 * Capture end position and add node
8679 */
8680 if ( ret != NULL && ctxt->record_info ) {
8681 node_info.end_pos = ctxt->input->consumed +
8682 (CUR_PTR - ctxt->input->base);
8683 node_info.end_line = ctxt->input->line;
8684 node_info.node = ret;
8685 xmlParserAddNodeInfo(ctxt, &node_info);
8686 }
8687 return;
8688 }
8689
8690 /*
8691 * Parse the content of the element:
8692 */
8693 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008694 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008695 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008696 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008697 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008698
8699 /*
8700 * end of parsing of this node.
8701 */
8702 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008703 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008704 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008705 if (nsNr != ctxt->nsNr)
8706 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008707 return;
8708 }
8709
8710 /*
8711 * parse the end of tag: '</' should be here.
8712 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008713 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008714 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008715 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008716 }
8717#ifdef LIBXML_SAX1_ENABLED
8718 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008719 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008720#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008721
8722 /*
8723 * Capture end position and add node
8724 */
8725 if ( ret != NULL && ctxt->record_info ) {
8726 node_info.end_pos = ctxt->input->consumed +
8727 (CUR_PTR - ctxt->input->base);
8728 node_info.end_line = ctxt->input->line;
8729 node_info.node = ret;
8730 xmlParserAddNodeInfo(ctxt, &node_info);
8731 }
8732}
8733
8734/**
8735 * xmlParseVersionNum:
8736 * @ctxt: an XML parser context
8737 *
8738 * parse the XML version value.
8739 *
8740 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8741 *
8742 * Returns the string giving the XML version number, or NULL
8743 */
8744xmlChar *
8745xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8746 xmlChar *buf = NULL;
8747 int len = 0;
8748 int size = 10;
8749 xmlChar cur;
8750
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008751 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008752 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008753 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008754 return(NULL);
8755 }
8756 cur = CUR;
8757 while (((cur >= 'a') && (cur <= 'z')) ||
8758 ((cur >= 'A') && (cur <= 'Z')) ||
8759 ((cur >= '0') && (cur <= '9')) ||
8760 (cur == '_') || (cur == '.') ||
8761 (cur == ':') || (cur == '-')) {
8762 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008763 xmlChar *tmp;
8764
Owen Taylor3473f882001-02-23 17:55:21 +00008765 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008766 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8767 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008768 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008769 return(NULL);
8770 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008771 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008772 }
8773 buf[len++] = cur;
8774 NEXT;
8775 cur=CUR;
8776 }
8777 buf[len] = 0;
8778 return(buf);
8779}
8780
8781/**
8782 * xmlParseVersionInfo:
8783 * @ctxt: an XML parser context
8784 *
8785 * parse the XML version.
8786 *
8787 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8788 *
8789 * [25] Eq ::= S? '=' S?
8790 *
8791 * Returns the version string, e.g. "1.0"
8792 */
8793
8794xmlChar *
8795xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8796 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008797
Daniel Veillarda07050d2003-10-19 14:46:32 +00008798 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008799 SKIP(7);
8800 SKIP_BLANKS;
8801 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008802 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008803 return(NULL);
8804 }
8805 NEXT;
8806 SKIP_BLANKS;
8807 if (RAW == '"') {
8808 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008809 version = xmlParseVersionNum(ctxt);
8810 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008811 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008812 } else
8813 NEXT;
8814 } else if (RAW == '\''){
8815 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008816 version = xmlParseVersionNum(ctxt);
8817 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008818 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008819 } else
8820 NEXT;
8821 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008822 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008823 }
8824 }
8825 return(version);
8826}
8827
8828/**
8829 * xmlParseEncName:
8830 * @ctxt: an XML parser context
8831 *
8832 * parse the XML encoding name
8833 *
8834 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8835 *
8836 * Returns the encoding name value or NULL
8837 */
8838xmlChar *
8839xmlParseEncName(xmlParserCtxtPtr ctxt) {
8840 xmlChar *buf = NULL;
8841 int len = 0;
8842 int size = 10;
8843 xmlChar cur;
8844
8845 cur = CUR;
8846 if (((cur >= 'a') && (cur <= 'z')) ||
8847 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008848 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008849 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008850 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008851 return(NULL);
8852 }
8853
8854 buf[len++] = cur;
8855 NEXT;
8856 cur = CUR;
8857 while (((cur >= 'a') && (cur <= 'z')) ||
8858 ((cur >= 'A') && (cur <= 'Z')) ||
8859 ((cur >= '0') && (cur <= '9')) ||
8860 (cur == '.') || (cur == '_') ||
8861 (cur == '-')) {
8862 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008863 xmlChar *tmp;
8864
Owen Taylor3473f882001-02-23 17:55:21 +00008865 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008866 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8867 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008868 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00008869 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008870 return(NULL);
8871 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008872 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008873 }
8874 buf[len++] = cur;
8875 NEXT;
8876 cur = CUR;
8877 if (cur == 0) {
8878 SHRINK;
8879 GROW;
8880 cur = CUR;
8881 }
8882 }
8883 buf[len] = 0;
8884 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008885 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008886 }
8887 return(buf);
8888}
8889
8890/**
8891 * xmlParseEncodingDecl:
8892 * @ctxt: an XML parser context
8893 *
8894 * parse the XML encoding declaration
8895 *
8896 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8897 *
8898 * this setups the conversion filters.
8899 *
8900 * Returns the encoding value or NULL
8901 */
8902
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008903const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008904xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8905 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008906
8907 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008908 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008909 SKIP(8);
8910 SKIP_BLANKS;
8911 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008912 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008913 return(NULL);
8914 }
8915 NEXT;
8916 SKIP_BLANKS;
8917 if (RAW == '"') {
8918 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008919 encoding = xmlParseEncName(ctxt);
8920 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008921 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008922 } else
8923 NEXT;
8924 } else if (RAW == '\''){
8925 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008926 encoding = xmlParseEncName(ctxt);
8927 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008928 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008929 } else
8930 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008931 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008932 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008933 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008934 /*
8935 * UTF-16 encoding stwich has already taken place at this stage,
8936 * more over the little-endian/big-endian selection is already done
8937 */
8938 if ((encoding != NULL) &&
8939 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8940 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008941 if (ctxt->encoding != NULL)
8942 xmlFree((xmlChar *) ctxt->encoding);
8943 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008944 }
8945 /*
8946 * UTF-8 encoding is handled natively
8947 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008948 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008949 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8950 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008951 if (ctxt->encoding != NULL)
8952 xmlFree((xmlChar *) ctxt->encoding);
8953 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008954 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008955 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008956 xmlCharEncodingHandlerPtr handler;
8957
8958 if (ctxt->input->encoding != NULL)
8959 xmlFree((xmlChar *) ctxt->input->encoding);
8960 ctxt->input->encoding = encoding;
8961
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008962 handler = xmlFindCharEncodingHandler((const char *) encoding);
8963 if (handler != NULL) {
8964 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008965 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008966 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008967 "Unsupported encoding %s\n", encoding);
8968 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008969 }
8970 }
8971 }
8972 return(encoding);
8973}
8974
8975/**
8976 * xmlParseSDDecl:
8977 * @ctxt: an XML parser context
8978 *
8979 * parse the XML standalone declaration
8980 *
8981 * [32] SDDecl ::= S 'standalone' Eq
8982 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8983 *
8984 * [ VC: Standalone Document Declaration ]
8985 * TODO The standalone document declaration must have the value "no"
8986 * if any external markup declarations contain declarations of:
8987 * - attributes with default values, if elements to which these
8988 * attributes apply appear in the document without specifications
8989 * of values for these attributes, or
8990 * - entities (other than amp, lt, gt, apos, quot), if references
8991 * to those entities appear in the document, or
8992 * - attributes with values subject to normalization, where the
8993 * attribute appears in the document with a value which will change
8994 * as a result of normalization, or
8995 * - element types with element content, if white space occurs directly
8996 * within any instance of those types.
8997 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +00008998 * Returns:
8999 * 1 if standalone="yes"
9000 * 0 if standalone="no"
9001 * -2 if standalone attribute is missing or invalid
9002 * (A standalone value of -2 means that the XML declaration was found,
9003 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +00009004 */
9005
9006int
9007xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009008 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00009009
9010 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009011 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009012 SKIP(10);
9013 SKIP_BLANKS;
9014 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009015 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009016 return(standalone);
9017 }
9018 NEXT;
9019 SKIP_BLANKS;
9020 if (RAW == '\''){
9021 NEXT;
9022 if ((RAW == 'n') && (NXT(1) == 'o')) {
9023 standalone = 0;
9024 SKIP(2);
9025 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9026 (NXT(2) == 's')) {
9027 standalone = 1;
9028 SKIP(3);
9029 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009030 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009031 }
9032 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009033 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009034 } else
9035 NEXT;
9036 } else if (RAW == '"'){
9037 NEXT;
9038 if ((RAW == 'n') && (NXT(1) == 'o')) {
9039 standalone = 0;
9040 SKIP(2);
9041 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9042 (NXT(2) == 's')) {
9043 standalone = 1;
9044 SKIP(3);
9045 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009046 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009047 }
9048 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009049 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009050 } else
9051 NEXT;
9052 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009053 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009054 }
9055 }
9056 return(standalone);
9057}
9058
9059/**
9060 * xmlParseXMLDecl:
9061 * @ctxt: an XML parser context
9062 *
9063 * parse an XML declaration header
9064 *
9065 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9066 */
9067
9068void
9069xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9070 xmlChar *version;
9071
9072 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00009073 * This value for standalone indicates that the document has an
9074 * XML declaration but it does not have a standalone attribute.
9075 * It will be overwritten later if a standalone attribute is found.
9076 */
9077 ctxt->input->standalone = -2;
9078
9079 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009080 * We know that '<?xml' is here.
9081 */
9082 SKIP(5);
9083
William M. Brack76e95df2003-10-18 16:20:14 +00009084 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009085 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9086 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009087 }
9088 SKIP_BLANKS;
9089
9090 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009091 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009092 */
9093 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009094 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009095 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009096 } else {
9097 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9098 /*
9099 * TODO: Blueberry should be detected here
9100 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00009101 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9102 "Unsupported version '%s'\n",
9103 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009104 }
9105 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009106 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009107 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009108 }
Owen Taylor3473f882001-02-23 17:55:21 +00009109
9110 /*
9111 * We may have the encoding declaration
9112 */
William M. Brack76e95df2003-10-18 16:20:14 +00009113 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009114 if ((RAW == '?') && (NXT(1) == '>')) {
9115 SKIP(2);
9116 return;
9117 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009118 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009119 }
9120 xmlParseEncodingDecl(ctxt);
9121 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9122 /*
9123 * The XML REC instructs us to stop parsing right here
9124 */
9125 return;
9126 }
9127
9128 /*
9129 * We may have the standalone status.
9130 */
William M. Brack76e95df2003-10-18 16:20:14 +00009131 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009132 if ((RAW == '?') && (NXT(1) == '>')) {
9133 SKIP(2);
9134 return;
9135 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009136 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009137 }
9138 SKIP_BLANKS;
9139 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9140
9141 SKIP_BLANKS;
9142 if ((RAW == '?') && (NXT(1) == '>')) {
9143 SKIP(2);
9144 } else if (RAW == '>') {
9145 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009146 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009147 NEXT;
9148 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009149 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009150 MOVETO_ENDTAG(CUR_PTR);
9151 NEXT;
9152 }
9153}
9154
9155/**
9156 * xmlParseMisc:
9157 * @ctxt: an XML parser context
9158 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009159 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00009160 *
9161 * [27] Misc ::= Comment | PI | S
9162 */
9163
9164void
9165xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009166 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00009167 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00009168 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009169 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009170 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00009171 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009172 NEXT;
9173 } else
9174 xmlParseComment(ctxt);
9175 }
9176}
9177
9178/**
9179 * xmlParseDocument:
9180 * @ctxt: an XML parser context
9181 *
9182 * parse an XML document (and build a tree if using the standard SAX
9183 * interface).
9184 *
9185 * [1] document ::= prolog element Misc*
9186 *
9187 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9188 *
9189 * Returns 0, -1 in case of error. the parser context is augmented
9190 * as a result of the parsing.
9191 */
9192
9193int
9194xmlParseDocument(xmlParserCtxtPtr ctxt) {
9195 xmlChar start[4];
9196 xmlCharEncoding enc;
9197
9198 xmlInitParser();
9199
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009200 if ((ctxt == NULL) || (ctxt->input == NULL))
9201 return(-1);
9202
Owen Taylor3473f882001-02-23 17:55:21 +00009203 GROW;
9204
9205 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009206 * SAX: detecting the level.
9207 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009208 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009209
9210 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009211 * SAX: beginning of the document processing.
9212 */
9213 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9214 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9215
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009216 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9217 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00009218 /*
9219 * Get the 4 first bytes and decode the charset
9220 * if enc != XML_CHAR_ENCODING_NONE
9221 * plug some encoding conversion routines.
9222 */
9223 start[0] = RAW;
9224 start[1] = NXT(1);
9225 start[2] = NXT(2);
9226 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009227 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00009228 if (enc != XML_CHAR_ENCODING_NONE) {
9229 xmlSwitchEncoding(ctxt, enc);
9230 }
Owen Taylor3473f882001-02-23 17:55:21 +00009231 }
9232
9233
9234 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009235 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009236 }
9237
9238 /*
9239 * Check for the XMLDecl in the Prolog.
9240 */
9241 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009242 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009243
9244 /*
9245 * Note that we will switch encoding on the fly.
9246 */
9247 xmlParseXMLDecl(ctxt);
9248 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9249 /*
9250 * The XML REC instructs us to stop parsing right here
9251 */
9252 return(-1);
9253 }
9254 ctxt->standalone = ctxt->input->standalone;
9255 SKIP_BLANKS;
9256 } else {
9257 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9258 }
9259 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9260 ctxt->sax->startDocument(ctxt->userData);
9261
9262 /*
9263 * The Misc part of the Prolog
9264 */
9265 GROW;
9266 xmlParseMisc(ctxt);
9267
9268 /*
9269 * Then possibly doc type declaration(s) and more Misc
9270 * (doctypedecl Misc*)?
9271 */
9272 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009273 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009274
9275 ctxt->inSubset = 1;
9276 xmlParseDocTypeDecl(ctxt);
9277 if (RAW == '[') {
9278 ctxt->instate = XML_PARSER_DTD;
9279 xmlParseInternalSubset(ctxt);
9280 }
9281
9282 /*
9283 * Create and update the external subset.
9284 */
9285 ctxt->inSubset = 2;
9286 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9287 (!ctxt->disableSAX))
9288 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9289 ctxt->extSubSystem, ctxt->extSubURI);
9290 ctxt->inSubset = 0;
9291
Daniel Veillardac4118d2008-01-11 05:27:32 +00009292 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009293
9294 ctxt->instate = XML_PARSER_PROLOG;
9295 xmlParseMisc(ctxt);
9296 }
9297
9298 /*
9299 * Time to start parsing the tree itself
9300 */
9301 GROW;
9302 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009303 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9304 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009305 } else {
9306 ctxt->instate = XML_PARSER_CONTENT;
9307 xmlParseElement(ctxt);
9308 ctxt->instate = XML_PARSER_EPILOG;
9309
9310
9311 /*
9312 * The Misc part at the end
9313 */
9314 xmlParseMisc(ctxt);
9315
Daniel Veillard561b7f82002-03-20 21:55:57 +00009316 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009317 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009318 }
9319 ctxt->instate = XML_PARSER_EOF;
9320 }
9321
9322 /*
9323 * SAX: end of the document processing.
9324 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009325 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009326 ctxt->sax->endDocument(ctxt->userData);
9327
Daniel Veillard5997aca2002-03-18 18:36:20 +00009328 /*
9329 * Remove locally kept entity definitions if the tree was not built
9330 */
9331 if ((ctxt->myDoc != NULL) &&
9332 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9333 xmlFreeDoc(ctxt->myDoc);
9334 ctxt->myDoc = NULL;
9335 }
9336
Daniel Veillardc7612992002-02-17 22:47:37 +00009337 if (! ctxt->wellFormed) {
9338 ctxt->valid = 0;
9339 return(-1);
9340 }
Owen Taylor3473f882001-02-23 17:55:21 +00009341 return(0);
9342}
9343
9344/**
9345 * xmlParseExtParsedEnt:
9346 * @ctxt: an XML parser context
9347 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009348 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00009349 * An external general parsed entity is well-formed if it matches the
9350 * production labeled extParsedEnt.
9351 *
9352 * [78] extParsedEnt ::= TextDecl? content
9353 *
9354 * Returns 0, -1 in case of error. the parser context is augmented
9355 * as a result of the parsing.
9356 */
9357
9358int
9359xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9360 xmlChar start[4];
9361 xmlCharEncoding enc;
9362
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009363 if ((ctxt == NULL) || (ctxt->input == NULL))
9364 return(-1);
9365
Owen Taylor3473f882001-02-23 17:55:21 +00009366 xmlDefaultSAXHandlerInit();
9367
Daniel Veillard309f81d2003-09-23 09:02:53 +00009368 xmlDetectSAX2(ctxt);
9369
Owen Taylor3473f882001-02-23 17:55:21 +00009370 GROW;
9371
9372 /*
9373 * SAX: beginning of the document processing.
9374 */
9375 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9376 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9377
9378 /*
9379 * Get the 4 first bytes and decode the charset
9380 * if enc != XML_CHAR_ENCODING_NONE
9381 * plug some encoding conversion routines.
9382 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009383 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9384 start[0] = RAW;
9385 start[1] = NXT(1);
9386 start[2] = NXT(2);
9387 start[3] = NXT(3);
9388 enc = xmlDetectCharEncoding(start, 4);
9389 if (enc != XML_CHAR_ENCODING_NONE) {
9390 xmlSwitchEncoding(ctxt, enc);
9391 }
Owen Taylor3473f882001-02-23 17:55:21 +00009392 }
9393
9394
9395 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009396 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009397 }
9398
9399 /*
9400 * Check for the XMLDecl in the Prolog.
9401 */
9402 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009403 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009404
9405 /*
9406 * Note that we will switch encoding on the fly.
9407 */
9408 xmlParseXMLDecl(ctxt);
9409 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9410 /*
9411 * The XML REC instructs us to stop parsing right here
9412 */
9413 return(-1);
9414 }
9415 SKIP_BLANKS;
9416 } else {
9417 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9418 }
9419 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9420 ctxt->sax->startDocument(ctxt->userData);
9421
9422 /*
9423 * Doing validity checking on chunk doesn't make sense
9424 */
9425 ctxt->instate = XML_PARSER_CONTENT;
9426 ctxt->validate = 0;
9427 ctxt->loadsubset = 0;
9428 ctxt->depth = 0;
9429
9430 xmlParseContent(ctxt);
9431
9432 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009433 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009434 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009435 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009436 }
9437
9438 /*
9439 * SAX: end of the document processing.
9440 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009441 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009442 ctxt->sax->endDocument(ctxt->userData);
9443
9444 if (! ctxt->wellFormed) return(-1);
9445 return(0);
9446}
9447
Daniel Veillard73b013f2003-09-30 12:36:01 +00009448#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009449/************************************************************************
9450 * *
9451 * Progressive parsing interfaces *
9452 * *
9453 ************************************************************************/
9454
9455/**
9456 * xmlParseLookupSequence:
9457 * @ctxt: an XML parser context
9458 * @first: the first char to lookup
9459 * @next: the next char to lookup or zero
9460 * @third: the next char to lookup or zero
9461 *
9462 * Try to find if a sequence (first, next, third) or just (first next) or
9463 * (first) is available in the input stream.
9464 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9465 * to avoid rescanning sequences of bytes, it DOES change the state of the
9466 * parser, do not use liberally.
9467 *
9468 * Returns the index to the current parsing point if the full sequence
9469 * is available, -1 otherwise.
9470 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009471static int
Owen Taylor3473f882001-02-23 17:55:21 +00009472xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9473 xmlChar next, xmlChar third) {
9474 int base, len;
9475 xmlParserInputPtr in;
9476 const xmlChar *buf;
9477
9478 in = ctxt->input;
9479 if (in == NULL) return(-1);
9480 base = in->cur - in->base;
9481 if (base < 0) return(-1);
9482 if (ctxt->checkIndex > base)
9483 base = ctxt->checkIndex;
9484 if (in->buf == NULL) {
9485 buf = in->base;
9486 len = in->length;
9487 } else {
9488 buf = in->buf->buffer->content;
9489 len = in->buf->buffer->use;
9490 }
9491 /* take into account the sequence length */
9492 if (third) len -= 2;
9493 else if (next) len --;
9494 for (;base < len;base++) {
9495 if (buf[base] == first) {
9496 if (third != 0) {
9497 if ((buf[base + 1] != next) ||
9498 (buf[base + 2] != third)) continue;
9499 } else if (next != 0) {
9500 if (buf[base + 1] != next) continue;
9501 }
9502 ctxt->checkIndex = 0;
9503#ifdef DEBUG_PUSH
9504 if (next == 0)
9505 xmlGenericError(xmlGenericErrorContext,
9506 "PP: lookup '%c' found at %d\n",
9507 first, base);
9508 else if (third == 0)
9509 xmlGenericError(xmlGenericErrorContext,
9510 "PP: lookup '%c%c' found at %d\n",
9511 first, next, base);
9512 else
9513 xmlGenericError(xmlGenericErrorContext,
9514 "PP: lookup '%c%c%c' found at %d\n",
9515 first, next, third, base);
9516#endif
9517 return(base - (in->cur - in->base));
9518 }
9519 }
9520 ctxt->checkIndex = base;
9521#ifdef DEBUG_PUSH
9522 if (next == 0)
9523 xmlGenericError(xmlGenericErrorContext,
9524 "PP: lookup '%c' failed\n", first);
9525 else if (third == 0)
9526 xmlGenericError(xmlGenericErrorContext,
9527 "PP: lookup '%c%c' failed\n", first, next);
9528 else
9529 xmlGenericError(xmlGenericErrorContext,
9530 "PP: lookup '%c%c%c' failed\n", first, next, third);
9531#endif
9532 return(-1);
9533}
9534
9535/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009536 * xmlParseGetLasts:
9537 * @ctxt: an XML parser context
9538 * @lastlt: pointer to store the last '<' from the input
9539 * @lastgt: pointer to store the last '>' from the input
9540 *
9541 * Lookup the last < and > in the current chunk
9542 */
9543static void
9544xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9545 const xmlChar **lastgt) {
9546 const xmlChar *tmp;
9547
9548 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9549 xmlGenericError(xmlGenericErrorContext,
9550 "Internal error: xmlParseGetLasts\n");
9551 return;
9552 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009553 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009554 tmp = ctxt->input->end;
9555 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00009556 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00009557 if (tmp < ctxt->input->base) {
9558 *lastlt = NULL;
9559 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00009560 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009561 *lastlt = tmp;
9562 tmp++;
9563 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9564 if (*tmp == '\'') {
9565 tmp++;
9566 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9567 if (tmp < ctxt->input->end) tmp++;
9568 } else if (*tmp == '"') {
9569 tmp++;
9570 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9571 if (tmp < ctxt->input->end) tmp++;
9572 } else
9573 tmp++;
9574 }
9575 if (tmp < ctxt->input->end)
9576 *lastgt = tmp;
9577 else {
9578 tmp = *lastlt;
9579 tmp--;
9580 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9581 if (tmp >= ctxt->input->base)
9582 *lastgt = tmp;
9583 else
9584 *lastgt = NULL;
9585 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009586 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009587 } else {
9588 *lastlt = NULL;
9589 *lastgt = NULL;
9590 }
9591}
9592/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009593 * xmlCheckCdataPush:
9594 * @cur: pointer to the bock of characters
9595 * @len: length of the block in bytes
9596 *
9597 * Check that the block of characters is okay as SCdata content [20]
9598 *
9599 * Returns the number of bytes to pass if okay, a negative index where an
9600 * UTF-8 error occured otherwise
9601 */
9602static int
9603xmlCheckCdataPush(const xmlChar *utf, int len) {
9604 int ix;
9605 unsigned char c;
9606 int codepoint;
9607
9608 if ((utf == NULL) || (len <= 0))
9609 return(0);
9610
9611 for (ix = 0; ix < len;) { /* string is 0-terminated */
9612 c = utf[ix];
9613 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9614 if (c >= 0x20)
9615 ix++;
9616 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9617 ix++;
9618 else
9619 return(-ix);
9620 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9621 if (ix + 2 > len) return(ix);
9622 if ((utf[ix+1] & 0xc0 ) != 0x80)
9623 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009624 codepoint = (utf[ix] & 0x1f) << 6;
9625 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009626 if (!xmlIsCharQ(codepoint))
9627 return(-ix);
9628 ix += 2;
9629 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9630 if (ix + 3 > len) return(ix);
9631 if (((utf[ix+1] & 0xc0) != 0x80) ||
9632 ((utf[ix+2] & 0xc0) != 0x80))
9633 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009634 codepoint = (utf[ix] & 0xf) << 12;
9635 codepoint |= (utf[ix+1] & 0x3f) << 6;
9636 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009637 if (!xmlIsCharQ(codepoint))
9638 return(-ix);
9639 ix += 3;
9640 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9641 if (ix + 4 > len) return(ix);
9642 if (((utf[ix+1] & 0xc0) != 0x80) ||
9643 ((utf[ix+2] & 0xc0) != 0x80) ||
9644 ((utf[ix+3] & 0xc0) != 0x80))
9645 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009646 codepoint = (utf[ix] & 0x7) << 18;
9647 codepoint |= (utf[ix+1] & 0x3f) << 12;
9648 codepoint |= (utf[ix+2] & 0x3f) << 6;
9649 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009650 if (!xmlIsCharQ(codepoint))
9651 return(-ix);
9652 ix += 4;
9653 } else /* unknown encoding */
9654 return(-ix);
9655 }
9656 return(ix);
9657}
9658
9659/**
Owen Taylor3473f882001-02-23 17:55:21 +00009660 * xmlParseTryOrFinish:
9661 * @ctxt: an XML parser context
9662 * @terminate: last chunk indicator
9663 *
9664 * Try to progress on parsing
9665 *
9666 * Returns zero if no parsing was possible
9667 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009668static int
Owen Taylor3473f882001-02-23 17:55:21 +00009669xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9670 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009671 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009672 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009673 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009674
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009675 if (ctxt->input == NULL)
9676 return(0);
9677
Owen Taylor3473f882001-02-23 17:55:21 +00009678#ifdef DEBUG_PUSH
9679 switch (ctxt->instate) {
9680 case XML_PARSER_EOF:
9681 xmlGenericError(xmlGenericErrorContext,
9682 "PP: try EOF\n"); break;
9683 case XML_PARSER_START:
9684 xmlGenericError(xmlGenericErrorContext,
9685 "PP: try START\n"); break;
9686 case XML_PARSER_MISC:
9687 xmlGenericError(xmlGenericErrorContext,
9688 "PP: try MISC\n");break;
9689 case XML_PARSER_COMMENT:
9690 xmlGenericError(xmlGenericErrorContext,
9691 "PP: try COMMENT\n");break;
9692 case XML_PARSER_PROLOG:
9693 xmlGenericError(xmlGenericErrorContext,
9694 "PP: try PROLOG\n");break;
9695 case XML_PARSER_START_TAG:
9696 xmlGenericError(xmlGenericErrorContext,
9697 "PP: try START_TAG\n");break;
9698 case XML_PARSER_CONTENT:
9699 xmlGenericError(xmlGenericErrorContext,
9700 "PP: try CONTENT\n");break;
9701 case XML_PARSER_CDATA_SECTION:
9702 xmlGenericError(xmlGenericErrorContext,
9703 "PP: try CDATA_SECTION\n");break;
9704 case XML_PARSER_END_TAG:
9705 xmlGenericError(xmlGenericErrorContext,
9706 "PP: try END_TAG\n");break;
9707 case XML_PARSER_ENTITY_DECL:
9708 xmlGenericError(xmlGenericErrorContext,
9709 "PP: try ENTITY_DECL\n");break;
9710 case XML_PARSER_ENTITY_VALUE:
9711 xmlGenericError(xmlGenericErrorContext,
9712 "PP: try ENTITY_VALUE\n");break;
9713 case XML_PARSER_ATTRIBUTE_VALUE:
9714 xmlGenericError(xmlGenericErrorContext,
9715 "PP: try ATTRIBUTE_VALUE\n");break;
9716 case XML_PARSER_DTD:
9717 xmlGenericError(xmlGenericErrorContext,
9718 "PP: try DTD\n");break;
9719 case XML_PARSER_EPILOG:
9720 xmlGenericError(xmlGenericErrorContext,
9721 "PP: try EPILOG\n");break;
9722 case XML_PARSER_PI:
9723 xmlGenericError(xmlGenericErrorContext,
9724 "PP: try PI\n");break;
9725 case XML_PARSER_IGNORE:
9726 xmlGenericError(xmlGenericErrorContext,
9727 "PP: try IGNORE\n");break;
9728 }
9729#endif
9730
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009731 if ((ctxt->input != NULL) &&
9732 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009733 xmlSHRINK(ctxt);
9734 ctxt->checkIndex = 0;
9735 }
9736 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009737
Daniel Veillarda880b122003-04-21 21:36:41 +00009738 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +00009739 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009740 return(0);
9741
9742
Owen Taylor3473f882001-02-23 17:55:21 +00009743 /*
9744 * Pop-up of finished entities.
9745 */
9746 while ((RAW == 0) && (ctxt->inputNr > 1))
9747 xmlPopInput(ctxt);
9748
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009749 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009750 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009751 avail = ctxt->input->length -
9752 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009753 else {
9754 /*
9755 * If we are operating on converted input, try to flush
9756 * remainng chars to avoid them stalling in the non-converted
9757 * buffer.
9758 */
9759 if ((ctxt->input->buf->raw != NULL) &&
9760 (ctxt->input->buf->raw->use > 0)) {
9761 int base = ctxt->input->base -
9762 ctxt->input->buf->buffer->content;
9763 int current = ctxt->input->cur - ctxt->input->base;
9764
9765 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9766 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9767 ctxt->input->cur = ctxt->input->base + current;
9768 ctxt->input->end =
9769 &ctxt->input->buf->buffer->content[
9770 ctxt->input->buf->buffer->use];
9771 }
9772 avail = ctxt->input->buf->buffer->use -
9773 (ctxt->input->cur - ctxt->input->base);
9774 }
Owen Taylor3473f882001-02-23 17:55:21 +00009775 if (avail < 1)
9776 goto done;
9777 switch (ctxt->instate) {
9778 case XML_PARSER_EOF:
9779 /*
9780 * Document parsing is done !
9781 */
9782 goto done;
9783 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009784 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9785 xmlChar start[4];
9786 xmlCharEncoding enc;
9787
9788 /*
9789 * Very first chars read from the document flow.
9790 */
9791 if (avail < 4)
9792 goto done;
9793
9794 /*
9795 * Get the 4 first bytes and decode the charset
9796 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +00009797 * plug some encoding conversion routines,
9798 * else xmlSwitchEncoding will set to (default)
9799 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009800 */
9801 start[0] = RAW;
9802 start[1] = NXT(1);
9803 start[2] = NXT(2);
9804 start[3] = NXT(3);
9805 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +00009806 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009807 break;
9808 }
Owen Taylor3473f882001-02-23 17:55:21 +00009809
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009810 if (avail < 2)
9811 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009812 cur = ctxt->input->cur[0];
9813 next = ctxt->input->cur[1];
9814 if (cur == 0) {
9815 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9816 ctxt->sax->setDocumentLocator(ctxt->userData,
9817 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009818 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009819 ctxt->instate = XML_PARSER_EOF;
9820#ifdef DEBUG_PUSH
9821 xmlGenericError(xmlGenericErrorContext,
9822 "PP: entering EOF\n");
9823#endif
9824 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9825 ctxt->sax->endDocument(ctxt->userData);
9826 goto done;
9827 }
9828 if ((cur == '<') && (next == '?')) {
9829 /* PI or XML decl */
9830 if (avail < 5) return(ret);
9831 if ((!terminate) &&
9832 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9833 return(ret);
9834 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9835 ctxt->sax->setDocumentLocator(ctxt->userData,
9836 &xmlDefaultSAXLocator);
9837 if ((ctxt->input->cur[2] == 'x') &&
9838 (ctxt->input->cur[3] == 'm') &&
9839 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009840 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009841 ret += 5;
9842#ifdef DEBUG_PUSH
9843 xmlGenericError(xmlGenericErrorContext,
9844 "PP: Parsing XML Decl\n");
9845#endif
9846 xmlParseXMLDecl(ctxt);
9847 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9848 /*
9849 * The XML REC instructs us to stop parsing right
9850 * here
9851 */
9852 ctxt->instate = XML_PARSER_EOF;
9853 return(0);
9854 }
9855 ctxt->standalone = ctxt->input->standalone;
9856 if ((ctxt->encoding == NULL) &&
9857 (ctxt->input->encoding != NULL))
9858 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9859 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9860 (!ctxt->disableSAX))
9861 ctxt->sax->startDocument(ctxt->userData);
9862 ctxt->instate = XML_PARSER_MISC;
9863#ifdef DEBUG_PUSH
9864 xmlGenericError(xmlGenericErrorContext,
9865 "PP: entering MISC\n");
9866#endif
9867 } else {
9868 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9869 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9870 (!ctxt->disableSAX))
9871 ctxt->sax->startDocument(ctxt->userData);
9872 ctxt->instate = XML_PARSER_MISC;
9873#ifdef DEBUG_PUSH
9874 xmlGenericError(xmlGenericErrorContext,
9875 "PP: entering MISC\n");
9876#endif
9877 }
9878 } else {
9879 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9880 ctxt->sax->setDocumentLocator(ctxt->userData,
9881 &xmlDefaultSAXLocator);
9882 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +00009883 if (ctxt->version == NULL) {
9884 xmlErrMemory(ctxt, NULL);
9885 break;
9886 }
Owen Taylor3473f882001-02-23 17:55:21 +00009887 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9888 (!ctxt->disableSAX))
9889 ctxt->sax->startDocument(ctxt->userData);
9890 ctxt->instate = XML_PARSER_MISC;
9891#ifdef DEBUG_PUSH
9892 xmlGenericError(xmlGenericErrorContext,
9893 "PP: entering MISC\n");
9894#endif
9895 }
9896 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009897 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009898 const xmlChar *name;
9899 const xmlChar *prefix;
9900 const xmlChar *URI;
9901 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009902
9903 if ((avail < 2) && (ctxt->inputNr == 1))
9904 goto done;
9905 cur = ctxt->input->cur[0];
9906 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009907 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009908 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009909 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9910 ctxt->sax->endDocument(ctxt->userData);
9911 goto done;
9912 }
9913 if (!terminate) {
9914 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009915 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +00009916 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009917 goto done;
9918 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9919 goto done;
9920 }
9921 }
9922 if (ctxt->spaceNr == 0)
9923 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009924 else if (*ctxt->space == -2)
9925 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +00009926 else
9927 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009928#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009929 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009930#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009931 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009932#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009933 else
9934 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009935#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009936 if (name == NULL) {
9937 spacePop(ctxt);
9938 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009939 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9940 ctxt->sax->endDocument(ctxt->userData);
9941 goto done;
9942 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009943#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009944 /*
9945 * [ VC: Root Element Type ]
9946 * The Name in the document type declaration must match
9947 * the element type of the root element.
9948 */
9949 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9950 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9951 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009952#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009953
9954 /*
9955 * Check for an Empty Element.
9956 */
9957 if ((RAW == '/') && (NXT(1) == '>')) {
9958 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009959
9960 if (ctxt->sax2) {
9961 if ((ctxt->sax != NULL) &&
9962 (ctxt->sax->endElementNs != NULL) &&
9963 (!ctxt->disableSAX))
9964 ctxt->sax->endElementNs(ctxt->userData, name,
9965 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +00009966 if (ctxt->nsNr - nsNr > 0)
9967 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009968#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009969 } else {
9970 if ((ctxt->sax != NULL) &&
9971 (ctxt->sax->endElement != NULL) &&
9972 (!ctxt->disableSAX))
9973 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009974#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009975 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009976 spacePop(ctxt);
9977 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009978 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009979 } else {
9980 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009981 }
9982 break;
9983 }
9984 if (RAW == '>') {
9985 NEXT;
9986 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009987 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009988 "Couldn't find end of Start Tag %s\n",
9989 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009990 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009991 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009992 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009993 if (ctxt->sax2)
9994 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009995#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009996 else
9997 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009998#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009999
Daniel Veillarda880b122003-04-21 21:36:41 +000010000 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010001 break;
10002 }
10003 case XML_PARSER_CONTENT: {
10004 const xmlChar *test;
10005 unsigned int cons;
10006 if ((avail < 2) && (ctxt->inputNr == 1))
10007 goto done;
10008 cur = ctxt->input->cur[0];
10009 next = ctxt->input->cur[1];
10010
10011 test = CUR_PTR;
10012 cons = ctxt->input->consumed;
10013 if ((cur == '<') && (next == '/')) {
10014 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010015 break;
10016 } else if ((cur == '<') && (next == '?')) {
10017 if ((!terminate) &&
10018 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10019 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010020 xmlParsePI(ctxt);
10021 } else if ((cur == '<') && (next != '!')) {
10022 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010023 break;
10024 } else if ((cur == '<') && (next == '!') &&
10025 (ctxt->input->cur[2] == '-') &&
10026 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000010027 int term;
10028
10029 if (avail < 4)
10030 goto done;
10031 ctxt->input->cur += 4;
10032 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10033 ctxt->input->cur -= 4;
10034 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000010035 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010036 xmlParseComment(ctxt);
10037 ctxt->instate = XML_PARSER_CONTENT;
10038 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10039 (ctxt->input->cur[2] == '[') &&
10040 (ctxt->input->cur[3] == 'C') &&
10041 (ctxt->input->cur[4] == 'D') &&
10042 (ctxt->input->cur[5] == 'A') &&
10043 (ctxt->input->cur[6] == 'T') &&
10044 (ctxt->input->cur[7] == 'A') &&
10045 (ctxt->input->cur[8] == '[')) {
10046 SKIP(9);
10047 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000010048 break;
10049 } else if ((cur == '<') && (next == '!') &&
10050 (avail < 9)) {
10051 goto done;
10052 } else if (cur == '&') {
10053 if ((!terminate) &&
10054 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10055 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010056 xmlParseReference(ctxt);
10057 } else {
10058 /* TODO Avoid the extra copy, handle directly !!! */
10059 /*
10060 * Goal of the following test is:
10061 * - minimize calls to the SAX 'character' callback
10062 * when they are mergeable
10063 * - handle an problem for isBlank when we only parse
10064 * a sequence of blank chars and the next one is
10065 * not available to check against '<' presence.
10066 * - tries to homogenize the differences in SAX
10067 * callbacks between the push and pull versions
10068 * of the parser.
10069 */
10070 if ((ctxt->inputNr == 1) &&
10071 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10072 if (!terminate) {
10073 if (ctxt->progressive) {
10074 if ((lastlt == NULL) ||
10075 (ctxt->input->cur > lastlt))
10076 goto done;
10077 } else if (xmlParseLookupSequence(ctxt,
10078 '<', 0, 0) < 0) {
10079 goto done;
10080 }
10081 }
10082 }
10083 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000010084 xmlParseCharData(ctxt, 0);
10085 }
10086 /*
10087 * Pop-up of finished entities.
10088 */
10089 while ((RAW == 0) && (ctxt->inputNr > 1))
10090 xmlPopInput(ctxt);
10091 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010092 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10093 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000010094 ctxt->instate = XML_PARSER_EOF;
10095 break;
10096 }
10097 break;
10098 }
10099 case XML_PARSER_END_TAG:
10100 if (avail < 2)
10101 goto done;
10102 if (!terminate) {
10103 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010104 /* > can be found unescaped in attribute values */
10105 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010106 goto done;
10107 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10108 goto done;
10109 }
10110 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010111 if (ctxt->sax2) {
10112 xmlParseEndTag2(ctxt,
10113 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10114 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010115 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010116 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010117 }
10118#ifdef LIBXML_SAX1_ENABLED
10119 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000010120 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010121#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010122 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010123 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010124 } else {
10125 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010126 }
10127 break;
10128 case XML_PARSER_CDATA_SECTION: {
10129 /*
10130 * The Push mode need to have the SAX callback for
10131 * cdataBlock merge back contiguous callbacks.
10132 */
10133 int base;
10134
10135 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10136 if (base < 0) {
10137 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010138 int tmp;
10139
10140 tmp = xmlCheckCdataPush(ctxt->input->cur,
10141 XML_PARSER_BIG_BUFFER_SIZE);
10142 if (tmp < 0) {
10143 tmp = -tmp;
10144 ctxt->input->cur += tmp;
10145 goto encoding_error;
10146 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010147 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10148 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010149 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010150 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010151 else if (ctxt->sax->characters != NULL)
10152 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010153 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010154 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010155 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010156 ctxt->checkIndex = 0;
10157 }
10158 goto done;
10159 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010160 int tmp;
10161
10162 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10163 if ((tmp < 0) || (tmp != base)) {
10164 tmp = -tmp;
10165 ctxt->input->cur += tmp;
10166 goto encoding_error;
10167 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000010168 if ((ctxt->sax != NULL) && (base == 0) &&
10169 (ctxt->sax->cdataBlock != NULL) &&
10170 (!ctxt->disableSAX)) {
10171 /*
10172 * Special case to provide identical behaviour
10173 * between pull and push parsers on enpty CDATA
10174 * sections
10175 */
10176 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
10177 (!strncmp((const char *)&ctxt->input->cur[-9],
10178 "<![CDATA[", 9)))
10179 ctxt->sax->cdataBlock(ctxt->userData,
10180 BAD_CAST "", 0);
10181 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010182 (!ctxt->disableSAX)) {
10183 if (ctxt->sax->cdataBlock != NULL)
10184 ctxt->sax->cdataBlock(ctxt->userData,
10185 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010186 else if (ctxt->sax->characters != NULL)
10187 ctxt->sax->characters(ctxt->userData,
10188 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000010189 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000010190 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000010191 ctxt->checkIndex = 0;
10192 ctxt->instate = XML_PARSER_CONTENT;
10193#ifdef DEBUG_PUSH
10194 xmlGenericError(xmlGenericErrorContext,
10195 "PP: entering CONTENT\n");
10196#endif
10197 }
10198 break;
10199 }
Owen Taylor3473f882001-02-23 17:55:21 +000010200 case XML_PARSER_MISC:
10201 SKIP_BLANKS;
10202 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010203 avail = ctxt->input->length -
10204 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010205 else
Daniel Veillarda880b122003-04-21 21:36:41 +000010206 avail = ctxt->input->buf->buffer->use -
10207 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010208 if (avail < 2)
10209 goto done;
10210 cur = ctxt->input->cur[0];
10211 next = ctxt->input->cur[1];
10212 if ((cur == '<') && (next == '?')) {
10213 if ((!terminate) &&
10214 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10215 goto done;
10216#ifdef DEBUG_PUSH
10217 xmlGenericError(xmlGenericErrorContext,
10218 "PP: Parsing PI\n");
10219#endif
10220 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000010221 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010222 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010223 (ctxt->input->cur[2] == '-') &&
10224 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010225 if ((!terminate) &&
10226 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10227 goto done;
10228#ifdef DEBUG_PUSH
10229 xmlGenericError(xmlGenericErrorContext,
10230 "PP: Parsing Comment\n");
10231#endif
10232 xmlParseComment(ctxt);
10233 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000010234 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010235 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010236 (ctxt->input->cur[2] == 'D') &&
10237 (ctxt->input->cur[3] == 'O') &&
10238 (ctxt->input->cur[4] == 'C') &&
10239 (ctxt->input->cur[5] == 'T') &&
10240 (ctxt->input->cur[6] == 'Y') &&
10241 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010242 (ctxt->input->cur[8] == 'E')) {
10243 if ((!terminate) &&
10244 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10245 goto done;
10246#ifdef DEBUG_PUSH
10247 xmlGenericError(xmlGenericErrorContext,
10248 "PP: Parsing internal subset\n");
10249#endif
10250 ctxt->inSubset = 1;
10251 xmlParseDocTypeDecl(ctxt);
10252 if (RAW == '[') {
10253 ctxt->instate = XML_PARSER_DTD;
10254#ifdef DEBUG_PUSH
10255 xmlGenericError(xmlGenericErrorContext,
10256 "PP: entering DTD\n");
10257#endif
10258 } else {
10259 /*
10260 * Create and update the external subset.
10261 */
10262 ctxt->inSubset = 2;
10263 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10264 (ctxt->sax->externalSubset != NULL))
10265 ctxt->sax->externalSubset(ctxt->userData,
10266 ctxt->intSubName, ctxt->extSubSystem,
10267 ctxt->extSubURI);
10268 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000010269 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010270 ctxt->instate = XML_PARSER_PROLOG;
10271#ifdef DEBUG_PUSH
10272 xmlGenericError(xmlGenericErrorContext,
10273 "PP: entering PROLOG\n");
10274#endif
10275 }
10276 } else if ((cur == '<') && (next == '!') &&
10277 (avail < 9)) {
10278 goto done;
10279 } else {
10280 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010281 ctxt->progressive = 1;
10282 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010283#ifdef DEBUG_PUSH
10284 xmlGenericError(xmlGenericErrorContext,
10285 "PP: entering START_TAG\n");
10286#endif
10287 }
10288 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010289 case XML_PARSER_PROLOG:
10290 SKIP_BLANKS;
10291 if (ctxt->input->buf == NULL)
10292 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10293 else
10294 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10295 if (avail < 2)
10296 goto done;
10297 cur = ctxt->input->cur[0];
10298 next = ctxt->input->cur[1];
10299 if ((cur == '<') && (next == '?')) {
10300 if ((!terminate) &&
10301 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10302 goto done;
10303#ifdef DEBUG_PUSH
10304 xmlGenericError(xmlGenericErrorContext,
10305 "PP: Parsing PI\n");
10306#endif
10307 xmlParsePI(ctxt);
10308 } else if ((cur == '<') && (next == '!') &&
10309 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10310 if ((!terminate) &&
10311 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10312 goto done;
10313#ifdef DEBUG_PUSH
10314 xmlGenericError(xmlGenericErrorContext,
10315 "PP: Parsing Comment\n");
10316#endif
10317 xmlParseComment(ctxt);
10318 ctxt->instate = XML_PARSER_PROLOG;
10319 } else if ((cur == '<') && (next == '!') &&
10320 (avail < 4)) {
10321 goto done;
10322 } else {
10323 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010324 if (ctxt->progressive == 0)
10325 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000010326 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010327#ifdef DEBUG_PUSH
10328 xmlGenericError(xmlGenericErrorContext,
10329 "PP: entering START_TAG\n");
10330#endif
10331 }
10332 break;
10333 case XML_PARSER_EPILOG:
10334 SKIP_BLANKS;
10335 if (ctxt->input->buf == NULL)
10336 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10337 else
10338 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10339 if (avail < 2)
10340 goto done;
10341 cur = ctxt->input->cur[0];
10342 next = ctxt->input->cur[1];
10343 if ((cur == '<') && (next == '?')) {
10344 if ((!terminate) &&
10345 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10346 goto done;
10347#ifdef DEBUG_PUSH
10348 xmlGenericError(xmlGenericErrorContext,
10349 "PP: Parsing PI\n");
10350#endif
10351 xmlParsePI(ctxt);
10352 ctxt->instate = XML_PARSER_EPILOG;
10353 } else if ((cur == '<') && (next == '!') &&
10354 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10355 if ((!terminate) &&
10356 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10357 goto done;
10358#ifdef DEBUG_PUSH
10359 xmlGenericError(xmlGenericErrorContext,
10360 "PP: Parsing Comment\n");
10361#endif
10362 xmlParseComment(ctxt);
10363 ctxt->instate = XML_PARSER_EPILOG;
10364 } else if ((cur == '<') && (next == '!') &&
10365 (avail < 4)) {
10366 goto done;
10367 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010368 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010369 ctxt->instate = XML_PARSER_EOF;
10370#ifdef DEBUG_PUSH
10371 xmlGenericError(xmlGenericErrorContext,
10372 "PP: entering EOF\n");
10373#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010374 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010375 ctxt->sax->endDocument(ctxt->userData);
10376 goto done;
10377 }
10378 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010379 case XML_PARSER_DTD: {
10380 /*
10381 * Sorry but progressive parsing of the internal subset
10382 * is not expected to be supported. We first check that
10383 * the full content of the internal subset is available and
10384 * the parsing is launched only at that point.
10385 * Internal subset ends up with "']' S? '>'" in an unescaped
10386 * section and not in a ']]>' sequence which are conditional
10387 * sections (whoever argued to keep that crap in XML deserve
10388 * a place in hell !).
10389 */
10390 int base, i;
10391 xmlChar *buf;
10392 xmlChar quote = 0;
10393
10394 base = ctxt->input->cur - ctxt->input->base;
10395 if (base < 0) return(0);
10396 if (ctxt->checkIndex > base)
10397 base = ctxt->checkIndex;
10398 buf = ctxt->input->buf->buffer->content;
10399 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10400 base++) {
10401 if (quote != 0) {
10402 if (buf[base] == quote)
10403 quote = 0;
10404 continue;
10405 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010406 if ((quote == 0) && (buf[base] == '<')) {
10407 int found = 0;
10408 /* special handling of comments */
10409 if (((unsigned int) base + 4 <
10410 ctxt->input->buf->buffer->use) &&
10411 (buf[base + 1] == '!') &&
10412 (buf[base + 2] == '-') &&
10413 (buf[base + 3] == '-')) {
10414 for (;(unsigned int) base + 3 <
10415 ctxt->input->buf->buffer->use; base++) {
10416 if ((buf[base] == '-') &&
10417 (buf[base + 1] == '-') &&
10418 (buf[base + 2] == '>')) {
10419 found = 1;
10420 base += 2;
10421 break;
10422 }
10423 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010424 if (!found) {
10425#if 0
10426 fprintf(stderr, "unfinished comment\n");
10427#endif
10428 break; /* for */
10429 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010430 continue;
10431 }
10432 }
Owen Taylor3473f882001-02-23 17:55:21 +000010433 if (buf[base] == '"') {
10434 quote = '"';
10435 continue;
10436 }
10437 if (buf[base] == '\'') {
10438 quote = '\'';
10439 continue;
10440 }
10441 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010442#if 0
10443 fprintf(stderr, "%c%c%c%c: ", buf[base],
10444 buf[base + 1], buf[base + 2], buf[base + 3]);
10445#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010446 if ((unsigned int) base +1 >=
10447 ctxt->input->buf->buffer->use)
10448 break;
10449 if (buf[base + 1] == ']') {
10450 /* conditional crap, skip both ']' ! */
10451 base++;
10452 continue;
10453 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010454 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010455 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10456 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010457 if (buf[base + i] == '>') {
10458#if 0
10459 fprintf(stderr, "found\n");
10460#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010461 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010462 }
10463 if (!IS_BLANK_CH(buf[base + i])) {
10464#if 0
10465 fprintf(stderr, "not found\n");
10466#endif
10467 goto not_end_of_int_subset;
10468 }
Owen Taylor3473f882001-02-23 17:55:21 +000010469 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010470#if 0
10471 fprintf(stderr, "end of stream\n");
10472#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010473 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010474
Owen Taylor3473f882001-02-23 17:55:21 +000010475 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010476not_end_of_int_subset:
10477 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000010478 }
10479 /*
10480 * We didn't found the end of the Internal subset
10481 */
Owen Taylor3473f882001-02-23 17:55:21 +000010482#ifdef DEBUG_PUSH
10483 if (next == 0)
10484 xmlGenericError(xmlGenericErrorContext,
10485 "PP: lookup of int subset end filed\n");
10486#endif
10487 goto done;
10488
10489found_end_int_subset:
10490 xmlParseInternalSubset(ctxt);
10491 ctxt->inSubset = 2;
10492 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10493 (ctxt->sax->externalSubset != NULL))
10494 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10495 ctxt->extSubSystem, ctxt->extSubURI);
10496 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000010497 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010498 ctxt->instate = XML_PARSER_PROLOG;
10499 ctxt->checkIndex = 0;
10500#ifdef DEBUG_PUSH
10501 xmlGenericError(xmlGenericErrorContext,
10502 "PP: entering PROLOG\n");
10503#endif
10504 break;
10505 }
10506 case XML_PARSER_COMMENT:
10507 xmlGenericError(xmlGenericErrorContext,
10508 "PP: internal error, state == COMMENT\n");
10509 ctxt->instate = XML_PARSER_CONTENT;
10510#ifdef DEBUG_PUSH
10511 xmlGenericError(xmlGenericErrorContext,
10512 "PP: entering CONTENT\n");
10513#endif
10514 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010515 case XML_PARSER_IGNORE:
10516 xmlGenericError(xmlGenericErrorContext,
10517 "PP: internal error, state == IGNORE");
10518 ctxt->instate = XML_PARSER_DTD;
10519#ifdef DEBUG_PUSH
10520 xmlGenericError(xmlGenericErrorContext,
10521 "PP: entering DTD\n");
10522#endif
10523 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010524 case XML_PARSER_PI:
10525 xmlGenericError(xmlGenericErrorContext,
10526 "PP: internal error, state == PI\n");
10527 ctxt->instate = XML_PARSER_CONTENT;
10528#ifdef DEBUG_PUSH
10529 xmlGenericError(xmlGenericErrorContext,
10530 "PP: entering CONTENT\n");
10531#endif
10532 break;
10533 case XML_PARSER_ENTITY_DECL:
10534 xmlGenericError(xmlGenericErrorContext,
10535 "PP: internal error, state == ENTITY_DECL\n");
10536 ctxt->instate = XML_PARSER_DTD;
10537#ifdef DEBUG_PUSH
10538 xmlGenericError(xmlGenericErrorContext,
10539 "PP: entering DTD\n");
10540#endif
10541 break;
10542 case XML_PARSER_ENTITY_VALUE:
10543 xmlGenericError(xmlGenericErrorContext,
10544 "PP: internal error, state == ENTITY_VALUE\n");
10545 ctxt->instate = XML_PARSER_CONTENT;
10546#ifdef DEBUG_PUSH
10547 xmlGenericError(xmlGenericErrorContext,
10548 "PP: entering DTD\n");
10549#endif
10550 break;
10551 case XML_PARSER_ATTRIBUTE_VALUE:
10552 xmlGenericError(xmlGenericErrorContext,
10553 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10554 ctxt->instate = XML_PARSER_START_TAG;
10555#ifdef DEBUG_PUSH
10556 xmlGenericError(xmlGenericErrorContext,
10557 "PP: entering START_TAG\n");
10558#endif
10559 break;
10560 case XML_PARSER_SYSTEM_LITERAL:
10561 xmlGenericError(xmlGenericErrorContext,
10562 "PP: internal error, state == SYSTEM_LITERAL\n");
10563 ctxt->instate = XML_PARSER_START_TAG;
10564#ifdef DEBUG_PUSH
10565 xmlGenericError(xmlGenericErrorContext,
10566 "PP: entering START_TAG\n");
10567#endif
10568 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010569 case XML_PARSER_PUBLIC_LITERAL:
10570 xmlGenericError(xmlGenericErrorContext,
10571 "PP: internal error, state == PUBLIC_LITERAL\n");
10572 ctxt->instate = XML_PARSER_START_TAG;
10573#ifdef DEBUG_PUSH
10574 xmlGenericError(xmlGenericErrorContext,
10575 "PP: entering START_TAG\n");
10576#endif
10577 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010578 }
10579 }
10580done:
10581#ifdef DEBUG_PUSH
10582 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10583#endif
10584 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010585encoding_error:
10586 {
10587 char buffer[150];
10588
10589 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10590 ctxt->input->cur[0], ctxt->input->cur[1],
10591 ctxt->input->cur[2], ctxt->input->cur[3]);
10592 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10593 "Input is not proper UTF-8, indicate encoding !\n%s",
10594 BAD_CAST buffer, NULL);
10595 }
10596 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000010597}
10598
10599/**
Owen Taylor3473f882001-02-23 17:55:21 +000010600 * xmlParseChunk:
10601 * @ctxt: an XML parser context
10602 * @chunk: an char array
10603 * @size: the size in byte of the chunk
10604 * @terminate: last chunk indicator
10605 *
10606 * Parse a Chunk of memory
10607 *
10608 * Returns zero if no error, the xmlParserErrors otherwise.
10609 */
10610int
10611xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10612 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000010613 int end_in_lf = 0;
10614
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010615 if (ctxt == NULL)
10616 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000010617 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010618 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010619 if (ctxt->instate == XML_PARSER_START)
10620 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000010621 if ((size > 0) && (chunk != NULL) && (!terminate) &&
10622 (chunk[size - 1] == '\r')) {
10623 end_in_lf = 1;
10624 size--;
10625 }
Owen Taylor3473f882001-02-23 17:55:21 +000010626 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10627 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10628 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10629 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000010630 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000010631
William M. Bracka3215c72004-07-31 16:24:01 +000010632 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10633 if (res < 0) {
10634 ctxt->errNo = XML_PARSER_EOF;
10635 ctxt->disableSAX = 1;
10636 return (XML_PARSER_EOF);
10637 }
Owen Taylor3473f882001-02-23 17:55:21 +000010638 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10639 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010640 ctxt->input->end =
10641 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010642#ifdef DEBUG_PUSH
10643 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10644#endif
10645
Owen Taylor3473f882001-02-23 17:55:21 +000010646 } else if (ctxt->instate != XML_PARSER_EOF) {
10647 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10648 xmlParserInputBufferPtr in = ctxt->input->buf;
10649 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10650 (in->raw != NULL)) {
10651 int nbchars;
10652
10653 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10654 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010655 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010656 xmlGenericError(xmlGenericErrorContext,
10657 "xmlParseChunk: encoder error\n");
10658 return(XML_ERR_INVALID_ENCODING);
10659 }
10660 }
10661 }
10662 }
10663 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000010664 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
10665 (ctxt->input->buf != NULL)) {
10666 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
10667 }
Daniel Veillard14412512005-01-21 23:53:26 +000010668 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010669 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010670 if (terminate) {
10671 /*
10672 * Check for termination
10673 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010674 int avail = 0;
10675
10676 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010677 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010678 avail = ctxt->input->length -
10679 (ctxt->input->cur - ctxt->input->base);
10680 else
10681 avail = ctxt->input->buf->buffer->use -
10682 (ctxt->input->cur - ctxt->input->base);
10683 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010684
Owen Taylor3473f882001-02-23 17:55:21 +000010685 if ((ctxt->instate != XML_PARSER_EOF) &&
10686 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010687 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010688 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010689 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010690 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010691 }
Owen Taylor3473f882001-02-23 17:55:21 +000010692 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010693 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010694 ctxt->sax->endDocument(ctxt->userData);
10695 }
10696 ctxt->instate = XML_PARSER_EOF;
10697 }
10698 return((xmlParserErrors) ctxt->errNo);
10699}
10700
10701/************************************************************************
10702 * *
10703 * I/O front end functions to the parser *
10704 * *
10705 ************************************************************************/
10706
10707/**
Owen Taylor3473f882001-02-23 17:55:21 +000010708 * xmlCreatePushParserCtxt:
10709 * @sax: a SAX handler
10710 * @user_data: The user data returned on SAX callbacks
10711 * @chunk: a pointer to an array of chars
10712 * @size: number of chars in the array
10713 * @filename: an optional file name or URI
10714 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010715 * Create a parser context for using the XML parser in push mode.
10716 * If @buffer and @size are non-NULL, the data is used to detect
10717 * the encoding. The remaining characters will be parsed so they
10718 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010719 * To allow content encoding detection, @size should be >= 4
10720 * The value of @filename is used for fetching external entities
10721 * and error/warning reports.
10722 *
10723 * Returns the new parser context or NULL
10724 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010725
Owen Taylor3473f882001-02-23 17:55:21 +000010726xmlParserCtxtPtr
10727xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10728 const char *chunk, int size, const char *filename) {
10729 xmlParserCtxtPtr ctxt;
10730 xmlParserInputPtr inputStream;
10731 xmlParserInputBufferPtr buf;
10732 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10733
10734 /*
10735 * plug some encoding conversion routines
10736 */
10737 if ((chunk != NULL) && (size >= 4))
10738 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10739
10740 buf = xmlAllocParserInputBuffer(enc);
10741 if (buf == NULL) return(NULL);
10742
10743 ctxt = xmlNewParserCtxt();
10744 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010745 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010746 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010747 return(NULL);
10748 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010749 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010750 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10751 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010752 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010753 xmlFreeParserInputBuffer(buf);
10754 xmlFreeParserCtxt(ctxt);
10755 return(NULL);
10756 }
Owen Taylor3473f882001-02-23 17:55:21 +000010757 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010758#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010759 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010760#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010761 xmlFree(ctxt->sax);
10762 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10763 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010764 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010765 xmlFreeParserInputBuffer(buf);
10766 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010767 return(NULL);
10768 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010769 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10770 if (sax->initialized == XML_SAX2_MAGIC)
10771 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10772 else
10773 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010774 if (user_data != NULL)
10775 ctxt->userData = user_data;
10776 }
10777 if (filename == NULL) {
10778 ctxt->directory = NULL;
10779 } else {
10780 ctxt->directory = xmlParserGetDirectory(filename);
10781 }
10782
10783 inputStream = xmlNewInputStream(ctxt);
10784 if (inputStream == NULL) {
10785 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010786 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010787 return(NULL);
10788 }
10789
10790 if (filename == NULL)
10791 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000010792 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000010793 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010794 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000010795 if (inputStream->filename == NULL) {
10796 xmlFreeParserCtxt(ctxt);
10797 xmlFreeParserInputBuffer(buf);
10798 return(NULL);
10799 }
10800 }
Owen Taylor3473f882001-02-23 17:55:21 +000010801 inputStream->buf = buf;
10802 inputStream->base = inputStream->buf->buffer->content;
10803 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010804 inputStream->end =
10805 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010806
10807 inputPush(ctxt, inputStream);
10808
William M. Brack3a1cd212005-02-11 14:35:54 +000010809 /*
10810 * If the caller didn't provide an initial 'chunk' for determining
10811 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
10812 * that it can be automatically determined later
10813 */
10814 if ((size == 0) || (chunk == NULL)) {
10815 ctxt->charset = XML_CHAR_ENCODING_NONE;
10816 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010817 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10818 int cur = ctxt->input->cur - ctxt->input->base;
10819
Owen Taylor3473f882001-02-23 17:55:21 +000010820 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010821
10822 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10823 ctxt->input->cur = ctxt->input->base + cur;
10824 ctxt->input->end =
10825 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010826#ifdef DEBUG_PUSH
10827 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10828#endif
10829 }
10830
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010831 if (enc != XML_CHAR_ENCODING_NONE) {
10832 xmlSwitchEncoding(ctxt, enc);
10833 }
10834
Owen Taylor3473f882001-02-23 17:55:21 +000010835 return(ctxt);
10836}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010837#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010838
10839/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000010840 * xmlStopParser:
10841 * @ctxt: an XML parser context
10842 *
10843 * Blocks further parser processing
10844 */
10845void
10846xmlStopParser(xmlParserCtxtPtr ctxt) {
10847 if (ctxt == NULL)
10848 return;
10849 ctxt->instate = XML_PARSER_EOF;
10850 ctxt->disableSAX = 1;
10851 if (ctxt->input != NULL) {
10852 ctxt->input->cur = BAD_CAST"";
10853 ctxt->input->base = ctxt->input->cur;
10854 }
10855}
10856
10857/**
Owen Taylor3473f882001-02-23 17:55:21 +000010858 * xmlCreateIOParserCtxt:
10859 * @sax: a SAX handler
10860 * @user_data: The user data returned on SAX callbacks
10861 * @ioread: an I/O read function
10862 * @ioclose: an I/O close function
10863 * @ioctx: an I/O handler
10864 * @enc: the charset encoding if known
10865 *
10866 * Create a parser context for using the XML parser with an existing
10867 * I/O stream
10868 *
10869 * Returns the new parser context or NULL
10870 */
10871xmlParserCtxtPtr
10872xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10873 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10874 void *ioctx, xmlCharEncoding enc) {
10875 xmlParserCtxtPtr ctxt;
10876 xmlParserInputPtr inputStream;
10877 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000010878
10879 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010880
10881 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10882 if (buf == NULL) return(NULL);
10883
10884 ctxt = xmlNewParserCtxt();
10885 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010886 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010887 return(NULL);
10888 }
10889 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010890#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010891 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010892#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010893 xmlFree(ctxt->sax);
10894 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10895 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010896 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010897 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010898 return(NULL);
10899 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010900 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10901 if (sax->initialized == XML_SAX2_MAGIC)
10902 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10903 else
10904 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010905 if (user_data != NULL)
10906 ctxt->userData = user_data;
10907 }
10908
10909 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10910 if (inputStream == NULL) {
10911 xmlFreeParserCtxt(ctxt);
10912 return(NULL);
10913 }
10914 inputPush(ctxt, inputStream);
10915
10916 return(ctxt);
10917}
10918
Daniel Veillard4432df22003-09-28 18:58:27 +000010919#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010920/************************************************************************
10921 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010922 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010923 * *
10924 ************************************************************************/
10925
10926/**
10927 * xmlIOParseDTD:
10928 * @sax: the SAX handler block or NULL
10929 * @input: an Input Buffer
10930 * @enc: the charset encoding if known
10931 *
10932 * Load and parse a DTD
10933 *
10934 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000010935 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000010936 */
10937
10938xmlDtdPtr
10939xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10940 xmlCharEncoding enc) {
10941 xmlDtdPtr ret = NULL;
10942 xmlParserCtxtPtr ctxt;
10943 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010944 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010945
10946 if (input == NULL)
10947 return(NULL);
10948
10949 ctxt = xmlNewParserCtxt();
10950 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000010951 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000010952 return(NULL);
10953 }
10954
10955 /*
10956 * Set-up the SAX context
10957 */
10958 if (sax != NULL) {
10959 if (ctxt->sax != NULL)
10960 xmlFree(ctxt->sax);
10961 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010962 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010963 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010964 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010965
10966 /*
10967 * generate a parser input from the I/O handler
10968 */
10969
Daniel Veillard43caefb2003-12-07 19:32:22 +000010970 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010971 if (pinput == NULL) {
10972 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000010973 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000010974 xmlFreeParserCtxt(ctxt);
10975 return(NULL);
10976 }
10977
10978 /*
10979 * plug some encoding conversion routines here.
10980 */
10981 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010982 if (enc != XML_CHAR_ENCODING_NONE) {
10983 xmlSwitchEncoding(ctxt, enc);
10984 }
Owen Taylor3473f882001-02-23 17:55:21 +000010985
10986 pinput->filename = NULL;
10987 pinput->line = 1;
10988 pinput->col = 1;
10989 pinput->base = ctxt->input->cur;
10990 pinput->cur = ctxt->input->cur;
10991 pinput->free = NULL;
10992
10993 /*
10994 * let's parse that entity knowing it's an external subset.
10995 */
10996 ctxt->inSubset = 2;
10997 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10998 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10999 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000011000
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011001 if ((enc == XML_CHAR_ENCODING_NONE) &&
11002 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000011003 /*
11004 * Get the 4 first bytes and decode the charset
11005 * if enc != XML_CHAR_ENCODING_NONE
11006 * plug some encoding conversion routines.
11007 */
11008 start[0] = RAW;
11009 start[1] = NXT(1);
11010 start[2] = NXT(2);
11011 start[3] = NXT(3);
11012 enc = xmlDetectCharEncoding(start, 4);
11013 if (enc != XML_CHAR_ENCODING_NONE) {
11014 xmlSwitchEncoding(ctxt, enc);
11015 }
11016 }
11017
Owen Taylor3473f882001-02-23 17:55:21 +000011018 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11019
11020 if (ctxt->myDoc != NULL) {
11021 if (ctxt->wellFormed) {
11022 ret = ctxt->myDoc->extSubset;
11023 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000011024 if (ret != NULL) {
11025 xmlNodePtr tmp;
11026
11027 ret->doc = NULL;
11028 tmp = ret->children;
11029 while (tmp != NULL) {
11030 tmp->doc = NULL;
11031 tmp = tmp->next;
11032 }
11033 }
Owen Taylor3473f882001-02-23 17:55:21 +000011034 } else {
11035 ret = NULL;
11036 }
11037 xmlFreeDoc(ctxt->myDoc);
11038 ctxt->myDoc = NULL;
11039 }
11040 if (sax != NULL) ctxt->sax = NULL;
11041 xmlFreeParserCtxt(ctxt);
11042
11043 return(ret);
11044}
11045
11046/**
11047 * xmlSAXParseDTD:
11048 * @sax: the SAX handler block
11049 * @ExternalID: a NAME* containing the External ID of the DTD
11050 * @SystemID: a NAME* containing the URL to the DTD
11051 *
11052 * Load and parse an external subset.
11053 *
11054 * Returns the resulting xmlDtdPtr or NULL in case of error.
11055 */
11056
11057xmlDtdPtr
11058xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11059 const xmlChar *SystemID) {
11060 xmlDtdPtr ret = NULL;
11061 xmlParserCtxtPtr ctxt;
11062 xmlParserInputPtr input = NULL;
11063 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011064 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000011065
11066 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11067
11068 ctxt = xmlNewParserCtxt();
11069 if (ctxt == NULL) {
11070 return(NULL);
11071 }
11072
11073 /*
11074 * Set-up the SAX context
11075 */
11076 if (sax != NULL) {
11077 if (ctxt->sax != NULL)
11078 xmlFree(ctxt->sax);
11079 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000011080 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011081 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011082
11083 /*
11084 * Canonicalise the system ID
11085 */
11086 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000011087 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011088 xmlFreeParserCtxt(ctxt);
11089 return(NULL);
11090 }
Owen Taylor3473f882001-02-23 17:55:21 +000011091
11092 /*
11093 * Ask the Entity resolver to load the damn thing
11094 */
11095
11096 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000011097 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11098 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011099 if (input == NULL) {
11100 if (sax != NULL) ctxt->sax = NULL;
11101 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000011102 if (systemIdCanonic != NULL)
11103 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011104 return(NULL);
11105 }
11106
11107 /*
11108 * plug some encoding conversion routines here.
11109 */
11110 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011111 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11112 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11113 xmlSwitchEncoding(ctxt, enc);
11114 }
Owen Taylor3473f882001-02-23 17:55:21 +000011115
11116 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011117 input->filename = (char *) systemIdCanonic;
11118 else
11119 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011120 input->line = 1;
11121 input->col = 1;
11122 input->base = ctxt->input->cur;
11123 input->cur = ctxt->input->cur;
11124 input->free = NULL;
11125
11126 /*
11127 * let's parse that entity knowing it's an external subset.
11128 */
11129 ctxt->inSubset = 2;
11130 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11131 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11132 ExternalID, SystemID);
11133 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11134
11135 if (ctxt->myDoc != NULL) {
11136 if (ctxt->wellFormed) {
11137 ret = ctxt->myDoc->extSubset;
11138 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000011139 if (ret != NULL) {
11140 xmlNodePtr tmp;
11141
11142 ret->doc = NULL;
11143 tmp = ret->children;
11144 while (tmp != NULL) {
11145 tmp->doc = NULL;
11146 tmp = tmp->next;
11147 }
11148 }
Owen Taylor3473f882001-02-23 17:55:21 +000011149 } else {
11150 ret = NULL;
11151 }
11152 xmlFreeDoc(ctxt->myDoc);
11153 ctxt->myDoc = NULL;
11154 }
11155 if (sax != NULL) ctxt->sax = NULL;
11156 xmlFreeParserCtxt(ctxt);
11157
11158 return(ret);
11159}
11160
Daniel Veillard4432df22003-09-28 18:58:27 +000011161
Owen Taylor3473f882001-02-23 17:55:21 +000011162/**
11163 * xmlParseDTD:
11164 * @ExternalID: a NAME* containing the External ID of the DTD
11165 * @SystemID: a NAME* containing the URL to the DTD
11166 *
11167 * Load and parse an external subset.
11168 *
11169 * Returns the resulting xmlDtdPtr or NULL in case of error.
11170 */
11171
11172xmlDtdPtr
11173xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11174 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11175}
Daniel Veillard4432df22003-09-28 18:58:27 +000011176#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011177
11178/************************************************************************
11179 * *
11180 * Front ends when parsing an Entity *
11181 * *
11182 ************************************************************************/
11183
11184/**
Owen Taylor3473f882001-02-23 17:55:21 +000011185 * xmlParseCtxtExternalEntity:
11186 * @ctx: the existing parsing context
11187 * @URL: the URL for the entity to load
11188 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011189 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011190 *
11191 * Parse an external general entity within an existing parsing context
11192 * An external general parsed entity is well-formed if it matches the
11193 * production labeled extParsedEnt.
11194 *
11195 * [78] extParsedEnt ::= TextDecl? content
11196 *
11197 * Returns 0 if the entity is well formed, -1 in case of args problem and
11198 * the parser error code otherwise
11199 */
11200
11201int
11202xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011203 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000011204 xmlParserCtxtPtr ctxt;
11205 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011206 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011207 xmlSAXHandlerPtr oldsax = NULL;
11208 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011209 xmlChar start[4];
11210 xmlCharEncoding enc;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011211 xmlParserInputPtr inputStream;
11212 char *directory = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011213
Daniel Veillardce682bc2004-11-05 17:22:25 +000011214 if (ctx == NULL) return(-1);
11215
Owen Taylor3473f882001-02-23 17:55:21 +000011216 if (ctx->depth > 40) {
11217 return(XML_ERR_ENTITY_LOOP);
11218 }
11219
Daniel Veillardcda96922001-08-21 10:56:31 +000011220 if (lst != NULL)
11221 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011222 if ((URL == NULL) && (ID == NULL))
11223 return(-1);
11224 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11225 return(-1);
11226
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011227 ctxt = xmlNewParserCtxt();
11228 if (ctxt == NULL) {
11229 return(-1);
11230 }
11231
Owen Taylor3473f882001-02-23 17:55:21 +000011232 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000011233 ctxt->_private = ctx->_private;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011234
11235 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11236 if (inputStream == NULL) {
11237 xmlFreeParserCtxt(ctxt);
11238 return(-1);
11239 }
11240
11241 inputPush(ctxt, inputStream);
11242
11243 if ((ctxt->directory == NULL) && (directory == NULL))
11244 directory = xmlParserGetDirectory((char *)URL);
11245 if ((ctxt->directory == NULL) && (directory != NULL))
11246 ctxt->directory = directory;
11247
Owen Taylor3473f882001-02-23 17:55:21 +000011248 oldsax = ctxt->sax;
11249 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011250 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011251 newDoc = xmlNewDoc(BAD_CAST "1.0");
11252 if (newDoc == NULL) {
11253 xmlFreeParserCtxt(ctxt);
11254 return(-1);
11255 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011256 if (ctx->myDoc->dict) {
11257 newDoc->dict = ctx->myDoc->dict;
11258 xmlDictReference(newDoc->dict);
11259 }
Owen Taylor3473f882001-02-23 17:55:21 +000011260 if (ctx->myDoc != NULL) {
11261 newDoc->intSubset = ctx->myDoc->intSubset;
11262 newDoc->extSubset = ctx->myDoc->extSubset;
11263 }
11264 if (ctx->myDoc->URL != NULL) {
11265 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11266 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011267 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11268 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011269 ctxt->sax = oldsax;
11270 xmlFreeParserCtxt(ctxt);
11271 newDoc->intSubset = NULL;
11272 newDoc->extSubset = NULL;
11273 xmlFreeDoc(newDoc);
11274 return(-1);
11275 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011276 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011277 nodePush(ctxt, newDoc->children);
11278 if (ctx->myDoc == NULL) {
11279 ctxt->myDoc = newDoc;
11280 } else {
11281 ctxt->myDoc = ctx->myDoc;
11282 newDoc->children->doc = ctx->myDoc;
11283 }
11284
Daniel Veillard87a764e2001-06-20 17:41:10 +000011285 /*
11286 * Get the 4 first bytes and decode the charset
11287 * if enc != XML_CHAR_ENCODING_NONE
11288 * plug some encoding conversion routines.
11289 */
11290 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011291 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11292 start[0] = RAW;
11293 start[1] = NXT(1);
11294 start[2] = NXT(2);
11295 start[3] = NXT(3);
11296 enc = xmlDetectCharEncoding(start, 4);
11297 if (enc != XML_CHAR_ENCODING_NONE) {
11298 xmlSwitchEncoding(ctxt, enc);
11299 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011300 }
11301
Owen Taylor3473f882001-02-23 17:55:21 +000011302 /*
11303 * Parse a possible text declaration first
11304 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011305 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011306 xmlParseTextDecl(ctxt);
11307 }
11308
11309 /*
11310 * Doing validity checking on chunk doesn't make sense
11311 */
11312 ctxt->instate = XML_PARSER_CONTENT;
11313 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011314 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011315 ctxt->loadsubset = ctx->loadsubset;
11316 ctxt->depth = ctx->depth + 1;
11317 ctxt->replaceEntities = ctx->replaceEntities;
11318 if (ctxt->validate) {
11319 ctxt->vctxt.error = ctx->vctxt.error;
11320 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000011321 } else {
11322 ctxt->vctxt.error = NULL;
11323 ctxt->vctxt.warning = NULL;
11324 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000011325 ctxt->vctxt.nodeTab = NULL;
11326 ctxt->vctxt.nodeNr = 0;
11327 ctxt->vctxt.nodeMax = 0;
11328 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011329 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11330 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011331 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11332 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11333 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011334 ctxt->dictNames = ctx->dictNames;
11335 ctxt->attsDefault = ctx->attsDefault;
11336 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000011337 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000011338
11339 xmlParseContent(ctxt);
11340
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011341 ctx->validate = ctxt->validate;
11342 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011343 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011344 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011345 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011346 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011347 }
11348 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011349 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011350 }
11351
11352 if (!ctxt->wellFormed) {
11353 if (ctxt->errNo == 0)
11354 ret = 1;
11355 else
11356 ret = ctxt->errNo;
11357 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000011358 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011359 xmlNodePtr cur;
11360
11361 /*
11362 * Return the newly created nodeset after unlinking it from
11363 * they pseudo parent.
11364 */
11365 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011366 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011367 while (cur != NULL) {
11368 cur->parent = NULL;
11369 cur = cur->next;
11370 }
11371 newDoc->children->children = NULL;
11372 }
11373 ret = 0;
11374 }
11375 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011376 ctxt->dict = NULL;
11377 ctxt->attsDefault = NULL;
11378 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011379 xmlFreeParserCtxt(ctxt);
11380 newDoc->intSubset = NULL;
11381 newDoc->extSubset = NULL;
11382 xmlFreeDoc(newDoc);
11383
11384 return(ret);
11385}
11386
11387/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011388 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000011389 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011390 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000011391 * @sax: the SAX handler bloc (possibly NULL)
11392 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11393 * @depth: Used for loop detection, use 0
11394 * @URL: the URL for the entity to load
11395 * @ID: the System ID for the entity to load
11396 * @list: the return value for the set of parsed nodes
11397 *
Daniel Veillard257d9102001-05-08 10:41:44 +000011398 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000011399 *
11400 * Returns 0 if the entity is well formed, -1 in case of args problem and
11401 * the parser error code otherwise
11402 */
11403
Daniel Veillard7d515752003-09-26 19:12:37 +000011404static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011405xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11406 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000011407 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011408 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000011409 xmlParserCtxtPtr ctxt;
11410 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011411 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011412 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000011413 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011414 xmlChar start[4];
11415 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000011416
11417 if (depth > 40) {
11418 return(XML_ERR_ENTITY_LOOP);
11419 }
11420
11421
11422
11423 if (list != NULL)
11424 *list = NULL;
11425 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000011426 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000011427 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000011428 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011429
11430
11431 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000011432 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000011433 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011434 if (oldctxt != NULL) {
11435 ctxt->_private = oldctxt->_private;
11436 ctxt->loadsubset = oldctxt->loadsubset;
11437 ctxt->validate = oldctxt->validate;
11438 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011439 ctxt->record_info = oldctxt->record_info;
11440 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11441 ctxt->node_seq.length = oldctxt->node_seq.length;
11442 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011443 } else {
11444 /*
11445 * Doing validity checking on chunk without context
11446 * doesn't make sense
11447 */
11448 ctxt->_private = NULL;
11449 ctxt->validate = 0;
11450 ctxt->external = 2;
11451 ctxt->loadsubset = 0;
11452 }
Owen Taylor3473f882001-02-23 17:55:21 +000011453 if (sax != NULL) {
11454 oldsax = ctxt->sax;
11455 ctxt->sax = sax;
11456 if (user_data != NULL)
11457 ctxt->userData = user_data;
11458 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011459 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011460 newDoc = xmlNewDoc(BAD_CAST "1.0");
11461 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011462 ctxt->node_seq.maximum = 0;
11463 ctxt->node_seq.length = 0;
11464 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011465 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000011466 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011467 }
Daniel Veillard30e76072006-03-09 14:13:55 +000011468 newDoc->intSubset = doc->intSubset;
11469 newDoc->extSubset = doc->extSubset;
11470 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000011471 xmlDictReference(newDoc->dict);
11472
Owen Taylor3473f882001-02-23 17:55:21 +000011473 if (doc->URL != NULL) {
11474 newDoc->URL = xmlStrdup(doc->URL);
11475 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011476 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11477 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011478 if (sax != NULL)
11479 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011480 ctxt->node_seq.maximum = 0;
11481 ctxt->node_seq.length = 0;
11482 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011483 xmlFreeParserCtxt(ctxt);
11484 newDoc->intSubset = NULL;
11485 newDoc->extSubset = NULL;
11486 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000011487 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011488 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011489 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011490 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000011491 ctxt->myDoc = doc;
11492 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000011493
Daniel Veillard87a764e2001-06-20 17:41:10 +000011494 /*
11495 * Get the 4 first bytes and decode the charset
11496 * if enc != XML_CHAR_ENCODING_NONE
11497 * plug some encoding conversion routines.
11498 */
11499 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011500 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11501 start[0] = RAW;
11502 start[1] = NXT(1);
11503 start[2] = NXT(2);
11504 start[3] = NXT(3);
11505 enc = xmlDetectCharEncoding(start, 4);
11506 if (enc != XML_CHAR_ENCODING_NONE) {
11507 xmlSwitchEncoding(ctxt, enc);
11508 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011509 }
11510
Owen Taylor3473f882001-02-23 17:55:21 +000011511 /*
11512 * Parse a possible text declaration first
11513 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011514 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011515 xmlParseTextDecl(ctxt);
11516 }
11517
Owen Taylor3473f882001-02-23 17:55:21 +000011518 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011519 ctxt->depth = depth;
11520
11521 xmlParseContent(ctxt);
11522
Daniel Veillard561b7f82002-03-20 21:55:57 +000011523 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011524 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000011525 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011526 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011527 }
11528 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011529 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011530 }
11531
11532 if (!ctxt->wellFormed) {
11533 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011534 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000011535 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011536 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000011537 } else {
11538 if (list != NULL) {
11539 xmlNodePtr cur;
11540
11541 /*
11542 * Return the newly created nodeset after unlinking it from
11543 * they pseudo parent.
11544 */
11545 cur = newDoc->children->children;
11546 *list = cur;
11547 while (cur != NULL) {
11548 cur->parent = NULL;
11549 cur = cur->next;
11550 }
11551 newDoc->children->children = NULL;
11552 }
Daniel Veillard7d515752003-09-26 19:12:37 +000011553 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000011554 }
11555 if (sax != NULL)
11556 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000011557 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11558 oldctxt->node_seq.length = ctxt->node_seq.length;
11559 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011560 ctxt->node_seq.maximum = 0;
11561 ctxt->node_seq.length = 0;
11562 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011563 xmlFreeParserCtxt(ctxt);
11564 newDoc->intSubset = NULL;
11565 newDoc->extSubset = NULL;
11566 xmlFreeDoc(newDoc);
11567
11568 return(ret);
11569}
11570
Daniel Veillard81273902003-09-30 00:43:48 +000011571#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011572/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011573 * xmlParseExternalEntity:
11574 * @doc: the document the chunk pertains to
11575 * @sax: the SAX handler bloc (possibly NULL)
11576 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11577 * @depth: Used for loop detection, use 0
11578 * @URL: the URL for the entity to load
11579 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011580 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000011581 *
11582 * Parse an external general entity
11583 * An external general parsed entity is well-formed if it matches the
11584 * production labeled extParsedEnt.
11585 *
11586 * [78] extParsedEnt ::= TextDecl? content
11587 *
11588 * Returns 0 if the entity is well formed, -1 in case of args problem and
11589 * the parser error code otherwise
11590 */
11591
11592int
11593xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000011594 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011595 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011596 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000011597}
11598
11599/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000011600 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000011601 * @doc: the document the chunk pertains to
11602 * @sax: the SAX handler bloc (possibly NULL)
11603 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11604 * @depth: Used for loop detection, use 0
11605 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000011606 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011607 *
11608 * Parse a well-balanced chunk of an XML document
11609 * called by the parser
11610 * The allowed sequence for the Well Balanced Chunk is the one defined by
11611 * the content production in the XML grammar:
11612 *
11613 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11614 *
11615 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11616 * the parser error code otherwise
11617 */
11618
11619int
11620xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000011621 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011622 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11623 depth, string, lst, 0 );
11624}
Daniel Veillard81273902003-09-30 00:43:48 +000011625#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000011626
11627/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000011628 * xmlParseBalancedChunkMemoryInternal:
11629 * @oldctxt: the existing parsing context
11630 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11631 * @user_data: the user data field for the parser context
11632 * @lst: the return value for the set of parsed nodes
11633 *
11634 *
11635 * Parse a well-balanced chunk of an XML document
11636 * called by the parser
11637 * The allowed sequence for the Well Balanced Chunk is the one defined by
11638 * the content production in the XML grammar:
11639 *
11640 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11641 *
Daniel Veillard7d515752003-09-26 19:12:37 +000011642 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11643 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000011644 *
11645 * In case recover is set to 1, the nodelist will not be empty even if
11646 * the parsed chunk is not well balanced.
11647 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011648static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011649xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11650 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11651 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011652 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011653 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011654 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011655 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011656 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011657 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011658 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011659
11660 if (oldctxt->depth > 40) {
11661 return(XML_ERR_ENTITY_LOOP);
11662 }
11663
11664
11665 if (lst != NULL)
11666 *lst = NULL;
11667 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011668 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011669
11670 size = xmlStrlen(string);
11671
11672 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011673 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011674 if (user_data != NULL)
11675 ctxt->userData = user_data;
11676 else
11677 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011678 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11679 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011680 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11681 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11682 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011683
11684 oldsax = ctxt->sax;
11685 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011686 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011687 ctxt->replaceEntities = oldctxt->replaceEntities;
11688 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011689
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011690 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011691 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011692 newDoc = xmlNewDoc(BAD_CAST "1.0");
11693 if (newDoc == NULL) {
11694 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011695 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011696 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011697 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011698 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011699 newDoc->dict = ctxt->dict;
11700 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011701 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011702 } else {
11703 ctxt->myDoc = oldctxt->myDoc;
11704 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011705 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011706 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011707 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11708 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011709 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011710 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011711 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011712 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011713 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011714 }
William M. Brack7b9154b2003-09-27 19:23:50 +000011715 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011716 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011717 ctxt->myDoc->children = NULL;
11718 ctxt->myDoc->last = NULL;
11719 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011720 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011721 ctxt->instate = XML_PARSER_CONTENT;
11722 ctxt->depth = oldctxt->depth + 1;
11723
Daniel Veillard328f48c2002-11-15 15:24:34 +000011724 ctxt->validate = 0;
11725 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011726 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11727 /*
11728 * ID/IDREF registration will be done in xmlValidateElement below
11729 */
11730 ctxt->loadsubset |= XML_SKIP_IDS;
11731 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011732 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011733 ctxt->attsDefault = oldctxt->attsDefault;
11734 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011735
Daniel Veillard68e9e742002-11-16 15:35:11 +000011736 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011737 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011738 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011739 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011740 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011741 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011742 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011743 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011744 }
11745
11746 if (!ctxt->wellFormed) {
11747 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011748 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011749 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011750 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011751 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011752 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011753 }
11754
William M. Brack7b9154b2003-09-27 19:23:50 +000011755 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011756 xmlNodePtr cur;
11757
11758 /*
11759 * Return the newly created nodeset after unlinking it from
11760 * they pseudo parent.
11761 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011762 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011763 *lst = cur;
11764 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011765#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000011766 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
11767 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
11768 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000011769 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11770 oldctxt->myDoc, cur);
11771 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011772#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011773 cur->parent = NULL;
11774 cur = cur->next;
11775 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011776 ctxt->myDoc->children->children = NULL;
11777 }
11778 if (ctxt->myDoc != NULL) {
11779 xmlFreeNode(ctxt->myDoc->children);
11780 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011781 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011782 }
11783
11784 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011785 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011786 ctxt->attsDefault = NULL;
11787 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011788 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011789 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011790 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011791 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000011792
11793 return(ret);
11794}
11795
Daniel Veillard29b17482004-08-16 00:39:03 +000011796/**
11797 * xmlParseInNodeContext:
11798 * @node: the context node
11799 * @data: the input string
11800 * @datalen: the input string length in bytes
11801 * @options: a combination of xmlParserOption
11802 * @lst: the return value for the set of parsed nodes
11803 *
11804 * Parse a well-balanced chunk of an XML document
11805 * within the context (DTD, namespaces, etc ...) of the given node.
11806 *
11807 * The allowed sequence for the data is a Well Balanced Chunk defined by
11808 * the content production in the XML grammar:
11809 *
11810 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11811 *
11812 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11813 * error code otherwise
11814 */
11815xmlParserErrors
11816xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11817 int options, xmlNodePtr *lst) {
11818#ifdef SAX2
11819 xmlParserCtxtPtr ctxt;
11820 xmlDocPtr doc = NULL;
11821 xmlNodePtr fake, cur;
11822 int nsnr = 0;
11823
11824 xmlParserErrors ret = XML_ERR_OK;
11825
11826 /*
11827 * check all input parameters, grab the document
11828 */
11829 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11830 return(XML_ERR_INTERNAL_ERROR);
11831 switch (node->type) {
11832 case XML_ELEMENT_NODE:
11833 case XML_ATTRIBUTE_NODE:
11834 case XML_TEXT_NODE:
11835 case XML_CDATA_SECTION_NODE:
11836 case XML_ENTITY_REF_NODE:
11837 case XML_PI_NODE:
11838 case XML_COMMENT_NODE:
11839 case XML_DOCUMENT_NODE:
11840 case XML_HTML_DOCUMENT_NODE:
11841 break;
11842 default:
11843 return(XML_ERR_INTERNAL_ERROR);
11844
11845 }
11846 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11847 (node->type != XML_DOCUMENT_NODE) &&
11848 (node->type != XML_HTML_DOCUMENT_NODE))
11849 node = node->parent;
11850 if (node == NULL)
11851 return(XML_ERR_INTERNAL_ERROR);
11852 if (node->type == XML_ELEMENT_NODE)
11853 doc = node->doc;
11854 else
11855 doc = (xmlDocPtr) node;
11856 if (doc == NULL)
11857 return(XML_ERR_INTERNAL_ERROR);
11858
11859 /*
11860 * allocate a context and set-up everything not related to the
11861 * node position in the tree
11862 */
11863 if (doc->type == XML_DOCUMENT_NODE)
11864 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11865#ifdef LIBXML_HTML_ENABLED
11866 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11867 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11868#endif
11869 else
11870 return(XML_ERR_INTERNAL_ERROR);
11871
11872 if (ctxt == NULL)
11873 return(XML_ERR_NO_MEMORY);
11874 fake = xmlNewComment(NULL);
11875 if (fake == NULL) {
11876 xmlFreeParserCtxt(ctxt);
11877 return(XML_ERR_NO_MEMORY);
11878 }
11879 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000011880
11881 /*
11882 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11883 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11884 * we must wait until the last moment to free the original one.
11885 */
Daniel Veillard29b17482004-08-16 00:39:03 +000011886 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000011887 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000011888 xmlDictFree(ctxt->dict);
11889 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000011890 } else
11891 options |= XML_PARSE_NODICT;
11892
11893 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000011894 xmlDetectSAX2(ctxt);
11895 ctxt->myDoc = doc;
11896
11897 if (node->type == XML_ELEMENT_NODE) {
11898 nodePush(ctxt, node);
11899 /*
11900 * initialize the SAX2 namespaces stack
11901 */
11902 cur = node;
11903 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11904 xmlNsPtr ns = cur->nsDef;
11905 const xmlChar *iprefix, *ihref;
11906
11907 while (ns != NULL) {
11908 if (ctxt->dict) {
11909 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11910 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11911 } else {
11912 iprefix = ns->prefix;
11913 ihref = ns->href;
11914 }
11915
11916 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11917 nsPush(ctxt, iprefix, ihref);
11918 nsnr++;
11919 }
11920 ns = ns->next;
11921 }
11922 cur = cur->parent;
11923 }
11924 ctxt->instate = XML_PARSER_CONTENT;
11925 }
11926
11927 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11928 /*
11929 * ID/IDREF registration will be done in xmlValidateElement below
11930 */
11931 ctxt->loadsubset |= XML_SKIP_IDS;
11932 }
11933
Daniel Veillard499cc922006-01-18 17:22:35 +000011934#ifdef LIBXML_HTML_ENABLED
11935 if (doc->type == XML_HTML_DOCUMENT_NODE)
11936 __htmlParseContent(ctxt);
11937 else
11938#endif
11939 xmlParseContent(ctxt);
11940
Daniel Veillard29b17482004-08-16 00:39:03 +000011941 nsPop(ctxt, nsnr);
11942 if ((RAW == '<') && (NXT(1) == '/')) {
11943 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11944 } else if (RAW != 0) {
11945 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11946 }
11947 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11948 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11949 ctxt->wellFormed = 0;
11950 }
11951
11952 if (!ctxt->wellFormed) {
11953 if (ctxt->errNo == 0)
11954 ret = XML_ERR_INTERNAL_ERROR;
11955 else
11956 ret = (xmlParserErrors)ctxt->errNo;
11957 } else {
11958 ret = XML_ERR_OK;
11959 }
11960
11961 /*
11962 * Return the newly created nodeset after unlinking it from
11963 * the pseudo sibling.
11964 */
11965
11966 cur = fake->next;
11967 fake->next = NULL;
11968 node->last = fake;
11969
11970 if (cur != NULL) {
11971 cur->prev = NULL;
11972 }
11973
11974 *lst = cur;
11975
11976 while (cur != NULL) {
11977 cur->parent = NULL;
11978 cur = cur->next;
11979 }
11980
11981 xmlUnlinkNode(fake);
11982 xmlFreeNode(fake);
11983
11984
11985 if (ret != XML_ERR_OK) {
11986 xmlFreeNodeList(*lst);
11987 *lst = NULL;
11988 }
William M. Brackc3f81342004-10-03 01:22:44 +000011989
William M. Brackb7b54de2004-10-06 16:38:01 +000011990 if (doc->dict != NULL)
11991 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000011992 xmlFreeParserCtxt(ctxt);
11993
11994 return(ret);
11995#else /* !SAX2 */
11996 return(XML_ERR_INTERNAL_ERROR);
11997#endif
11998}
11999
Daniel Veillard81273902003-09-30 00:43:48 +000012000#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000012001/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000012002 * xmlParseBalancedChunkMemoryRecover:
12003 * @doc: the document the chunk pertains to
12004 * @sax: the SAX handler bloc (possibly NULL)
12005 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12006 * @depth: Used for loop detection, use 0
12007 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12008 * @lst: the return value for the set of parsed nodes
12009 * @recover: return nodes even if the data is broken (use 0)
12010 *
12011 *
12012 * Parse a well-balanced chunk of an XML document
12013 * called by the parser
12014 * The allowed sequence for the Well Balanced Chunk is the one defined by
12015 * the content production in the XML grammar:
12016 *
12017 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12018 *
12019 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12020 * the parser error code otherwise
12021 *
12022 * In case recover is set to 1, the nodelist will not be empty even if
12023 * the parsed chunk is not well balanced.
12024 */
12025int
12026xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12027 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
12028 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000012029 xmlParserCtxtPtr ctxt;
12030 xmlDocPtr newDoc;
12031 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012032 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012033 int size;
12034 int ret = 0;
12035
12036 if (depth > 40) {
12037 return(XML_ERR_ENTITY_LOOP);
12038 }
12039
12040
Daniel Veillardcda96922001-08-21 10:56:31 +000012041 if (lst != NULL)
12042 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012043 if (string == NULL)
12044 return(-1);
12045
12046 size = xmlStrlen(string);
12047
12048 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12049 if (ctxt == NULL) return(-1);
12050 ctxt->userData = ctxt;
12051 if (sax != NULL) {
12052 oldsax = ctxt->sax;
12053 ctxt->sax = sax;
12054 if (user_data != NULL)
12055 ctxt->userData = user_data;
12056 }
12057 newDoc = xmlNewDoc(BAD_CAST "1.0");
12058 if (newDoc == NULL) {
12059 xmlFreeParserCtxt(ctxt);
12060 return(-1);
12061 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012062 if ((doc != NULL) && (doc->dict != NULL)) {
12063 xmlDictFree(ctxt->dict);
12064 ctxt->dict = doc->dict;
12065 xmlDictReference(ctxt->dict);
12066 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12067 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12068 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12069 ctxt->dictNames = 1;
12070 } else {
12071 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
12072 }
Owen Taylor3473f882001-02-23 17:55:21 +000012073 if (doc != NULL) {
12074 newDoc->intSubset = doc->intSubset;
12075 newDoc->extSubset = doc->extSubset;
12076 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012077 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12078 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012079 if (sax != NULL)
12080 ctxt->sax = oldsax;
12081 xmlFreeParserCtxt(ctxt);
12082 newDoc->intSubset = NULL;
12083 newDoc->extSubset = NULL;
12084 xmlFreeDoc(newDoc);
12085 return(-1);
12086 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012087 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12088 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012089 if (doc == NULL) {
12090 ctxt->myDoc = newDoc;
12091 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000012092 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000012093 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000012094 /* Ensure that doc has XML spec namespace */
12095 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12096 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000012097 }
12098 ctxt->instate = XML_PARSER_CONTENT;
12099 ctxt->depth = depth;
12100
12101 /*
12102 * Doing validity checking on chunk doesn't make sense
12103 */
12104 ctxt->validate = 0;
12105 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012106 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012107
Daniel Veillardb39bc392002-10-26 19:29:51 +000012108 if ( doc != NULL ){
12109 content = doc->children;
12110 doc->children = NULL;
12111 xmlParseContent(ctxt);
12112 doc->children = content;
12113 }
12114 else {
12115 xmlParseContent(ctxt);
12116 }
Owen Taylor3473f882001-02-23 17:55:21 +000012117 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012118 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012119 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012120 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012121 }
12122 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012123 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012124 }
12125
12126 if (!ctxt->wellFormed) {
12127 if (ctxt->errNo == 0)
12128 ret = 1;
12129 else
12130 ret = ctxt->errNo;
12131 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012132 ret = 0;
12133 }
12134
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012135 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
12136 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012137
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012138 /*
12139 * Return the newly created nodeset after unlinking it from
12140 * they pseudo parent.
12141 */
12142 cur = newDoc->children->children;
12143 *lst = cur;
12144 while (cur != NULL) {
12145 xmlSetTreeDoc(cur, doc);
12146 cur->parent = NULL;
12147 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000012148 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012149 newDoc->children->children = NULL;
12150 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000012151
Owen Taylor3473f882001-02-23 17:55:21 +000012152 if (sax != NULL)
12153 ctxt->sax = oldsax;
12154 xmlFreeParserCtxt(ctxt);
12155 newDoc->intSubset = NULL;
12156 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000012157 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012158 xmlFreeDoc(newDoc);
12159
12160 return(ret);
12161}
12162
12163/**
12164 * xmlSAXParseEntity:
12165 * @sax: the SAX handler block
12166 * @filename: the filename
12167 *
12168 * parse an XML external entity out of context and build a tree.
12169 * It use the given SAX function block to handle the parsing callback.
12170 * If sax is NULL, fallback to the default DOM tree building routines.
12171 *
12172 * [78] extParsedEnt ::= TextDecl? content
12173 *
12174 * This correspond to a "Well Balanced" chunk
12175 *
12176 * Returns the resulting document tree
12177 */
12178
12179xmlDocPtr
12180xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12181 xmlDocPtr ret;
12182 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012183
12184 ctxt = xmlCreateFileParserCtxt(filename);
12185 if (ctxt == NULL) {
12186 return(NULL);
12187 }
12188 if (sax != NULL) {
12189 if (ctxt->sax != NULL)
12190 xmlFree(ctxt->sax);
12191 ctxt->sax = sax;
12192 ctxt->userData = NULL;
12193 }
12194
Owen Taylor3473f882001-02-23 17:55:21 +000012195 xmlParseExtParsedEnt(ctxt);
12196
12197 if (ctxt->wellFormed)
12198 ret = ctxt->myDoc;
12199 else {
12200 ret = NULL;
12201 xmlFreeDoc(ctxt->myDoc);
12202 ctxt->myDoc = NULL;
12203 }
12204 if (sax != NULL)
12205 ctxt->sax = NULL;
12206 xmlFreeParserCtxt(ctxt);
12207
12208 return(ret);
12209}
12210
12211/**
12212 * xmlParseEntity:
12213 * @filename: the filename
12214 *
12215 * parse an XML external entity out of context and build a tree.
12216 *
12217 * [78] extParsedEnt ::= TextDecl? content
12218 *
12219 * This correspond to a "Well Balanced" chunk
12220 *
12221 * Returns the resulting document tree
12222 */
12223
12224xmlDocPtr
12225xmlParseEntity(const char *filename) {
12226 return(xmlSAXParseEntity(NULL, filename));
12227}
Daniel Veillard81273902003-09-30 00:43:48 +000012228#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012229
12230/**
12231 * xmlCreateEntityParserCtxt:
12232 * @URL: the entity URL
12233 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000012234 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000012235 *
12236 * Create a parser context for an external entity
12237 * Automatic support for ZLIB/Compress compressed document is provided
12238 * by default if found at compile-time.
12239 *
12240 * Returns the new parser context or NULL
12241 */
12242xmlParserCtxtPtr
12243xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12244 const xmlChar *base) {
12245 xmlParserCtxtPtr ctxt;
12246 xmlParserInputPtr inputStream;
12247 char *directory = NULL;
12248 xmlChar *uri;
12249
12250 ctxt = xmlNewParserCtxt();
12251 if (ctxt == NULL) {
12252 return(NULL);
12253 }
12254
12255 uri = xmlBuildURI(URL, base);
12256
12257 if (uri == NULL) {
12258 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12259 if (inputStream == NULL) {
12260 xmlFreeParserCtxt(ctxt);
12261 return(NULL);
12262 }
12263
12264 inputPush(ctxt, inputStream);
12265
12266 if ((ctxt->directory == NULL) && (directory == NULL))
12267 directory = xmlParserGetDirectory((char *)URL);
12268 if ((ctxt->directory == NULL) && (directory != NULL))
12269 ctxt->directory = directory;
12270 } else {
12271 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12272 if (inputStream == NULL) {
12273 xmlFree(uri);
12274 xmlFreeParserCtxt(ctxt);
12275 return(NULL);
12276 }
12277
12278 inputPush(ctxt, inputStream);
12279
12280 if ((ctxt->directory == NULL) && (directory == NULL))
12281 directory = xmlParserGetDirectory((char *)uri);
12282 if ((ctxt->directory == NULL) && (directory != NULL))
12283 ctxt->directory = directory;
12284 xmlFree(uri);
12285 }
Owen Taylor3473f882001-02-23 17:55:21 +000012286 return(ctxt);
12287}
12288
12289/************************************************************************
12290 * *
12291 * Front ends when parsing from a file *
12292 * *
12293 ************************************************************************/
12294
12295/**
Daniel Veillard61b93382003-11-03 14:28:31 +000012296 * xmlCreateURLParserCtxt:
12297 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012298 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000012299 *
Daniel Veillard61b93382003-11-03 14:28:31 +000012300 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000012301 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000012302 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000012303 *
12304 * Returns the new parser context or NULL
12305 */
12306xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000012307xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000012308{
12309 xmlParserCtxtPtr ctxt;
12310 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000012311 char *directory = NULL;
12312
Owen Taylor3473f882001-02-23 17:55:21 +000012313 ctxt = xmlNewParserCtxt();
12314 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012315 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000012316 return(NULL);
12317 }
12318
Daniel Veillarddf292f72005-01-16 19:00:15 +000012319 if (options)
12320 xmlCtxtUseOptions(ctxt, options);
12321 ctxt->linenumbers = 1;
Igor Zlatkovicce076162003-02-23 13:39:39 +000012322
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000012323 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012324 if (inputStream == NULL) {
12325 xmlFreeParserCtxt(ctxt);
12326 return(NULL);
12327 }
12328
Owen Taylor3473f882001-02-23 17:55:21 +000012329 inputPush(ctxt, inputStream);
12330 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012331 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012332 if ((ctxt->directory == NULL) && (directory != NULL))
12333 ctxt->directory = directory;
12334
12335 return(ctxt);
12336}
12337
Daniel Veillard61b93382003-11-03 14:28:31 +000012338/**
12339 * xmlCreateFileParserCtxt:
12340 * @filename: the filename
12341 *
12342 * Create a parser context for a file content.
12343 * Automatic support for ZLIB/Compress compressed document is provided
12344 * by default if found at compile-time.
12345 *
12346 * Returns the new parser context or NULL
12347 */
12348xmlParserCtxtPtr
12349xmlCreateFileParserCtxt(const char *filename)
12350{
12351 return(xmlCreateURLParserCtxt(filename, 0));
12352}
12353
Daniel Veillard81273902003-09-30 00:43:48 +000012354#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012355/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012356 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000012357 * @sax: the SAX handler block
12358 * @filename: the filename
12359 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12360 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000012361 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000012362 *
12363 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12364 * compressed document is provided by default if found at compile-time.
12365 * It use the given SAX function block to handle the parsing callback.
12366 * If sax is NULL, fallback to the default DOM tree building routines.
12367 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000012368 * User data (void *) is stored within the parser context in the
12369 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000012370 *
Owen Taylor3473f882001-02-23 17:55:21 +000012371 * Returns the resulting document tree
12372 */
12373
12374xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000012375xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12376 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000012377 xmlDocPtr ret;
12378 xmlParserCtxtPtr ctxt;
12379 char *directory = NULL;
12380
Daniel Veillard635ef722001-10-29 11:48:19 +000012381 xmlInitParser();
12382
Owen Taylor3473f882001-02-23 17:55:21 +000012383 ctxt = xmlCreateFileParserCtxt(filename);
12384 if (ctxt == NULL) {
12385 return(NULL);
12386 }
12387 if (sax != NULL) {
12388 if (ctxt->sax != NULL)
12389 xmlFree(ctxt->sax);
12390 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012391 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012392 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000012393 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000012394 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000012395 }
Owen Taylor3473f882001-02-23 17:55:21 +000012396
12397 if ((ctxt->directory == NULL) && (directory == NULL))
12398 directory = xmlParserGetDirectory(filename);
12399 if ((ctxt->directory == NULL) && (directory != NULL))
12400 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
12401
Daniel Veillarddad3f682002-11-17 16:47:27 +000012402 ctxt->recovery = recovery;
12403
Owen Taylor3473f882001-02-23 17:55:21 +000012404 xmlParseDocument(ctxt);
12405
William M. Brackc07329e2003-09-08 01:57:30 +000012406 if ((ctxt->wellFormed) || recovery) {
12407 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000012408 if (ret != NULL) {
12409 if (ctxt->input->buf->compressed > 0)
12410 ret->compression = 9;
12411 else
12412 ret->compression = ctxt->input->buf->compressed;
12413 }
William M. Brackc07329e2003-09-08 01:57:30 +000012414 }
Owen Taylor3473f882001-02-23 17:55:21 +000012415 else {
12416 ret = NULL;
12417 xmlFreeDoc(ctxt->myDoc);
12418 ctxt->myDoc = NULL;
12419 }
12420 if (sax != NULL)
12421 ctxt->sax = NULL;
12422 xmlFreeParserCtxt(ctxt);
12423
12424 return(ret);
12425}
12426
12427/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012428 * xmlSAXParseFile:
12429 * @sax: the SAX handler block
12430 * @filename: the filename
12431 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12432 * documents
12433 *
12434 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12435 * compressed document is provided by default if found at compile-time.
12436 * It use the given SAX function block to handle the parsing callback.
12437 * If sax is NULL, fallback to the default DOM tree building routines.
12438 *
12439 * Returns the resulting document tree
12440 */
12441
12442xmlDocPtr
12443xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12444 int recovery) {
12445 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12446}
12447
12448/**
Owen Taylor3473f882001-02-23 17:55:21 +000012449 * xmlRecoverDoc:
12450 * @cur: a pointer to an array of xmlChar
12451 *
12452 * parse an XML in-memory document and build a tree.
12453 * In the case the document is not Well Formed, a tree is built anyway
12454 *
12455 * Returns the resulting document tree
12456 */
12457
12458xmlDocPtr
12459xmlRecoverDoc(xmlChar *cur) {
12460 return(xmlSAXParseDoc(NULL, cur, 1));
12461}
12462
12463/**
12464 * xmlParseFile:
12465 * @filename: the filename
12466 *
12467 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12468 * compressed document is provided by default if found at compile-time.
12469 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000012470 * Returns the resulting document tree if the file was wellformed,
12471 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000012472 */
12473
12474xmlDocPtr
12475xmlParseFile(const char *filename) {
12476 return(xmlSAXParseFile(NULL, filename, 0));
12477}
12478
12479/**
12480 * xmlRecoverFile:
12481 * @filename: the filename
12482 *
12483 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12484 * compressed document is provided by default if found at compile-time.
12485 * In the case the document is not Well Formed, a tree is built anyway
12486 *
12487 * Returns the resulting document tree
12488 */
12489
12490xmlDocPtr
12491xmlRecoverFile(const char *filename) {
12492 return(xmlSAXParseFile(NULL, filename, 1));
12493}
12494
12495
12496/**
12497 * xmlSetupParserForBuffer:
12498 * @ctxt: an XML parser context
12499 * @buffer: a xmlChar * buffer
12500 * @filename: a file name
12501 *
12502 * Setup the parser context to parse a new buffer; Clears any prior
12503 * contents from the parser context. The buffer parameter must not be
12504 * NULL, but the filename parameter can be
12505 */
12506void
12507xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12508 const char* filename)
12509{
12510 xmlParserInputPtr input;
12511
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012512 if ((ctxt == NULL) || (buffer == NULL))
12513 return;
12514
Owen Taylor3473f882001-02-23 17:55:21 +000012515 input = xmlNewInputStream(ctxt);
12516 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012517 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012518 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012519 return;
12520 }
12521
12522 xmlClearParserCtxt(ctxt);
12523 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000012524 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012525 input->base = buffer;
12526 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012527 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000012528 inputPush(ctxt, input);
12529}
12530
12531/**
12532 * xmlSAXUserParseFile:
12533 * @sax: a SAX handler
12534 * @user_data: The user data returned on SAX callbacks
12535 * @filename: a file name
12536 *
12537 * parse an XML file and call the given SAX handler routines.
12538 * Automatic support for ZLIB/Compress compressed document is provided
12539 *
12540 * Returns 0 in case of success or a error number otherwise
12541 */
12542int
12543xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12544 const char *filename) {
12545 int ret = 0;
12546 xmlParserCtxtPtr ctxt;
12547
12548 ctxt = xmlCreateFileParserCtxt(filename);
12549 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000012550 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000012551 xmlFree(ctxt->sax);
12552 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012553 xmlDetectSAX2(ctxt);
12554
Owen Taylor3473f882001-02-23 17:55:21 +000012555 if (user_data != NULL)
12556 ctxt->userData = user_data;
12557
12558 xmlParseDocument(ctxt);
12559
12560 if (ctxt->wellFormed)
12561 ret = 0;
12562 else {
12563 if (ctxt->errNo != 0)
12564 ret = ctxt->errNo;
12565 else
12566 ret = -1;
12567 }
12568 if (sax != NULL)
12569 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012570 if (ctxt->myDoc != NULL) {
12571 xmlFreeDoc(ctxt->myDoc);
12572 ctxt->myDoc = NULL;
12573 }
Owen Taylor3473f882001-02-23 17:55:21 +000012574 xmlFreeParserCtxt(ctxt);
12575
12576 return ret;
12577}
Daniel Veillard81273902003-09-30 00:43:48 +000012578#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012579
12580/************************************************************************
12581 * *
12582 * Front ends when parsing from memory *
12583 * *
12584 ************************************************************************/
12585
12586/**
12587 * xmlCreateMemoryParserCtxt:
12588 * @buffer: a pointer to a char array
12589 * @size: the size of the array
12590 *
12591 * Create a parser context for an XML in-memory document.
12592 *
12593 * Returns the new parser context or NULL
12594 */
12595xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012596xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012597 xmlParserCtxtPtr ctxt;
12598 xmlParserInputPtr input;
12599 xmlParserInputBufferPtr buf;
12600
12601 if (buffer == NULL)
12602 return(NULL);
12603 if (size <= 0)
12604 return(NULL);
12605
12606 ctxt = xmlNewParserCtxt();
12607 if (ctxt == NULL)
12608 return(NULL);
12609
Daniel Veillard53350552003-09-18 13:35:51 +000012610 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000012611 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012612 if (buf == NULL) {
12613 xmlFreeParserCtxt(ctxt);
12614 return(NULL);
12615 }
Owen Taylor3473f882001-02-23 17:55:21 +000012616
12617 input = xmlNewInputStream(ctxt);
12618 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012619 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012620 xmlFreeParserCtxt(ctxt);
12621 return(NULL);
12622 }
12623
12624 input->filename = NULL;
12625 input->buf = buf;
12626 input->base = input->buf->buffer->content;
12627 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012628 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000012629
12630 inputPush(ctxt, input);
12631 return(ctxt);
12632}
12633
Daniel Veillard81273902003-09-30 00:43:48 +000012634#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012635/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012636 * xmlSAXParseMemoryWithData:
12637 * @sax: the SAX handler block
12638 * @buffer: an pointer to a char array
12639 * @size: the size of the array
12640 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12641 * documents
12642 * @data: the userdata
12643 *
12644 * parse an XML in-memory block and use the given SAX function block
12645 * to handle the parsing callback. If sax is NULL, fallback to the default
12646 * DOM tree building routines.
12647 *
12648 * User data (void *) is stored within the parser context in the
12649 * context's _private member, so it is available nearly everywhere in libxml
12650 *
12651 * Returns the resulting document tree
12652 */
12653
12654xmlDocPtr
12655xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12656 int size, int recovery, void *data) {
12657 xmlDocPtr ret;
12658 xmlParserCtxtPtr ctxt;
12659
12660 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12661 if (ctxt == NULL) return(NULL);
12662 if (sax != NULL) {
12663 if (ctxt->sax != NULL)
12664 xmlFree(ctxt->sax);
12665 ctxt->sax = sax;
12666 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012667 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012668 if (data!=NULL) {
12669 ctxt->_private=data;
12670 }
12671
Daniel Veillardadba5f12003-04-04 16:09:01 +000012672 ctxt->recovery = recovery;
12673
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012674 xmlParseDocument(ctxt);
12675
12676 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12677 else {
12678 ret = NULL;
12679 xmlFreeDoc(ctxt->myDoc);
12680 ctxt->myDoc = NULL;
12681 }
12682 if (sax != NULL)
12683 ctxt->sax = NULL;
12684 xmlFreeParserCtxt(ctxt);
12685
12686 return(ret);
12687}
12688
12689/**
Owen Taylor3473f882001-02-23 17:55:21 +000012690 * xmlSAXParseMemory:
12691 * @sax: the SAX handler block
12692 * @buffer: an pointer to a char array
12693 * @size: the size of the array
12694 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12695 * documents
12696 *
12697 * parse an XML in-memory block and use the given SAX function block
12698 * to handle the parsing callback. If sax is NULL, fallback to the default
12699 * DOM tree building routines.
12700 *
12701 * Returns the resulting document tree
12702 */
12703xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000012704xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12705 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012706 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012707}
12708
12709/**
12710 * xmlParseMemory:
12711 * @buffer: an pointer to a char array
12712 * @size: the size of the array
12713 *
12714 * parse an XML in-memory block and build a tree.
12715 *
12716 * Returns the resulting document tree
12717 */
12718
Daniel Veillard50822cb2001-07-26 20:05:51 +000012719xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012720 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12721}
12722
12723/**
12724 * xmlRecoverMemory:
12725 * @buffer: an pointer to a char array
12726 * @size: the size of the array
12727 *
12728 * parse an XML in-memory block and build a tree.
12729 * In the case the document is not Well Formed, a tree is built anyway
12730 *
12731 * Returns the resulting document tree
12732 */
12733
Daniel Veillard50822cb2001-07-26 20:05:51 +000012734xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012735 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12736}
12737
12738/**
12739 * xmlSAXUserParseMemory:
12740 * @sax: a SAX handler
12741 * @user_data: The user data returned on SAX callbacks
12742 * @buffer: an in-memory XML document input
12743 * @size: the length of the XML document in bytes
12744 *
12745 * A better SAX parsing routine.
12746 * parse an XML in-memory buffer and call the given SAX handler routines.
12747 *
12748 * Returns 0 in case of success or a error number otherwise
12749 */
12750int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012751 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012752 int ret = 0;
12753 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012754
12755 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12756 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000012757 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12758 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000012759 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012760 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000012761
Daniel Veillard30211a02001-04-26 09:33:18 +000012762 if (user_data != NULL)
12763 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000012764
12765 xmlParseDocument(ctxt);
12766
12767 if (ctxt->wellFormed)
12768 ret = 0;
12769 else {
12770 if (ctxt->errNo != 0)
12771 ret = ctxt->errNo;
12772 else
12773 ret = -1;
12774 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000012775 if (sax != NULL)
12776 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012777 if (ctxt->myDoc != NULL) {
12778 xmlFreeDoc(ctxt->myDoc);
12779 ctxt->myDoc = NULL;
12780 }
Owen Taylor3473f882001-02-23 17:55:21 +000012781 xmlFreeParserCtxt(ctxt);
12782
12783 return ret;
12784}
Daniel Veillard81273902003-09-30 00:43:48 +000012785#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012786
12787/**
12788 * xmlCreateDocParserCtxt:
12789 * @cur: a pointer to an array of xmlChar
12790 *
12791 * Creates a parser context for an XML in-memory document.
12792 *
12793 * Returns the new parser context or NULL
12794 */
12795xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012796xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012797 int len;
12798
12799 if (cur == NULL)
12800 return(NULL);
12801 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012802 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000012803}
12804
Daniel Veillard81273902003-09-30 00:43:48 +000012805#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012806/**
12807 * xmlSAXParseDoc:
12808 * @sax: the SAX handler block
12809 * @cur: a pointer to an array of xmlChar
12810 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12811 * documents
12812 *
12813 * parse an XML in-memory document and build a tree.
12814 * It use the given SAX function block to handle the parsing callback.
12815 * If sax is NULL, fallback to the default DOM tree building routines.
12816 *
12817 * Returns the resulting document tree
12818 */
12819
12820xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012821xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000012822 xmlDocPtr ret;
12823 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000012824 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012825
Daniel Veillard38936062004-11-04 17:45:11 +000012826 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012827
12828
12829 ctxt = xmlCreateDocParserCtxt(cur);
12830 if (ctxt == NULL) return(NULL);
12831 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000012832 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012833 ctxt->sax = sax;
12834 ctxt->userData = NULL;
12835 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012836 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012837
12838 xmlParseDocument(ctxt);
12839 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12840 else {
12841 ret = NULL;
12842 xmlFreeDoc(ctxt->myDoc);
12843 ctxt->myDoc = NULL;
12844 }
Daniel Veillard34099b42004-11-04 17:34:35 +000012845 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000012846 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000012847 xmlFreeParserCtxt(ctxt);
12848
12849 return(ret);
12850}
12851
12852/**
12853 * xmlParseDoc:
12854 * @cur: a pointer to an array of xmlChar
12855 *
12856 * parse an XML in-memory document and build a tree.
12857 *
12858 * Returns the resulting document tree
12859 */
12860
12861xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012862xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012863 return(xmlSAXParseDoc(NULL, cur, 0));
12864}
Daniel Veillard81273902003-09-30 00:43:48 +000012865#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012866
Daniel Veillard81273902003-09-30 00:43:48 +000012867#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000012868/************************************************************************
12869 * *
12870 * Specific function to keep track of entities references *
12871 * and used by the XSLT debugger *
12872 * *
12873 ************************************************************************/
12874
12875static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12876
12877/**
12878 * xmlAddEntityReference:
12879 * @ent : A valid entity
12880 * @firstNode : A valid first node for children of entity
12881 * @lastNode : A valid last node of children entity
12882 *
12883 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12884 */
12885static void
12886xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12887 xmlNodePtr lastNode)
12888{
12889 if (xmlEntityRefFunc != NULL) {
12890 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12891 }
12892}
12893
12894
12895/**
12896 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000012897 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000012898 *
12899 * Set the function to call call back when a xml reference has been made
12900 */
12901void
12902xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12903{
12904 xmlEntityRefFunc = func;
12905}
Daniel Veillard81273902003-09-30 00:43:48 +000012906#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012907
12908/************************************************************************
12909 * *
12910 * Miscellaneous *
12911 * *
12912 ************************************************************************/
12913
12914#ifdef LIBXML_XPATH_ENABLED
12915#include <libxml/xpath.h>
12916#endif
12917
Daniel Veillardffa3c742005-07-21 13:24:09 +000012918extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012919static int xmlParserInitialized = 0;
12920
12921/**
12922 * xmlInitParser:
12923 *
12924 * Initialization function for the XML parser.
12925 * This is not reentrant. Call once before processing in case of
12926 * use in multithreaded programs.
12927 */
12928
12929void
12930xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012931 if (xmlParserInitialized != 0)
12932 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012933
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000012934#ifdef LIBXML_THREAD_ENABLED
12935 __xmlGlobalInitMutexLock();
12936 if (xmlParserInitialized == 0) {
12937#endif
12938 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12939 (xmlGenericError == NULL))
12940 initGenericErrorDefaultFunc(NULL);
12941 xmlInitGlobals();
12942 xmlInitThreads();
12943 xmlInitMemory();
12944 xmlInitCharEncodingHandlers();
12945 xmlDefaultSAXHandlerInit();
12946 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012947#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000012948 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012949#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012950#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000012951 htmlInitAutoClose();
12952 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000012953#endif
12954#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000012955 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000012956#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000012957 xmlParserInitialized = 1;
12958#ifdef LIBXML_THREAD_ENABLED
12959 }
12960 __xmlGlobalInitMutexUnlock();
12961#endif
Owen Taylor3473f882001-02-23 17:55:21 +000012962}
12963
12964/**
12965 * xmlCleanupParser:
12966 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012967 * Cleanup function for the XML library. It tries to reclaim all
12968 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000012969 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012970 * function should not prevent reusing the library but one should
12971 * call xmlCleanupParser() only when the process has
Daniel Veillardccc476f2008-03-04 13:19:49 +000012972 * finished using the library and all XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012973 */
12974
12975void
12976xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012977 if (!xmlParserInitialized)
12978 return;
12979
Owen Taylor3473f882001-02-23 17:55:21 +000012980 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012981#ifdef LIBXML_CATALOG_ENABLED
12982 xmlCatalogCleanup();
12983#endif
Daniel Veillard14412512005-01-21 23:53:26 +000012984 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000012985 xmlCleanupInputCallbacks();
12986#ifdef LIBXML_OUTPUT_ENABLED
12987 xmlCleanupOutputCallbacks();
12988#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012989#ifdef LIBXML_SCHEMAS_ENABLED
12990 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000012991 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012992#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012993 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012994 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000012995 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000012996 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000012997 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012998}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012999
13000/************************************************************************
13001 * *
13002 * New set (2.6.0) of simpler and more flexible APIs *
13003 * *
13004 ************************************************************************/
13005
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is used. The expansion is a bare if statement ending with its own
 * semicolon.
 */
#define DICT_FREE(str)						\
	if (((str) != NULL) &&					\
	    ((dict == NULL) ||					\
	     (!xmlDictOwns(dict, (const xmlChar *)(str)))))	\
	    xmlFree((char *)(str));
13017
13018/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013019 * xmlCtxtReset:
13020 * @ctxt: an XML parser context
13021 *
13022 * Reset a parser context
13023 */
13024void
13025xmlCtxtReset(xmlParserCtxtPtr ctxt)
13026{
13027 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013028 xmlDictPtr dict;
13029
13030 if (ctxt == NULL)
13031 return;
13032
13033 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013034
13035 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13036 xmlFreeInputStream(input);
13037 }
13038 ctxt->inputNr = 0;
13039 ctxt->input = NULL;
13040
13041 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000013042 if (ctxt->spaceTab != NULL) {
13043 ctxt->spaceTab[0] = -1;
13044 ctxt->space = &ctxt->spaceTab[0];
13045 } else {
13046 ctxt->space = NULL;
13047 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013048
13049
13050 ctxt->nodeNr = 0;
13051 ctxt->node = NULL;
13052
13053 ctxt->nameNr = 0;
13054 ctxt->name = NULL;
13055
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013056 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013057 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013058 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013059 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013060 DICT_FREE(ctxt->directory);
13061 ctxt->directory = NULL;
13062 DICT_FREE(ctxt->extSubURI);
13063 ctxt->extSubURI = NULL;
13064 DICT_FREE(ctxt->extSubSystem);
13065 ctxt->extSubSystem = NULL;
13066 if (ctxt->myDoc != NULL)
13067 xmlFreeDoc(ctxt->myDoc);
13068 ctxt->myDoc = NULL;
13069
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013070 ctxt->standalone = -1;
13071 ctxt->hasExternalSubset = 0;
13072 ctxt->hasPErefs = 0;
13073 ctxt->html = 0;
13074 ctxt->external = 0;
13075 ctxt->instate = XML_PARSER_START;
13076 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013077
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013078 ctxt->wellFormed = 1;
13079 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000013080 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013081 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013082#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013083 ctxt->vctxt.userData = ctxt;
13084 ctxt->vctxt.error = xmlParserValidityError;
13085 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013086#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013087 ctxt->record_info = 0;
13088 ctxt->nbChars = 0;
13089 ctxt->checkIndex = 0;
13090 ctxt->inSubset = 0;
13091 ctxt->errNo = XML_ERR_OK;
13092 ctxt->depth = 0;
13093 ctxt->charset = XML_CHAR_ENCODING_UTF8;
13094 ctxt->catalogs = NULL;
13095 xmlInitNodeInfoSeq(&ctxt->node_seq);
13096
13097 if (ctxt->attsDefault != NULL) {
13098 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
13099 ctxt->attsDefault = NULL;
13100 }
13101 if (ctxt->attsSpecial != NULL) {
13102 xmlHashFree(ctxt->attsSpecial, NULL);
13103 ctxt->attsSpecial = NULL;
13104 }
13105
Daniel Veillard4432df22003-09-28 18:58:27 +000013106#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013107 if (ctxt->catalogs != NULL)
13108 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000013109#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000013110 if (ctxt->lastError.code != XML_ERR_OK)
13111 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013112}
13113
13114/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013115 * xmlCtxtResetPush:
13116 * @ctxt: an XML parser context
13117 * @chunk: a pointer to an array of chars
13118 * @size: number of chars in the array
13119 * @filename: an optional file name or URI
13120 * @encoding: the document encoding, or NULL
13121 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013122 * Reset a push parser context
13123 *
13124 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013125 */
13126int
13127xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13128 int size, const char *filename, const char *encoding)
13129{
13130 xmlParserInputPtr inputStream;
13131 xmlParserInputBufferPtr buf;
13132 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
13133
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013134 if (ctxt == NULL)
13135 return(1);
13136
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013137 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
13138 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
13139
13140 buf = xmlAllocParserInputBuffer(enc);
13141 if (buf == NULL)
13142 return(1);
13143
13144 if (ctxt == NULL) {
13145 xmlFreeParserInputBuffer(buf);
13146 return(1);
13147 }
13148
13149 xmlCtxtReset(ctxt);
13150
13151 if (ctxt->pushTab == NULL) {
13152 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
13153 sizeof(xmlChar *));
13154 if (ctxt->pushTab == NULL) {
13155 xmlErrMemory(ctxt, NULL);
13156 xmlFreeParserInputBuffer(buf);
13157 return(1);
13158 }
13159 }
13160
13161 if (filename == NULL) {
13162 ctxt->directory = NULL;
13163 } else {
13164 ctxt->directory = xmlParserGetDirectory(filename);
13165 }
13166
13167 inputStream = xmlNewInputStream(ctxt);
13168 if (inputStream == NULL) {
13169 xmlFreeParserInputBuffer(buf);
13170 return(1);
13171 }
13172
13173 if (filename == NULL)
13174 inputStream->filename = NULL;
13175 else
13176 inputStream->filename = (char *)
13177 xmlCanonicPath((const xmlChar *) filename);
13178 inputStream->buf = buf;
13179 inputStream->base = inputStream->buf->buffer->content;
13180 inputStream->cur = inputStream->buf->buffer->content;
13181 inputStream->end =
13182 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
13183
13184 inputPush(ctxt, inputStream);
13185
13186 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
13187 (ctxt->input->buf != NULL)) {
13188 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
13189 int cur = ctxt->input->cur - ctxt->input->base;
13190
13191 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
13192
13193 ctxt->input->base = ctxt->input->buf->buffer->content + base;
13194 ctxt->input->cur = ctxt->input->base + cur;
13195 ctxt->input->end =
13196 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
13197 use];
13198#ifdef DEBUG_PUSH
13199 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
13200#endif
13201 }
13202
13203 if (encoding != NULL) {
13204 xmlCharEncodingHandlerPtr hdlr;
13205
13206 hdlr = xmlFindCharEncodingHandler(encoding);
13207 if (hdlr != NULL) {
13208 xmlSwitchToEncoding(ctxt, hdlr);
13209 } else {
13210 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
13211 "Unsupported encoding %s\n", BAD_CAST encoding);
13212 }
13213 } else if (enc != XML_CHAR_ENCODING_NONE) {
13214 xmlSwitchEncoding(ctxt, enc);
13215 }
13216
13217 return(0);
13218}
13219
13220/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013221 * xmlCtxtUseOptions:
13222 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013223 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013224 *
13225 * Applies the options to the parser context
13226 *
13227 * Returns 0 in case of success, the set of unknown or unimplemented options
13228 * in case of error.
13229 */
13230int
13231xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13232{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013233 if (ctxt == NULL)
13234 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013235 if (options & XML_PARSE_RECOVER) {
13236 ctxt->recovery = 1;
13237 options -= XML_PARSE_RECOVER;
13238 } else
13239 ctxt->recovery = 0;
13240 if (options & XML_PARSE_DTDLOAD) {
13241 ctxt->loadsubset = XML_DETECT_IDS;
13242 options -= XML_PARSE_DTDLOAD;
13243 } else
13244 ctxt->loadsubset = 0;
13245 if (options & XML_PARSE_DTDATTR) {
13246 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13247 options -= XML_PARSE_DTDATTR;
13248 }
13249 if (options & XML_PARSE_NOENT) {
13250 ctxt->replaceEntities = 1;
13251 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13252 options -= XML_PARSE_NOENT;
13253 } else
13254 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013255 if (options & XML_PARSE_PEDANTIC) {
13256 ctxt->pedantic = 1;
13257 options -= XML_PARSE_PEDANTIC;
13258 } else
13259 ctxt->pedantic = 0;
13260 if (options & XML_PARSE_NOBLANKS) {
13261 ctxt->keepBlanks = 0;
13262 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13263 options -= XML_PARSE_NOBLANKS;
13264 } else
13265 ctxt->keepBlanks = 1;
13266 if (options & XML_PARSE_DTDVALID) {
13267 ctxt->validate = 1;
13268 if (options & XML_PARSE_NOWARNING)
13269 ctxt->vctxt.warning = NULL;
13270 if (options & XML_PARSE_NOERROR)
13271 ctxt->vctxt.error = NULL;
13272 options -= XML_PARSE_DTDVALID;
13273 } else
13274 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000013275 if (options & XML_PARSE_NOWARNING) {
13276 ctxt->sax->warning = NULL;
13277 options -= XML_PARSE_NOWARNING;
13278 }
13279 if (options & XML_PARSE_NOERROR) {
13280 ctxt->sax->error = NULL;
13281 ctxt->sax->fatalError = NULL;
13282 options -= XML_PARSE_NOERROR;
13283 }
Daniel Veillard81273902003-09-30 00:43:48 +000013284#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013285 if (options & XML_PARSE_SAX1) {
13286 ctxt->sax->startElement = xmlSAX2StartElement;
13287 ctxt->sax->endElement = xmlSAX2EndElement;
13288 ctxt->sax->startElementNs = NULL;
13289 ctxt->sax->endElementNs = NULL;
13290 ctxt->sax->initialized = 1;
13291 options -= XML_PARSE_SAX1;
13292 }
Daniel Veillard81273902003-09-30 00:43:48 +000013293#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013294 if (options & XML_PARSE_NODICT) {
13295 ctxt->dictNames = 0;
13296 options -= XML_PARSE_NODICT;
13297 } else {
13298 ctxt->dictNames = 1;
13299 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000013300 if (options & XML_PARSE_NOCDATA) {
13301 ctxt->sax->cdataBlock = NULL;
13302 options -= XML_PARSE_NOCDATA;
13303 }
13304 if (options & XML_PARSE_NSCLEAN) {
13305 ctxt->options |= XML_PARSE_NSCLEAN;
13306 options -= XML_PARSE_NSCLEAN;
13307 }
Daniel Veillard61b93382003-11-03 14:28:31 +000013308 if (options & XML_PARSE_NONET) {
13309 ctxt->options |= XML_PARSE_NONET;
13310 options -= XML_PARSE_NONET;
13311 }
Daniel Veillard8874b942005-08-25 13:19:21 +000013312 if (options & XML_PARSE_COMPACT) {
13313 ctxt->options |= XML_PARSE_COMPACT;
13314 options -= XML_PARSE_COMPACT;
13315 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000013316 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013317 return (options);
13318}
13319
13320/**
13321 * xmlDoRead:
13322 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000013323 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013324 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013325 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013326 * @reuse: keep the context for reuse
13327 *
13328 * Common front-end for the xmlRead functions
13329 *
13330 * Returns the resulting document tree or NULL
13331 */
13332static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013333xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13334 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013335{
13336 xmlDocPtr ret;
13337
13338 xmlCtxtUseOptions(ctxt, options);
13339 if (encoding != NULL) {
13340 xmlCharEncodingHandlerPtr hdlr;
13341
13342 hdlr = xmlFindCharEncodingHandler(encoding);
13343 if (hdlr != NULL)
13344 xmlSwitchToEncoding(ctxt, hdlr);
13345 }
Daniel Veillard60942de2003-09-25 21:05:58 +000013346 if ((URL != NULL) && (ctxt->input != NULL) &&
13347 (ctxt->input->filename == NULL))
13348 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013349 xmlParseDocument(ctxt);
13350 if ((ctxt->wellFormed) || ctxt->recovery)
13351 ret = ctxt->myDoc;
13352 else {
13353 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013354 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013355 xmlFreeDoc(ctxt->myDoc);
13356 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013357 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013358 ctxt->myDoc = NULL;
13359 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013360 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013361 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013362
13363 return (ret);
13364}
13365
13366/**
13367 * xmlReadDoc:
13368 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013369 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013370 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013371 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013372 *
13373 * parse an XML in-memory document and build a tree.
13374 *
13375 * Returns the resulting document tree
13376 */
13377xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013378xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013379{
13380 xmlParserCtxtPtr ctxt;
13381
13382 if (cur == NULL)
13383 return (NULL);
13384
13385 ctxt = xmlCreateDocParserCtxt(cur);
13386 if (ctxt == NULL)
13387 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013388 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013389}
13390
13391/**
13392 * xmlReadFile:
13393 * @filename: a file or URL
13394 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013395 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013396 *
13397 * parse an XML file from the filesystem or the network.
13398 *
13399 * Returns the resulting document tree
13400 */
13401xmlDocPtr
13402xmlReadFile(const char *filename, const char *encoding, int options)
13403{
13404 xmlParserCtxtPtr ctxt;
13405
Daniel Veillard61b93382003-11-03 14:28:31 +000013406 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013407 if (ctxt == NULL)
13408 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013409 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013410}
13411
13412/**
13413 * xmlReadMemory:
13414 * @buffer: a pointer to a char array
13415 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013416 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013417 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013418 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013419 *
13420 * parse an XML in-memory document and build a tree.
13421 *
13422 * Returns the resulting document tree
13423 */
13424xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013425xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013426{
13427 xmlParserCtxtPtr ctxt;
13428
13429 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13430 if (ctxt == NULL)
13431 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013432 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013433}
13434
13435/**
13436 * xmlReadFd:
13437 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013438 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013439 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013440 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013441 *
13442 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013443 * NOTE that the file descriptor will not be closed when the
13444 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013445 *
13446 * Returns the resulting document tree
13447 */
13448xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013449xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013450{
13451 xmlParserCtxtPtr ctxt;
13452 xmlParserInputBufferPtr input;
13453 xmlParserInputPtr stream;
13454
13455 if (fd < 0)
13456 return (NULL);
13457
13458 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13459 if (input == NULL)
13460 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013461 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013462 ctxt = xmlNewParserCtxt();
13463 if (ctxt == NULL) {
13464 xmlFreeParserInputBuffer(input);
13465 return (NULL);
13466 }
13467 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13468 if (stream == NULL) {
13469 xmlFreeParserInputBuffer(input);
13470 xmlFreeParserCtxt(ctxt);
13471 return (NULL);
13472 }
13473 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013474 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013475}
13476
13477/**
13478 * xmlReadIO:
13479 * @ioread: an I/O read function
13480 * @ioclose: an I/O close function
13481 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013482 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013483 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013484 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013485 *
13486 * parse an XML document from I/O functions and source and build a tree.
13487 *
13488 * Returns the resulting document tree
13489 */
13490xmlDocPtr
13491xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000013492 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013493{
13494 xmlParserCtxtPtr ctxt;
13495 xmlParserInputBufferPtr input;
13496 xmlParserInputPtr stream;
13497
13498 if (ioread == NULL)
13499 return (NULL);
13500
13501 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13502 XML_CHAR_ENCODING_NONE);
13503 if (input == NULL)
13504 return (NULL);
13505 ctxt = xmlNewParserCtxt();
13506 if (ctxt == NULL) {
13507 xmlFreeParserInputBuffer(input);
13508 return (NULL);
13509 }
13510 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13511 if (stream == NULL) {
13512 xmlFreeParserInputBuffer(input);
13513 xmlFreeParserCtxt(ctxt);
13514 return (NULL);
13515 }
13516 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013517 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013518}
13519
13520/**
13521 * xmlCtxtReadDoc:
13522 * @ctxt: an XML parser context
13523 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013524 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013525 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013526 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013527 *
13528 * parse an XML in-memory document and build a tree.
13529 * This reuses the existing @ctxt parser context
13530 *
13531 * Returns the resulting document tree
13532 */
13533xmlDocPtr
13534xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000013535 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013536{
13537 xmlParserInputPtr stream;
13538
13539 if (cur == NULL)
13540 return (NULL);
13541 if (ctxt == NULL)
13542 return (NULL);
13543
13544 xmlCtxtReset(ctxt);
13545
13546 stream = xmlNewStringInputStream(ctxt, cur);
13547 if (stream == NULL) {
13548 return (NULL);
13549 }
13550 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013551 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013552}
13553
13554/**
13555 * xmlCtxtReadFile:
13556 * @ctxt: an XML parser context
13557 * @filename: a file or URL
13558 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013559 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013560 *
13561 * parse an XML file from the filesystem or the network.
13562 * This reuses the existing @ctxt parser context
13563 *
13564 * Returns the resulting document tree
13565 */
13566xmlDocPtr
13567xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13568 const char *encoding, int options)
13569{
13570 xmlParserInputPtr stream;
13571
13572 if (filename == NULL)
13573 return (NULL);
13574 if (ctxt == NULL)
13575 return (NULL);
13576
13577 xmlCtxtReset(ctxt);
13578
Daniel Veillard29614c72004-11-26 10:47:26 +000013579 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013580 if (stream == NULL) {
13581 return (NULL);
13582 }
13583 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013584 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013585}
13586
13587/**
13588 * xmlCtxtReadMemory:
13589 * @ctxt: an XML parser context
13590 * @buffer: a pointer to a char array
13591 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013592 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013593 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013594 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013595 *
13596 * parse an XML in-memory document and build a tree.
13597 * This reuses the existing @ctxt parser context
13598 *
13599 * Returns the resulting document tree
13600 */
13601xmlDocPtr
13602xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000013603 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013604{
13605 xmlParserInputBufferPtr input;
13606 xmlParserInputPtr stream;
13607
13608 if (ctxt == NULL)
13609 return (NULL);
13610 if (buffer == NULL)
13611 return (NULL);
13612
13613 xmlCtxtReset(ctxt);
13614
13615 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13616 if (input == NULL) {
13617 return(NULL);
13618 }
13619
13620 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13621 if (stream == NULL) {
13622 xmlFreeParserInputBuffer(input);
13623 return(NULL);
13624 }
13625
13626 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013627 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013628}
13629
13630/**
13631 * xmlCtxtReadFd:
13632 * @ctxt: an XML parser context
13633 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013634 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013635 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013636 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013637 *
13638 * parse an XML from a file descriptor and build a tree.
13639 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013640 * NOTE that the file descriptor will not be closed when the
13641 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013642 *
13643 * Returns the resulting document tree
13644 */
13645xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013646xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13647 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013648{
13649 xmlParserInputBufferPtr input;
13650 xmlParserInputPtr stream;
13651
13652 if (fd < 0)
13653 return (NULL);
13654 if (ctxt == NULL)
13655 return (NULL);
13656
13657 xmlCtxtReset(ctxt);
13658
13659
13660 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13661 if (input == NULL)
13662 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013663 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013664 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13665 if (stream == NULL) {
13666 xmlFreeParserInputBuffer(input);
13667 return (NULL);
13668 }
13669 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013670 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013671}
13672
13673/**
13674 * xmlCtxtReadIO:
13675 * @ctxt: an XML parser context
13676 * @ioread: an I/O read function
13677 * @ioclose: an I/O close function
13678 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013679 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013680 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013681 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013682 *
13683 * parse an XML document from I/O functions and source and build a tree.
13684 * This reuses the existing @ctxt parser context
13685 *
13686 * Returns the resulting document tree
13687 */
13688xmlDocPtr
13689xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13690 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000013691 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013692 const char *encoding, int options)
13693{
13694 xmlParserInputBufferPtr input;
13695 xmlParserInputPtr stream;
13696
13697 if (ioread == NULL)
13698 return (NULL);
13699 if (ctxt == NULL)
13700 return (NULL);
13701
13702 xmlCtxtReset(ctxt);
13703
13704 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13705 XML_CHAR_ENCODING_NONE);
13706 if (input == NULL)
13707 return (NULL);
13708 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13709 if (stream == NULL) {
13710 xmlFreeParserInputBuffer(input);
13711 return (NULL);
13712 }
13713 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013714 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013715}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000013716
13717#define bottom_parser
13718#include "elfgcchack.h"